apache · tdas · Sep 30, 2016 · Sep 30, 2016 · Sep 30, 2016 · Sep 30, 2016
diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
@@ -53,7 +53,14 @@ object MimaExcludes {
       ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.catalog.Catalog.getFunction"),
       ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.catalog.Catalog.databaseExists"),
       ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.catalog.Catalog.tableExists"),
-      ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.catalog.Catalog.functionExists")
+      ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.catalog.Catalog.functionExists"),
+      ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.catalog.Catalog.columnExists"),
+
+      // [SPARK-17731] Metrics for structured streaming
+      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.streaming.SinkStatus.this"),
+      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.streaming.SourceStatus.this"),
+      ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.streaming.StreamingQueryInfo.this"),
+      ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.streaming.StreamingQuery.status")
     )
   }
 

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
@@ -164,6 +164,13 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product {
     ret
   }
 
+  /**
+   * Returns a Seq containing the leaves in this tree.
+   */
+  def collectLeaves(): Seq[BaseType] = {
+    this.collect { case p if p.children.isEmpty => p }
+  }
+
   /**
    * Finds and returns the first [[TreeNode]] of the tree for which the given partial function
    * is defined (pre-order), and applies the partial function to it.

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StatefulAggregate.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StatefulAggregate.scala
@@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.errors._
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
 import org.apache.spark.sql.execution
+import org.apache.spark.sql.execution.metric.SQLMetrics
 import org.apache.spark.sql.execution.streaming.state._
 import org.apache.spark.sql.execution.SparkPlan
 
@@ -56,7 +57,12 @@ case class StateStoreRestoreExec(
     child: SparkPlan)
   extends execution.UnaryExecNode with StatefulOperator {
 
+  override lazy val metrics = Map(
+    "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"))
+
   override protected def doExecute(): RDD[InternalRow] = {
+    val numOutputRows = longMetric("numOutputRows")
+
     child.execute().mapPartitionsWithStateStore(
       getStateId.checkpointLocation,
       operatorId = getStateId.operatorId,
@@ -69,6 +75,7 @@ case class StateStoreRestoreExec(
         iter.flatMap { row =>
           val key = getKey(row)
           val savedState = store.get(key)
+          numOutputRows += 1
           row +: savedState.toSeq
         }
     }
@@ -86,7 +93,13 @@ case class StateStoreSaveExec(
     child: SparkPlan)
   extends execution.UnaryExecNode with StatefulOperator {
 
+  override lazy val metrics = Map(
+    "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"),
+    "numTotalStateRows" -> SQLMetrics.createMetric(sparkContext, "number of total state rows"),
+    "numUpdatedStateRows" -> SQLMetrics.createMetric(sparkContext, "number of updated state rows"))
+
   override protected def doExecute(): RDD[InternalRow] = {
+    metrics // force lazy init at driver
     assert(returnAllStates.nonEmpty,
       "Incorrect planning in IncrementalExecution, returnAllStates have not been set")
     val saveAndReturnFunc = if (returnAllStates.get) saveAndReturnAll _ else saveAndReturnUpdated _
@@ -111,13 +124,18 @@ case class StateStoreSaveExec(
   private def saveAndReturnUpdated(
       store: StateStore,
       iter: Iterator[InternalRow]): Iterator[InternalRow] = {
+    val numOutputRows = longMetric("numOutputRows")
+    val numTotalStateRows = longMetric("numTotalStateRows")
+    val numUpdatedStateRows = longMetric("numUpdatedStateRows")
+
     new Iterator[InternalRow] {
       private[this] val baseIterator = iter
       private[this] val getKey = GenerateUnsafeProjection.generate(keyExpressions, child.output)
 
       override def hasNext: Boolean = {
         if (!baseIterator.hasNext) {
           store.commit()
+          numTotalStateRows += store.numKeys()
           false
         } else {
           true
@@ -128,6 +146,8 @@ case class StateStoreSaveExec(
         val row = baseIterator.next().asInstanceOf[UnsafeRow]
         val key = getKey(row)
         store.put(key.copy(), row.copy())
+        numOutputRows += 1
+        numUpdatedStateRows += 1
         row
       }
     }
@@ -142,12 +162,21 @@ case class StateStoreSaveExec(
       store: StateStore,
       iter: Iterator[InternalRow]): Iterator[InternalRow] = {
     val getKey = GenerateUnsafeProjection.generate(keyExpressions, child.output)
+    val numOutputRows = longMetric("numOutputRows")
+    val numTotalStateRows = longMetric("numTotalStateRows")
+    val numUpdatedStateRows = longMetric("numUpdatedStateRows")
+
     while (iter.hasNext) {
       val row = iter.next().asInstanceOf[UnsafeRow]
       val key = getKey(row)
       store.put(key.copy(), row.copy())
+      numUpdatedStateRows += 1
     }
     store.commit()
-    store.iterator().map(_._2.asInstanceOf[InternalRow])
+    numTotalStateRows += store.numKeys()
+    store.iterator().map { case (k, v) =>
+      numOutputRows += 1
+      v.asInstanceOf[InternalRow]
+    }
   }
 }