Commit 8773d0d

add DeveloperApi annotation

mengxr committed Apr 9, 2014
1 parent da31733
Showing 19 changed files with 64 additions and 16 deletions.
@@ -19,6 +19,7 @@ package org.apache.spark.mllib.api.python

import java.nio.{ByteBuffer, ByteOrder}

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.mllib.classification._
import org.apache.spark.mllib.clustering._
@@ -32,6 +33,7 @@ import org.apache.spark.rdd.RDD
*
* The Java stubs necessary for the Python mllib bindings.
*/
@DeveloperApi
class PythonMLLibAPI extends Serializable {
private def deserializeDoubleVector(bytes: Array[Byte]): Array[Double] = {
val packetLength = bytes.length
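Aside: the method above is collapsed by the diff view. As an illustrative sketch only -- not MLlib's actual Python wire format, which prepends header fields to the raw payload -- this is the kind of byte-level round trip these stubs perform on vectors exchanged with Python:

    import java.nio.{ByteBuffer, ByteOrder}

    // Illustrative codec: pack/unpack an Array[Double] via a native-order buffer.
    object DoubleVectorCodec {
      def serialize(xs: Array[Double]): Array[Byte] = {
        val bb = ByteBuffer.allocate(8 * xs.length).order(ByteOrder.nativeOrder())
        xs.foreach(x => bb.putDouble(x))
        bb.array()
      }

      def deserialize(bytes: Array[Byte]): Array[Double] = {
        require(bytes.length % 8 == 0, "payload must be a whole number of doubles")
        val bb = ByteBuffer.wrap(bytes).order(ByteOrder.nativeOrder())
        Array.fill(bytes.length / 8)(bb.getDouble)
      }
    }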
@@ -19,13 +19,15 @@ package org.apache.spark.mllib.optimization

import breeze.linalg.{axpy => brzAxpy}

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.mllib.linalg.{Vectors, Vector}

/**
* :: DeveloperApi ::
*
* Class used to compute the gradient for a loss function, given a single data point.
*/
@DeveloperApi
abstract class Gradient extends Serializable {
/**
* Compute the gradient and loss given the features of a single data point.
@@ -58,6 +60,7 @@ abstract class Gradient extends Serializable {
* Compute gradient and loss for a logistic loss function, as used in binary classification.
* See also the documentation for the precise formulation.
*/
@DeveloperApi
class LogisticGradient extends Gradient {
override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = {
val brzData = data.toBreeze
@@ -103,6 +106,7 @@ class LogisticGradient extends Gradient {
* L = 1/n ||A weights-y||^2
* See also the documentation for the precise formulation.
*/
@DeveloperApi
class LeastSquaresGradient extends Gradient {
override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = {
val brzData = data.toBreeze
@@ -136,6 +140,7 @@ class LeastSquaresGradient extends Gradient {
* See also the documentation for the precise formulation.
* NOTE: This assumes that the labels are {0,1}
*/
@DeveloperApi
class HingeGradient extends Gradient {
override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = {
val brzData = data.toBreeze
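Aside: the formulas referenced in the docs above can be made concrete with a small sketch on plain arrays (a simplified stand-in for the Breeze-backed implementation; labels are in {0, 1}):

    // Logistic gradient and loss for a single point:
    // multiplier = sigmoid(w . x) - y, gradient = multiplier * x.
    def logisticGradient(data: Array[Double], label: Double,
        weights: Array[Double]): (Array[Double], Double) = {
      val margin = -data.zip(weights).map { case (x, w) => x * w }.sum
      val multiplier = 1.0 / (1.0 + math.exp(margin)) - label
      val gradient = data.map(_ * multiplier)
      val loss =
        if (label > 0) math.log(1 + math.exp(margin))    // y = 1
        else math.log(1 + math.exp(margin)) - margin     // y = 0
      (gradient, loss)
    }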
@@ -21,6 +21,7 @@ import scala.collection.mutable.ArrayBuffer

import breeze.linalg.{DenseVector => BDV}

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.Logging
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.linalg.{Vectors, Vector}
@@ -32,6 +33,7 @@ import org.apache.spark.mllib.linalg.{Vectors, Vector}
* @param gradient Gradient function to be used.
* @param updater Updater to be used to update weights after every iteration.
*/
@DeveloperApi
class GradientDescent(private var gradient: Gradient, private var updater: Updater)
extends Optimizer with Logging
{
@@ -114,6 +116,7 @@ class GradientDescent(private var gradient: Gradient, private var updater: Updater)
*
* Top-level method to run gradient descent.
*/
@DeveloperApi
object GradientDescent extends Logging {
/**
* Run stochastic gradient descent (SGD) in parallel using mini batches.
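Aside: the body of the gradient descent runner is collapsed above; its core loop looks roughly like this local sketch (plain collections instead of RDDs, so the parallel aggregation and step-size plumbing of the real method are elided):

    import scala.util.Random

    // Each iteration samples a mini batch, averages per-point gradients,
    // and delegates the weight update to a pluggable rule.
    def miniBatchSGD(
        data: Seq[(Double, Array[Double])],  // (label, features)
        grad: (Array[Double], Double, Array[Double]) => Array[Double],
        update: (Array[Double], Array[Double], Int) => Array[Double],
        init: Array[Double],
        numIterations: Int,
        miniBatchFraction: Double,
        seed: Long = 42L): Array[Double] = {
      val rnd = new Random(seed)
      var weights = init
      for (iter <- 1 to numIterations) {
        val batch = data.filter(_ => rnd.nextDouble() < miniBatchFraction)
        if (batch.nonEmpty) {
          val summed = batch.map { case (y, x) => grad(x, y, weights) }
            .reduce((a, b) => a.zip(b).map { case (p, q) => p + q })
          val avgGradient = summed.map(_ / batch.size)
          weights = update(weights, avgGradient, iter)
        }
      }
      weights
    }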
@@ -19,13 +19,15 @@ package org.apache.spark.mllib.optimization

import org.apache.spark.rdd.RDD

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.mllib.linalg.Vector

/**
* :: DeveloperApi ::
*
* Trait for optimization problem solvers.
*/
@DeveloperApi
trait Optimizer extends Serializable {

/**
@@ -21,6 +21,7 @@ import scala.math._

import breeze.linalg.{norm => brzNorm, axpy => brzAxpy, Vector => BV}

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.mllib.linalg.{Vectors, Vector}

/**
@@ -37,6 +38,7 @@ import org.apache.spark.mllib.linalg.{Vectors, Vector}
* The updater is also responsible for performing the update coming from the
* regularization term R(w) (if any regularization is used).
*/
@DeveloperApi
abstract class Updater extends Serializable {
/**
* Compute an updated value for weights given the gradient, stepSize, iteration number and
@@ -66,6 +68,7 @@ abstract class Updater extends Serializable {
* A simple updater for gradient descent *without* any regularization.
* Uses a step-size decreasing with the square root of the number of iterations.
*/
@DeveloperApi
class SimpleUpdater extends Updater {
override def compute(
weightsOld: Vector,
@@ -101,6 +104,7 @@ class SimpleUpdater extends Updater {
*
* Equivalently, set weight component to signum(w) * max(0.0, abs(w) - shrinkageVal)
*/
@DeveloperApi
class L1Updater extends Updater {
override def compute(
weightsOld: Vector,
@@ -132,6 +136,7 @@ class L1Updater extends Updater {
* R(w) = 1/2 ||w||^2
* Uses a step-size decreasing with the square root of the number of iterations.
*/
@DeveloperApi
class SquaredL2Updater extends Updater {
override def compute(
weightsOld: Vector,
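Aside: the L1Updater rule quoted above -- signum(w) * max(0.0, abs(w) - shrinkageVal) -- amounts to a gradient step followed by per-component soft-thresholding. A sketch on plain arrays:

    // shrinkage = regParam * (stepSize / sqrt(iter)), the decreasing step size.
    def l1Update(weights: Array[Double], gradient: Array[Double],
        stepSize: Double, iter: Int, regParam: Double): Array[Double] = {
      val thisIterStepSize = stepSize / math.sqrt(iter)
      val shrinkage = regParam * thisIterStepSize
      weights.zip(gradient).map { case (w, g) =>
        val stepped = w - thisIterStepSize * g   // plain gradient step
        math.signum(stepped) * math.max(0.0, math.abs(stepped) - shrinkage)
      }
    }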
@@ -17,13 +17,14 @@

package org.apache.spark.mllib.recommendation

import org.jblas._

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.rdd.RDD
import org.apache.spark.SparkContext._
import org.apache.spark.mllib.api.python.PythonMLLibAPI


/**
* Model representing the result of matrix factorization.
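Aside: for context on the file above, a factorization model scores a (user, product) pair with the dot product of the two latent feature vectors -- roughly:

    // The essence of rating prediction under matrix factorization.
    def predictRating(userFeatures: Array[Double],
        productFeatures: Array[Double]): Double =
      userFeatures.zip(productFeatures).map { case (u, p) => u * p }.sum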
@@ -17,6 +17,8 @@

package org.apache.spark.mllib.tree.impurity

import org.apache.spark.annotation.DeveloperApi

/**
* :: Experimental ::
*
@@ -25,7 +27,7 @@ package org.apache.spark.mllib.tree.impurity
*/
object Entropy extends Impurity {

private[tree] def log2(x: Double) = scala.math.log(x) / scala.math.log(2)

/**
* :: DeveloperApi ::
@@ -35,16 +37,17 @@ object Entropy extends Impurity {
* @param c1 count of instances with label 1
* @return entropy value
*/
@DeveloperApi
override def calculate(c0: Double, c1: Double): Double = {
if (c0 == 0 || c1 == 0) {
0
} else {
val total = c0 + c1
val f0 = c0 / total
val f1 = c1 / total
-(f0 * log2(f0)) - (f1 * log2(f1))
}
}

override def calculate(count: Double, sum: Double, sumSquares: Double): Double =
throw new UnsupportedOperationException("Entropy.calculate")
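Aside: a worked example of the entropy computation above, for a node holding 8 instances split 2/6 between the labels:

    def log2(x: Double) = scala.math.log(x) / scala.math.log(2)
    val (c0, c1) = (2.0, 6.0)
    val total = c0 + c1
    val (f0, f1) = (c0 / total, c1 / total)
    val entropy = -(f0 * log2(f0)) - (f1 * log2(f1))
    // entropy ~= 0.811; a pure node scores 0.0 and an even 4/4 split scores 1.0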
@@ -17,6 +17,8 @@

package org.apache.spark.mllib.tree.impurity

import org.apache.spark.annotation.DeveloperApi

/**
* :: Experimental ::
*
@@ -34,6 +36,7 @@ object Gini extends Impurity {
* @param c1 count of instances with label 1
* @return Gini coefficient value
*/
@DeveloperApi
override def calculate(c0: Double, c1: Double): Double = {
if (c0 == 0 || c1 == 0) {
0
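Aside: for the non-degenerate case, this calculate returns the standard binary Gini impurity, 1 - f0^2 - f1^2. On the same 2/6 split used in the entropy example above:

    val (c0, c1) = (2.0, 6.0)
    val total = c0 + c1
    val (f0, f1) = (c0 / total, c1 / total)
    val gini = 1.0 - f0 * f0 - f1 * f1  // 1 - 0.0625 - 0.5625 = 0.375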
@@ -17,6 +17,8 @@

package org.apache.spark.mllib.tree.impurity

import org.apache.spark.annotation.DeveloperApi

/**
* :: Experimental ::
*
@@ -32,6 +34,7 @@ trait Impurity extends Serializable {
* @param c1 count of instances with label 1
* @return information value
*/
@DeveloperApi
def calculate(c0 : Double, c1 : Double): Double

/**
@@ -43,6 +46,6 @@ trait Impurity extends Serializable {
* @param sumSquares summation of squares of the labels
* @return information value
*/
@DeveloperApi
def calculate(count: Double, sum: Double, sumSquares: Double): Double

}
@@ -17,6 +17,8 @@

package org.apache.spark.mllib.tree.impurity

import org.apache.spark.annotation.DeveloperApi

/**
* :: Experimental ::
*
@@ -34,6 +36,7 @@ object Variance extends Impurity {
* @param sum sum of labels
* @param sumSquares summation of squares of the labels
*/
@DeveloperApi
override def calculate(count: Double, sum: Double, sumSquares: Double): Double = {
val squaredLoss = sumSquares - (sum * sum) / count
squaredLoss / count
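Aside: the overload above uses the one-pass identity Var(y) = E[y^2] - (E[y])^2 = (sumSquares - sum^2/count) / count. For labels 1.0, 2.0, 3.0:

    val labels = Seq(1.0, 2.0, 3.0)
    val count = labels.size.toDouble             // 3
    val sum = labels.sum                         // 6
    val sumSquares = labels.map(x => x * x).sum  // 14
    val variance = (sumSquares - sum * sum / count) / count
    // (14 - 12) / 3 = 2/3, the mean squared deviation from the mean 2.0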
@@ -17,6 +17,8 @@

package org.apache.spark.mllib.tree.model

import org.apache.spark.annotation.DeveloperApi

/**
* :: DeveloperApi ::
*
@@ -27,6 +29,7 @@ package org.apache.spark.mllib.tree.model
* @param rightImpurity right node impurity
* @param predict predicted value
*/
@DeveloperApi
class InformationGainStats(
val gain: Double,
val impurity: Double,
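Aside: the gain such stats record is the impurity reduction achieved by a split, with the child impurities weighted by how many instances go left and right. A sketch of the standard formula (not code from this file):

    // gain = impurity(parent) - wLeft * impurity(left) - wRight * impurity(right)
    def informationGain(parentImpurity: Double,
        leftImpurity: Double, rightImpurity: Double,
        leftCount: Double, rightCount: Double): Double = {
      val total = leftCount + rightCount
      parentImpurity -
        (leftCount / total) * leftImpurity -
        (rightCount / total) * rightImpurity
    }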
@@ -17,6 +17,7 @@

package org.apache.spark.mllib.tree.model

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.Logging
import org.apache.spark.mllib.tree.configuration.FeatureType._
import org.apache.spark.mllib.linalg.Vector
@@ -33,6 +34,7 @@ import org.apache.spark.mllib.linalg.Vector
* @param rightNode right child
* @param stats information gain stats
*/
@DeveloperApi
class Node (
val id: Int,
val predict: Double,
@@ -17,6 +17,7 @@

package org.apache.spark.mllib.tree.model

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.mllib.tree.configuration.FeatureType.FeatureType

/**
@@ -28,6 +29,7 @@ import org.apache.spark.mllib.tree.configuration.FeatureType.FeatureType
* @param featureType type of feature -- categorical or continuous
* @param categories accepted values for categorical variables
*/
@DeveloperApi
case class Split(
feature: Int,
threshold: Double,
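Aside: to see how Node and Split cooperate at prediction time, here is a much-reduced sketch with hypothetical simplified types (the real Node also handles categorical features by testing membership in the split's categories list):

    // Internal nodes test a continuous feature against a threshold;
    // leaves carry the prediction.
    case class SimpleNode(
        predict: Double,
        feature: Int = -1,
        threshold: Double = 0.0,
        left: Option[SimpleNode] = None,
        right: Option[SimpleNode] = None) {
      def predictFor(features: Array[Double]): Double = (left, right) match {
        case (Some(l), Some(r)) =>
          if (features(feature) <= threshold) l.predictFor(features)
          else r.predictFor(features)
        case _ => predict  // leaf
      }
    }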
@@ -17,6 +17,7 @@

package org.apache.spark.mllib.util

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.Logging
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.regression.LabeledPoint
@@ -26,6 +27,7 @@ import org.apache.spark.mllib.regression.LabeledPoint
*
* A collection of methods used to validate data before applying ML algorithms.
*/
@DeveloperApi
object DataValidators extends Logging {

/**
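Aside: an illustrative example of the kind of check such a validator performs (the real versions operate on RDD[LabeledPoint]): binary classifiers require every label to be 0.0 or 1.0.

    // Hypothetical local stand-in for a binary-label validator.
    val binaryLabelValidator: Seq[Double] => Boolean =
      labels => labels.forall(l => l == 0.0 || l == 1.0)

    // binaryLabelValidator(Seq(0.0, 1.0, 1.0)) == true
    // binaryLabelValidator(Seq(0.0, 2.0))      == false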
@@ -19,6 +19,7 @@ package org.apache.spark.mllib.util

import scala.util.Random

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

@@ -29,7 +30,7 @@ import org.apache.spark.rdd.RDD
* from a d-dimensional Gaussian distribution scaled by factor r and then creates a Gaussian
* cluster with scale 1 around each center.
*/

@DeveloperApi
object KMeansDataGenerator {

/**
@@ -22,6 +22,7 @@ import scala.util.Random

import org.jblas.DoubleMatrix

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.linalg.Vectors
@@ -34,6 +35,7 @@ import org.apache.spark.mllib.regression.LabeledPoint
* uniformly random values for every feature and adds Gaussian noise with mean `eps` to the
* response variable `Y`.
*/
@DeveloperApi
object LinearDataGenerator {

/**
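Aside: the scheme described above boils down to y = w . x + intercept + noise. A self-contained sketch (names and the exact noise scaling are illustrative, not this file's API):

    import scala.util.Random

    // One synthetic linear-regression point: features uniform in [-1, 1],
    // response = w . x + intercept plus Gaussian noise scaled by eps.
    def linearPoint(weights: Array[Double], intercept: Double, eps: Double,
        rnd: Random): (Double, Array[Double]) = {
      val x = Array.fill(weights.length)(2 * rnd.nextDouble() - 1)
      val y = x.zip(weights).map { case (xi, wi) => xi * wi }.sum +
        intercept + eps * rnd.nextGaussian()
      (y, x)
    }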
@@ -19,6 +19,7 @@ package org.apache.spark.mllib.util

import scala.util.Random

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.regression.LabeledPoint
@@ -30,6 +31,7 @@ import org.apache.spark.mllib.linalg.Vectors
* Generate test data for LogisticRegression. This class chooses positive labels
* with probability `probOne` and scales features for positive examples by `eps`.
*/
@DeveloperApi
object LogisticRegressionDataGenerator {

/**
@@ -21,6 +21,7 @@ import scala.util.Random

import org.jblas.DoubleMatrix

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

@@ -49,6 +50,7 @@ import org.apache.spark.rdd.RDD
* test (Boolean) Whether to create testing RDD.
* testSampFact (Double) Percentage of training data to use as test data.
*/
@DeveloperApi
object MFDataGenerator {
def main(args: Array[String]) {
if (args.length < 2) {
