Commit 8773d0d

add DeveloperApi annotation

mengxr committed Apr 9, 2014
1 parent da31733
Showing 19 changed files with 64 additions and 16 deletions.
@@ -19,6 +19,7 @@ package org.apache.spark.mllib.api.python

import java.nio.{ByteBuffer, ByteOrder}

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.mllib.classification._
import org.apache.spark.mllib.clustering._
@@ -32,6 +33,7 @@ import org.apache.spark.rdd.RDD
*
* The Java stubs necessary for the Python mllib bindings.
*/
@DeveloperApi
class PythonMLLibAPI extends Serializable {
private def deserializeDoubleVector(bytes: Array[Byte]): Array[Double] = {
val packetLength = bytes.length
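Aside: the method above is collapsed by the diff view. As an illustrative sketch only -- not MLlib's actual Python wire format, which prepends header fields to the raw payload -- this is the kind of byte-level round trip these stubs perform on vectors exchanged with Python:

    import java.nio.{ByteBuffer, ByteOrder}

    // Illustrative codec: pack/unpack an Array[Double] via a native-order buffer.
    object DoubleVectorCodec {
      def serialize(xs: Array[Double]): Array[Byte] = {
        val bb = ByteBuffer.allocate(8 * xs.length).order(ByteOrder.nativeOrder())
        xs.foreach(x => bb.putDouble(x))
        bb.array()
      }

      def deserialize(bytes: Array[Byte]): Array[Double] = {
        require(bytes.length % 8 == 0, "payload must be a whole number of doubles")
        val bb = ByteBuffer.wrap(bytes).order(ByteOrder.nativeOrder())
        Array.fill(bytes.length / 8)(bb.getDouble)
      }
    }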
@@ -19,13 +19,15 @@ package org.apache.spark.mllib.optimization

import breeze.linalg.{axpy => brzAxpy}

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.mllib.linalg.{Vectors, Vector}

/**
* :: DeveloperApi ::
*
* Class used to compute the gradient for a loss function, given a single data point.
*/
@DeveloperApi
abstract class Gradient extends Serializable {
/**
* Compute the gradient and loss given the features of a single data point.
@@ -58,6 +60,7 @@ abstract class Gradient extends Serializable {
* Compute gradient and loss for a logistic loss function, as used in binary classification.
* See also the documentation for the precise formulation.
*/
@DeveloperApi
class LogisticGradient extends Gradient {
override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = {
val brzData = data.toBreeze
@@ -103,6 +106,7 @@ class LogisticGradient extends Gradient {
* L = 1/n ||A weights-y||^2
* See also the documentation for the precise formulation.
*/
@DeveloperApi
class LeastSquaresGradient extends Gradient {
override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = {
val brzData = data.toBreeze
@@ -136,6 +140,7 @@ class LeastSquaresGradient extends Gradient {
* See also the documentation for the precise formulation.
* NOTE: This assumes that the labels are {0,1}
*/
@DeveloperApi
class HingeGradient extends Gradient {
override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = {
val brzData = data.toBreeze
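Aside: the formulas referenced in the docs above can be made concrete with a small sketch on plain arrays (a simplified stand-in for the Breeze-backed implementation; labels are in {0, 1}):

    // Logistic gradient and loss for a single point:
    // multiplier = sigmoid(w . x) - y, gradient = multiplier * x.
    def logisticGradient(data: Array[Double], label: Double,
        weights: Array[Double]): (Array[Double], Double) = {
      val margin = -data.zip(weights).map { case (x, w) => x * w }.sum
      val multiplier = 1.0 / (1.0 + math.exp(margin)) - label
      val gradient = data.map(_ * multiplier)
      val loss =
        if (label > 0) math.log(1 + math.exp(margin))    // y = 1
        else math.log(1 + math.exp(margin)) - margin     // y = 0
      (gradient, loss)
    }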
@@ -21,6 +21,7 @@ import scala.collection.mutable.ArrayBuffer

import breeze.linalg.{DenseVector => BDV}

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.Logging
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.linalg.{Vectors, Vector}
@@ -32,6 +33,7 @@ import org.apache.spark.mllib.linalg.{Vectors, Vector}
* @param gradient Gradient function to be used.
* @param updater Updater to be used to update weights after every iteration.
*/
@DeveloperApi
class GradientDescent(private var gradient: Gradient, private var updater: Updater)
extends Optimizer with Logging
{
@@ -114,6 +116,7 @@ class GradientDescent(private var gradient: Gradient, private var updater: Updater)
*
* Top-level method to run gradient descent.
*/
@DeveloperApi
object GradientDescent extends Logging {
/**
* Run stochastic gradient descent (SGD) in parallel using mini batches.
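Aside: the body of the gradient descent runner is collapsed above; its core loop looks roughly like this local sketch (plain collections instead of RDDs, so the parallel aggregation and step-size plumbing of the real method are elided):

    import scala.util.Random

    // Each iteration samples a mini batch, averages per-point gradients,
    // and delegates the weight update to a pluggable rule.
    def miniBatchSGD(
        data: Seq[(Double, Array[Double])],  // (label, features)
        grad: (Array[Double], Double, Array[Double]) => Array[Double],
        update: (Array[Double], Array[Double], Int) => Array[Double],
        init: Array[Double],
        numIterations: Int,
        miniBatchFraction: Double,
        seed: Long = 42L): Array[Double] = {
      val rnd = new Random(seed)
      var weights = init
      for (iter <- 1 to numIterations) {
        val batch = data.filter(_ => rnd.nextDouble() < miniBatchFraction)
        if (batch.nonEmpty) {
          val summed = batch.map { case (y, x) => grad(x, y, weights) }
            .reduce((a, b) => a.zip(b).map { case (p, q) => p + q })
          val avgGradient = summed.map(_ / batch.size)
          weights = update(weights, avgGradient, iter)
        }
      }
      weights
    }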
@@ -19,13 +19,15 @@ package org.apache.spark.mllib.optimization

import org.apache.spark.rdd.RDD

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.mllib.linalg.Vector

/**
* :: DeveloperApi ::
*
* Trait for optimization problem solvers.
*/
@DeveloperApi
trait Optimizer extends Serializable {

/**
@@ -21,6 +21,7 @@ import scala.math._

import breeze.linalg.{norm => brzNorm, axpy => brzAxpy, Vector => BV}

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.mllib.linalg.{Vectors, Vector}

/**
@@ -37,6 +38,7 @@ import org.apache.spark.mllib.linalg.{Vectors, Vector}
* The updater is also responsible for performing the update coming from the
* regularization term R(w) (if any regularization is used).
*/
@DeveloperApi
abstract class Updater extends Serializable {
/**
* Compute an updated value for weights given the gradient, stepSize, iteration number and
@@ -66,6 +68,7 @@ abstract class Updater extends Serializable {
* A simple updater for gradient descent *without* any regularization.
* Uses a step-size decreasing with the square root of the number of iterations.
*/
@DeveloperApi
class SimpleUpdater extends Updater {
override def compute(
weightsOld: Vector,
@@ -101,6 +104,7 @@ class SimpleUpdater extends Updater {
*
* Equivalently, set weight component to signum(w) * max(0.0, abs(w) - shrinkageVal)
*/
@DeveloperApi
class L1Updater extends Updater {
override def compute(
weightsOld: Vector,
@@ -132,6 +136,7 @@ class L1Updater extends Updater {
* R(w) = 1/2 ||w||^2
* Uses a step-size decreasing with the square root of the number of iterations.
*/
@DeveloperApi
class SquaredL2Updater extends Updater {
override def compute(
weightsOld: Vector,
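Aside: the L1Updater rule quoted above -- signum(w) * max(0.0, abs(w) - shrinkageVal) -- amounts to a gradient step followed by per-component soft-thresholding. A sketch on plain arrays:

    // shrinkage = regParam * (stepSize / sqrt(iter)), the decreasing step size.
    def l1Update(weights: Array[Double], gradient: Array[Double],
        stepSize: Double, iter: Int, regParam: Double): Array[Double] = {
      val thisIterStepSize = stepSize / math.sqrt(iter)
      val shrinkage = regParam * thisIterStepSize
      weights.zip(gradient).map { case (w, g) =>
        val stepped = w - thisIterStepSize * g   // plain gradient step
        math.signum(stepped) * math.max(0.0, math.abs(stepped) - shrinkage)
      }
    }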
@@ -17,13 +17,14 @@

package org.apache.spark.mllib.recommendation

import org.jblas._

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.rdd.RDD
import org.apache.spark.SparkContext._
import org.apache.spark.mllib.api.python.PythonMLLibAPI


/**
* Model representing the result of matrix factorization.
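Aside: for context on the file above, a factorization model scores a (user, product) pair with the dot product of the two latent feature vectors -- roughly:

    // The essence of rating prediction under matrix factorization.
    def predictRating(userFeatures: Array[Double],
        productFeatures: Array[Double]): Double =
      userFeatures.zip(productFeatures).map { case (u, p) => u * p }.sum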
@@ -17,6 +17,8 @@

package org.apache.spark.mllib.tree.impurity

import org.apache.spark.annotation.DeveloperApi

/**
* :: Experimental ::
*
@@ -25,7 +27,7 @@ package org.apache.spark.mllib.tree.impurity
*/
object Entropy extends Impurity {

private[tree] def log2(x: Double) = scala.math.log(x) / scala.math.log(2)

/**
* :: DeveloperApi ::
@@ -35,16 +37,17 @@ object Entropy extends Impurity {
* @param c1 count of instances with label 1
* @return entropy value
*/
@DeveloperApi
override def calculate(c0: Double, c1: Double): Double = {
if (c0 == 0 || c1 == 0) {
0
} else {
val total = c0 + c1
val f0 = c0 / total
val f1 = c1 / total
-(f0 * log2(f0)) - (f1 * log2(f1))
}
}

override def calculate(count: Double, sum: Double, sumSquares: Double): Double =
throw new UnsupportedOperationException("Entropy.calculate")
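Aside: a worked example of the entropy computation above, for a node holding 8 instances split 2/6 between the labels:

    def log2(x: Double) = scala.math.log(x) / scala.math.log(2)
    val (c0, c1) = (2.0, 6.0)
    val total = c0 + c1
    val (f0, f1) = (c0 / total, c1 / total)
    val entropy = -(f0 * log2(f0)) - (f1 * log2(f1))
    // entropy ~= 0.811; a pure node scores 0.0 and an even 4/4 split scores 1.0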
@@ -17,6 +17,8 @@

package org.apache.spark.mllib.tree.impurity

import org.apache.spark.annotation.DeveloperApi

/**
* :: Experimental ::
*
@@ -34,6 +36,7 @@ object Gini extends Impurity {
* @param c1 count of instances with label 1
* @return Gini coefficient value
*/
@DeveloperApi
override def calculate(c0: Double, c1: Double): Double = {
if (c0 == 0 || c1 == 0) {
0
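Aside: for the non-degenerate case, this calculate returns the standard binary Gini impurity, 1 - f0^2 - f1^2. On the same 2/6 split used in the entropy example above:

    val (c0, c1) = (2.0, 6.0)
    val total = c0 + c1
    val (f0, f1) = (c0 / total, c1 / total)
    val gini = 1.0 - f0 * f0 - f1 * f1  // 1 - 0.0625 - 0.5625 = 0.375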
@@ -17,6 +17,8 @@

package org.apache.spark.mllib.tree.impurity

import org.apache.spark.annotation.DeveloperApi

/**
* :: Experimental ::
*
@@ -32,6 +34,7 @@ trait Impurity extends Serializable {
* @param c1 count of instances with label 1
* @return information value
*/
@DeveloperApi
def calculate(c0 : Double, c1 : Double): Double

/**
@@ -43,6 +46,6 @@ trait Impurity extends Serializable {
* @param sumSquares summation of squares of the labels
* @return information value
*/
@DeveloperApi
def calculate(count: Double, sum: Double, sumSquares: Double): Double

}
@@ -17,6 +17,8 @@

package org.apache.spark.mllib.tree.impurity

import org.apache.spark.annotation.DeveloperApi

/**
* :: Experimental ::
*
@@ -34,6 +36,7 @@ object Variance extends Impurity {
* @param sum sum of labels
* @param sumSquares summation of squares of the labels
*/
@DeveloperApi
override def calculate(count: Double, sum: Double, sumSquares: Double): Double = {
val squaredLoss = sumSquares - (sum * sum) / count
squaredLoss / count
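Aside: the overload above uses the one-pass identity Var(y) = E[y^2] - (E[y])^2 = (sumSquares - sum^2/count) / count. For labels 1.0, 2.0, 3.0:

    val labels = Seq(1.0, 2.0, 3.0)
    val count = labels.size.toDouble             // 3
    val sum = labels.sum                         // 6
    val sumSquares = labels.map(x => x * x).sum  // 14
    val variance = (sumSquares - sum * sum / count) / count
    // (14 - 12) / 3 = 2/3, the mean squared deviation from the mean 2.0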
@@ -17,6 +17,8 @@

package org.apache.spark.mllib.tree.model

import org.apache.spark.annotation.DeveloperApi

/**
* :: DeveloperApi ::
*
@@ -27,6 +29,7 @@ package org.apache.spark.mllib.tree.model
* @param rightImpurity right node impurity
* @param predict predicted value
*/
@DeveloperApi
class InformationGainStats(
val gain: Double,
val impurity: Double,
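Aside: the gain such stats record is the impurity reduction achieved by a split, with the child impurities weighted by how many instances go left and right. A sketch of the standard formula (not code from this file):

    // gain = impurity(parent) - wLeft * impurity(left) - wRight * impurity(right)
    def informationGain(parentImpurity: Double,
        leftImpurity: Double, rightImpurity: Double,
        leftCount: Double, rightCount: Double): Double = {
      val total = leftCount + rightCount
      parentImpurity -
        (leftCount / total) * leftImpurity -
        (rightCount / total) * rightImpurity
    }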
@@ -17,6 +17,7 @@

package org.apache.spark.mllib.tree.model

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.Logging
import org.apache.spark.mllib.tree.configuration.FeatureType._
import org.apache.spark.mllib.linalg.Vector
@@ -33,6 +34,7 @@ import org.apache.spark.mllib.linalg.Vector
* @param rightNode right child
* @param stats information gain stats
*/
@DeveloperApi
class Node (
val id: Int,
val predict: Double,
@@ -17,6 +17,7 @@

package org.apache.spark.mllib.tree.model

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.mllib.tree.configuration.FeatureType.FeatureType

/**
@@ -28,6 +29,7 @@ import org.apache.spark.mllib.tree.configuration.FeatureType.FeatureType
* @param featureType type of feature -- categorical or continuous
* @param categories accepted values for categorical variables
*/
@DeveloperApi
case class Split(
feature: Int,
threshold: Double,
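Aside: to see how Node and Split cooperate at prediction time, here is a much-reduced sketch with hypothetical simplified types (the real Node also handles categorical features by testing membership in the split's categories list):

    // Internal nodes test a continuous feature against a threshold;
    // leaves carry the prediction.
    case class SimpleNode(
        predict: Double,
        feature: Int = -1,
        threshold: Double = 0.0,
        left: Option[SimpleNode] = None,
        right: Option[SimpleNode] = None) {
      def predictFor(features: Array[Double]): Double = (left, right) match {
        case (Some(l), Some(r)) =>
          if (features(feature) <= threshold) l.predictFor(features)
          else r.predictFor(features)
        case _ => predict  // leaf
      }
    }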
@@ -17,6 +17,7 @@

package org.apache.spark.mllib.util

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.Logging
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.regression.LabeledPoint
@@ -26,6 +27,7 @@ import org.apache.spark.mllib.regression.LabeledPoint
*
* A collection of methods used to validate data before applying ML algorithms.
*/
@DeveloperApi
object DataValidators extends Logging {

/**
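Aside: an illustrative example of the kind of check such a validator performs (the real versions operate on RDD[LabeledPoint]): binary classifiers require every label to be 0.0 or 1.0.

    // Hypothetical local stand-in for a binary-label validator.
    val binaryLabelValidator: Seq[Double] => Boolean =
      labels => labels.forall(l => l == 0.0 || l == 1.0)

    // binaryLabelValidator(Seq(0.0, 1.0, 1.0)) == true
    // binaryLabelValidator(Seq(0.0, 2.0))      == false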
@@ -19,6 +19,7 @@ package org.apache.spark.mllib.util

import scala.util.Random

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

@@ -29,7 +30,7 @@ import org.apache.spark.rdd.RDD
* from a d-dimensional Gaussian distribution scaled by factor r and then creates a Gaussian
* cluster with scale 1 around each center.
*/

@DeveloperApi
object KMeansDataGenerator {

/**
@@ -22,6 +22,7 @@ import scala.util.Random

import org.jblas.DoubleMatrix

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.linalg.Vectors
@@ -34,6 +35,7 @@ import org.apache.spark.mllib.regression.LabeledPoint
* uniformly random values for every feature and adds Gaussian noise with mean `eps` to the
* response variable `Y`.
*/
@DeveloperApi
object LinearDataGenerator {

/**
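Aside: the scheme described above boils down to y = w . x + intercept + noise. A self-contained sketch (names and the exact noise scaling are illustrative, not this file's API):

    import scala.util.Random

    // One synthetic linear-regression point: features uniform in [-1, 1],
    // response = w . x + intercept plus Gaussian noise scaled by eps.
    def linearPoint(weights: Array[Double], intercept: Double, eps: Double,
        rnd: Random): (Double, Array[Double]) = {
      val x = Array.fill(weights.length)(2 * rnd.nextDouble() - 1)
      val y = x.zip(weights).map { case (xi, wi) => xi * wi }.sum +
        intercept + eps * rnd.nextGaussian()
      (y, x)
    }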
@@ -19,6 +19,7 @@ package org.apache.spark.mllib.util

import scala.util.Random

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.regression.LabeledPoint
@@ -30,6 +31,7 @@ import org.apache.spark.mllib.linalg.Vectors
* Generate test data for LogisticRegression. This class chooses positive labels
* with probability `probOne` and scales features for positive examples by `eps`.
*/
@DeveloperApi
object LogisticRegressionDataGenerator {

/**
@@ -21,6 +21,7 @@ import scala.util.Random

import org.jblas.DoubleMatrix

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

@@ -49,6 +50,7 @@ import org.apache.spark.rdd.RDD
* test (Boolean) Whether to create testing RDD.
* testSampFact (Double) Percentage of training data to use as test data.
*/
@DeveloperApi
object MFDataGenerator {
def main(args: Array[String]) {
if (args.length < 2) {
