Skip to content

Commit

Permalink
add Experimental annotation
Browse files Browse the repository at this point in the history
  • Loading branch information
mengxr committed Apr 9, 2014
1 parent 8773d0d commit 6b9f8e2
Show file tree
Hide file tree
Showing 15 changed files with 34 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package org.apache.spark.mllib.classification

import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, argmax => brzArgmax, sum => brzSum}

import org.apache.spark.annotation.Experimental
import org.apache.spark.{Logging, SparkContext}
import org.apache.spark.SparkContext._
import org.apache.spark.mllib.linalg.Vector
Expand All @@ -36,6 +37,7 @@ import org.apache.spark.rdd.RDD
* @param theta log of class conditional probabilities, whose dimension is C-by-D,
* where D is number of features
*/
@Experimental
class NaiveBayesModel(
val labels: Array[Double],
val pi: Array[Double],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import scala.collection.mutable.ArrayBuffer

import breeze.linalg.{DenseVector => BDV, Vector => BV, norm => breezeNorm}

import org.apache.spark.annotation.Experimental
import org.apache.spark.{Logging, SparkContext}
import org.apache.spark.SparkContext._
import org.apache.spark.mllib.linalg.{Vector, Vectors}
Expand Down Expand Up @@ -400,6 +401,7 @@ object KMeans {
/**
* :: Experimental ::
*/
@Experimental
def main(args: Array[String]) {
if (args.length < 4) {
println("Usage: KMeans <master> <input_file> <k> <max_iterations> [<runs>]")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import scala.util.Sorting
import com.esotericsoftware.kryo.Kryo
import org.jblas.{DoubleMatrix, SimpleBlas, Solve}

import org.apache.spark.annotation.Experimental
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.{Logging, HashPartitioner, Partitioner, SparkContext, SparkConf}
import org.apache.spark.storage.StorageLevel
Expand Down Expand Up @@ -142,6 +143,7 @@ class ALS private (
*
* Sets the constant used in computing confidence in implicit ALS. Default: 1.0.
*/
@Experimental
def setAlpha(alpha: Double): ALS = {
this.alpha = alpha
this
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package org.apache.spark.mllib.regression

import breeze.linalg.{DenseVector => BDV, SparseVector => BSV}

import org.apache.spark.annotation.Experimental
import org.apache.spark.{Logging, SparkException}
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.optimization._
Expand Down Expand Up @@ -105,6 +106,7 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
*
* Set if the algorithm should validate data before training. Default true.
*/
@Experimental
def setValidateData(validateData: Boolean): this.type = {
this.validateData = validateData
this
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package org.apache.spark.mllib.tree

import scala.util.control.Breaks._

import org.apache.spark.annotation.Experimental
import org.apache.spark.{Logging, SparkContext}
import org.apache.spark.SparkContext._
import org.apache.spark.mllib.regression.LabeledPoint
Expand All @@ -41,6 +42,7 @@ import org.apache.spark.mllib.linalg.{Vector, Vectors}
* of algorithm (classification, regression, etc.), feature type (continuous,
* categorical), depth of the tree, quantile calculation strategy, etc.
*/
@Experimental
class DecisionTree (private val strategy: Strategy) extends Serializable with Logging {

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,14 @@

package org.apache.spark.mllib.tree.configuration

import org.apache.spark.annotation.Experimental

/**
* :: Experimental ::
*
* Enum to select the algorithm for the decision tree.
*/
@Experimental
object Algo extends Enumeration {
type Algo = Value
val Classification, Regression = Value
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,14 @@

package org.apache.spark.mllib.tree.configuration

import org.apache.spark.annotation.Experimental

/**
* :: Experimental ::
*
* Enum to describe whether a feature is "continuous" or "categorical".
*/
@Experimental
object FeatureType extends Enumeration {
type FeatureType = Value
val Continuous, Categorical = Value
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,14 @@

package org.apache.spark.mllib.tree.configuration

import org.apache.spark.annotation.Experimental

/**
* :: Experimental ::
*
* Enum for selecting the quantile calculation strategy.
*/
@Experimental
object QuantileStrategy extends Enumeration {
type QuantileStrategy = Value
val Sort, MinMax, ApproxHist = Value
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

package org.apache.spark.mllib.tree.configuration

import org.apache.spark.annotation.Experimental
import org.apache.spark.mllib.tree.impurity.Impurity
import org.apache.spark.mllib.tree.configuration.Algo._
import org.apache.spark.mllib.tree.configuration.QuantileStrategy._
Expand All @@ -36,6 +37,7 @@ import org.apache.spark.mllib.tree.configuration.QuantileStrategy._
* 1, 2, ... , k-1. It's important to note that features are
* zero-indexed.
*/
@Experimental
class Strategy (
val algo: Algo,
val impurity: Impurity,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,15 @@

package org.apache.spark.mllib.tree.impurity

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.annotation.{DeveloperApi, Experimental}

/**
* :: Experimental ::
*
* Class for calculating [[http://en.wikipedia.org/wiki/Binary_entropy_function entropy]] during
* binary classification.
*/
@Experimental
object Entropy extends Impurity {

private[tree] def log2(x: Double) = scala.math.log(x) / scala.math.log(2)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package org.apache.spark.mllib.tree.impurity

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.annotation.{DeveloperApi, Experimental}

/**
* :: Experimental ::
Expand All @@ -26,6 +26,7 @@ import org.apache.spark.annotation.DeveloperApi
* [[http://en.wikipedia.org/wiki/Decision_tree_learning#Gini_impurity Gini impurity]]
* during binary classification.
*/
@Experimental
object Gini extends Impurity {

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@

package org.apache.spark.mllib.tree.impurity

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.annotation.{DeveloperApi, Experimental}

/**
* :: Experimental ::
*
* Trait for calculating information gain.
*/
@Experimental
trait Impurity extends Serializable {

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@

package org.apache.spark.mllib.tree.impurity

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.annotation.{DeveloperApi, Experimental}

/**
* :: Experimental ::
*
* Class for calculating variance during regression.
*/
@Experimental
object Variance extends Impurity {
override def calculate(c0: Double, c1: Double): Double =
throw new UnsupportedOperationException("Variance.calculate")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

package org.apache.spark.mllib.tree.model

import org.apache.spark.annotation.Experimental
import org.apache.spark.mllib.tree.configuration.Algo._
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.linalg.Vector
Expand All @@ -28,6 +29,7 @@ import org.apache.spark.mllib.linalg.Vector
* @param topNode root node
* @param algo algorithm type -- classification or regression
*/
@Experimental
class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable {

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ package org.apache.spark.mllib.util
import breeze.linalg.{Vector => BV, DenseVector => BDV, SparseVector => BSV,
squaredDistance => breezeSquaredDistance}

import org.apache.spark.annotation.Experimental
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.regression.LabeledPoint
Expand Down Expand Up @@ -133,6 +134,7 @@ object MLUtils {
* @return An RDD of LabeledPoint. Each labeled point has two elements: the first element is
* the label, and the second element represents the feature values (an array of Double).
*/
@Experimental
def loadLabeledData(sc: SparkContext, dir: String): RDD[LabeledPoint] = {
sc.textFile(dir).map { line =>
val parts = line.split(',')
Expand All @@ -152,6 +154,7 @@ object MLUtils {
* @param data An RDD of LabeledPoints containing data to be saved.
* @param dir Directory to save the data.
*/
@Experimental
def saveLabeledData(data: RDD[LabeledPoint], dir: String) {
val dataStr = data.map(x => x.label + "," + x.features.toArray.mkString(" "))
dataStr.saveAsTextFile(dir)
Expand Down

0 comments on commit 6b9f8e2

Please sign in to comment.