Fix warnings and javadoc8 break
HyukjinKwon committed Jan 30, 2017
1 parent ade075a commit 0880b9b
Showing 50 changed files with 177 additions and 172 deletions.
2 changes: 1 addition & 1 deletion core/src/main/scala/org/apache/spark/FutureAction.scala
@@ -58,7 +58,7 @@ trait FutureAction[T] extends Future[T] {
*
* @param atMost maximum wait time, which may be negative (no waiting is done), Duration.Inf
* for unbounded waiting, or a finite positive duration
* @throws Exception exception during action execution
* @note Throws `Exception` exception during action execution
* @return the result value if the action is completed within the specific maximum wait time
*/
@throws(classOf[Exception])
12 changes: 6 additions & 6 deletions core/src/main/scala/org/apache/spark/SparkConf.scala
@@ -262,7 +262,7 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria
/**
* Get a time parameter as seconds; throws a NoSuchElementException if it's not set. If no
* suffix is provided then seconds are assumed.
* @throws java.util.NoSuchElementException
* @note Throws `java.util.NoSuchElementException`
*/
def getTimeAsSeconds(key: String): Long = {
Utils.timeStringAsSeconds(get(key))
@@ -279,7 +279,7 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria
/**
* Get a time parameter as milliseconds; throws a NoSuchElementException if it's not set. If no
* suffix is provided then milliseconds are assumed.
* @throws java.util.NoSuchElementException
* @note Throws `java.util.NoSuchElementException`
*/
def getTimeAsMs(key: String): Long = {
Utils.timeStringAsMs(get(key))
@@ -296,7 +296,7 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria
/**
* Get a size parameter as bytes; throws a NoSuchElementException if it's not set. If no
* suffix is provided then bytes are assumed.
* @throws java.util.NoSuchElementException
* @note Throws `java.util.NoSuchElementException`
*/
def getSizeAsBytes(key: String): Long = {
Utils.byteStringAsBytes(get(key))
@@ -320,7 +320,7 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria
/**
* Get a size parameter as Kibibytes; throws a NoSuchElementException if it's not set. If no
* suffix is provided then Kibibytes are assumed.
* @throws java.util.NoSuchElementException
* @note Throws `java.util.NoSuchElementException`
*/
def getSizeAsKb(key: String): Long = {
Utils.byteStringAsKb(get(key))
@@ -337,7 +337,7 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria
/**
* Get a size parameter as Mebibytes; throws a NoSuchElementException if it's not set. If no
* suffix is provided then Mebibytes are assumed.
* @throws java.util.NoSuchElementException
* @note Throws `java.util.NoSuchElementException`
*/
def getSizeAsMb(key: String): Long = {
Utils.byteStringAsMb(get(key))
@@ -354,7 +354,7 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria
/**
* Get a size parameter as Gibibytes; throws a NoSuchElementException if it's not set. If no
* suffix is provided then Gibibytes are assumed.
* @throws java.util.NoSuchElementException
* @note Throws `java.util.NoSuchElementException`
*/
def getSizeAsGb(key: String): Long = {
Utils.byteStringAsGb(get(key))
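As an aside on the getters documented above, a minimal sketch of how the suffixed values behave (`spark.network.timeout` is a standard key; `spark.some.buffer.size` is invented for illustration):

import org.apache.spark.SparkConf

val conf = new SparkConf()
  .set("spark.network.timeout", "2min")   // time value with a suffix
  .set("spark.some.buffer.size", "1g")    // size value with a suffix (hypothetical key)

conf.getTimeAsSeconds("spark.network.timeout")   // 120
conf.getTimeAsMs("spark.network.timeout")        // 120000
conf.getSizeAsMb("spark.some.buffer.size")       // 1024
// Reading a key that was never set throws java.util.NoSuchElementException,
// which is what the @note lines above now document.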
@@ -496,7 +496,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
* or `PairRDDFunctions.reduceByKey` will provide much better performance.
*
* @note As currently implemented, groupByKey must be able to hold all the key-value pairs for any
* key in memory. If a key has too many values, it can result in an [[OutOfMemoryError]].
* key in memory. If a key has too many values, it can result in an `OutOfMemoryError`.
*/
def groupByKey(partitioner: Partitioner): RDD[(K, Iterable[V])] = self.withScope {
// groupByKey shouldn't use map side combine because map side combine does not
@@ -520,7 +520,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
* or `PairRDDFunctions.reduceByKey` will provide much better performance.
*
* @note As currently implemented, groupByKey must be able to hold all the key-value pairs for any
* key in memory. If a key has too many values, it can result in an [[OutOfMemoryError]].
* key in memory. If a key has too many values, it can result in an `OutOfMemoryError`.
*/
def groupByKey(numPartitions: Int): RDD[(K, Iterable[V])] = self.withScope {
groupByKey(new HashPartitioner(numPartitions))
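To illustrate the recommendation in the scaladoc above, a small sketch (assumes an active SparkContext named `sc`):

val pairs = sc.parallelize(Seq(("a", 1), ("b", 2), ("a", 3)))

// groupByKey must hold every value for a key in memory before the sum:
val viaGroup = pairs.groupByKey().mapValues(_.sum)

// reduceByKey combines on the map side and never materializes the full group:
val viaReduce = pairs.reduceByKey(_ + _)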
@@ -294,7 +294,7 @@ private[spark] trait SparkListenerInterface {

/**
* :: DeveloperApi ::
* A default implementation for [[SparkListenerInterface]] that has no-op implementations for
* A default implementation for `SparkListenerInterface` that has no-op implementations for
* all callbacks.
*
* Note that this is an internal interface which might change in different Spark releases.
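For context, SparkListener's no-op defaults mean a listener only overrides the callbacks it cares about; a hypothetical sketch:

import org.apache.spark.scheduler.{SparkListener, SparkListenerJobEnd}

class JobEndLogger extends SparkListener {
  // Every other SparkListenerInterface callback keeps its no-op default.
  override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = {
    println(s"Job ${jobEnd.jobId} ended with ${jobEnd.jobResult}")
  }
}
// Registered with sparkContext.addSparkListener(new JobEndLogger)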
4 changes: 2 additions & 2 deletions core/src/main/scala/org/apache/spark/scheduler/package.scala
@@ -18,7 +18,7 @@
package org.apache.spark

/**
* Spark's scheduling components. This includes the [[org.apache.spark.scheduler.DAGScheduler]] and
* lower level [[org.apache.spark.scheduler.TaskScheduler]].
* Spark's scheduling components. This includes the `org.apache.spark.scheduler.DAGScheduler` and
* lower level `org.apache.spark.scheduler.TaskScheduler`.
*/
package object scheduler
@@ -60,7 +60,7 @@ class DefaultTopologyMapper(conf: SparkConf) extends TopologyMapper(conf) with L

/**
* A simple file based topology mapper. This expects topology information provided as a
* [[java.util.Properties]] file. The name of the file is obtained from SparkConf property
* `java.util.Properties` file. The name of the file is obtained from SparkConf property
* `spark.storage.replication.topologyFile`. To use this topology mapper, set the
* `spark.storage.replication.topologyMapper` property to
* [[org.apache.spark.storage.FileBasedTopologyMapper]]
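The two properties named in the scaladoc above wire together roughly like this (the file path is a placeholder):

import org.apache.spark.SparkConf

val conf = new SparkConf()
  .set("spark.storage.replication.topologyMapper",
    "org.apache.spark.storage.FileBasedTopologyMapper")
  .set("spark.storage.replication.topologyFile", "/path/to/topology.properties")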
2 changes: 1 addition & 1 deletion docs/js/api-docs.js
@@ -50,7 +50,7 @@ $(document).ready(function() {
MathJax.Hub.Config({
displayAlign: "left",
tex2jax: {
inlineMath: [ ["$", "$"], ["\\\\(","\\\\)"] ],
inlineMath: [ ["$", "$"], ["\\(","\\)"] ],
displayMath: [ ["$$","$$"], ["\\[", "\\]"] ],
processEscapes: true,
skipTags: ['script', 'noscript', 'style', 'textarea', 'pre', 'a']
@@ -169,7 +169,7 @@ sealed trait Vector extends Serializable {
/**
* Factory methods for [[org.apache.spark.ml.linalg.Vector]].
* We don't use the name `Vector` because Scala imports
* [[scala.collection.immutable.Vector]] by default.
* `scala.collection.immutable.Vector` by default.
*/
@Since("2.0.0")
object Vectors {
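A quick sketch of the factory methods this scaladoc refers to:

import org.apache.spark.ml.linalg.Vectors

// The factory object avoids a clash with scala.collection.immutable.Vector.
val dense  = Vectors.dense(1.0, 0.0, 3.0)
val sparse = Vectors.sparse(3, Array(0, 2), Array(1.0, 3.0))  // size, indices, values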
16 changes: 8 additions & 8 deletions mllib/src/main/scala/org/apache/spark/ml/Predictor.scala
@@ -40,7 +40,7 @@ private[ml] trait PredictorParams extends Params
* @param schema input schema
* @param fitting whether this is in fitting
* @param featuresDataType SQL DataType for FeaturesType.
* E.g., [[VectorUDT]] for vector features.
* E.g., `VectorUDT` for vector features.
* @return output schema
*/
protected def validateAndTransformSchema(
@@ -72,7 +72,7 @@ private[ml] trait PredictorParams extends Params
* in `fit()`.
*
* @tparam FeaturesType Type of features.
* E.g., [[VectorUDT]] for vector features.
* E.g., `VectorUDT` for vector features.
* @tparam Learner Specialization of this class. If you subclass this type, use this type
* parameter to specify the concrete type.
* @tparam M Specialization of [[PredictionModel]]. If you subclass this type, use this type
@@ -122,7 +122,7 @@ abstract class Predictor[

/**
* Train a model using the given dataset and parameters.
* Developers can implement this instead of [[fit()]] to avoid dealing with schema validation
* Developers can implement this instead of `fit()` to avoid dealing with schema validation
* and copying parameters into the model.
*
* @param dataset Training dataset
@@ -133,7 +133,7 @@ abstract class Predictor[
/**
* Returns the SQL DataType corresponding to the FeaturesType type parameter.
*
* This is used by [[validateAndTransformSchema()]].
* This is used by `validateAndTransformSchema()`.
* This workaround is needed since SQL has different APIs for Scala and Java.
*
* The default value is VectorUDT, but it may be overridden if FeaturesType is not Vector.
@@ -160,7 +160,7 @@ abstract class Predictor[
* Abstraction for a model for prediction tasks (regression and classification).
*
* @tparam FeaturesType Type of features.
* E.g., [[VectorUDT]] for vector features.
* E.g., `VectorUDT` for vector features.
* @tparam M Specialization of [[PredictionModel]]. If you subclass this type, use this type
* parameter to specify the concrete type for the corresponding model.
*/
@@ -181,7 +181,7 @@ abstract class PredictionModel[FeaturesType, M <: PredictionModel[FeaturesType,
/**
* Returns the SQL DataType corresponding to the FeaturesType type parameter.
*
* This is used by [[validateAndTransformSchema()]].
* This is used by `validateAndTransformSchema()`.
* This workaround is needed since SQL has different APIs for Scala and Java.
*
* The default value is VectorUDT, but it may be overridden if FeaturesType is not Vector.
@@ -197,7 +197,7 @@ abstract class PredictionModel[FeaturesType, M <: PredictionModel[FeaturesType,
* the predictions as a new column [[predictionCol]].
*
* @param dataset input dataset
* @return transformed dataset with [[predictionCol]] of type [[Double]]
* @return transformed dataset with [[predictionCol]] of type `Double`
*/
override def transform(dataset: Dataset[_]): DataFrame = {
transformSchema(dataset.schema, logging = true)
@@ -219,7 +219,7 @@ abstract class PredictionModel[FeaturesType, M <: PredictionModel[FeaturesType,

/**
* Predict label for the given features.
* This internal method is used to implement [[transform()]] and output [[predictionCol]].
* This internal method is used to implement `transform()` and output [[predictionCol]].
*/
protected def predict(features: FeaturesType): Double
}
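To ground the Predictor/PredictionModel contract described above, a sketch using an existing implementation (`trainingDf` and `testDf` are assumed DataFrames with `features` and `label` columns):

import org.apache.spark.ml.regression.LinearRegression

val lr = new LinearRegression()           // a concrete Predictor
val model = lr.fit(trainingDf)            // fit() validates the schema, then calls train()
val scored = model.transform(testDf)      // appends predictionCol of type Double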
@@ -126,7 +126,7 @@ private[attribute] trait AttributeFactory {
private[attribute] def fromMetadata(metadata: Metadata): Attribute

/**
* Creates an [[Attribute]] from a [[StructField]] instance, optionally preserving name.
* Creates an [[Attribute]] from a `StructField` instance, optionally preserving name.
*/
private[ml] def decodeStructField(field: StructField, preserveName: Boolean): Attribute = {
require(field.dataType.isInstanceOf[NumericType])
@@ -145,7 +145,7 @@
}

/**
* Creates an [[Attribute]] from a [[StructField]] instance.
* Creates an [[Attribute]] from a `StructField` instance.
*/
def fromStructField(field: StructField): Attribute = decodeStructField(field, false)
}
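A short sketch of the StructField round trip that `fromStructField` / `decodeStructField` support:

import org.apache.spark.ml.attribute.{Attribute, NumericAttribute}
import org.apache.spark.sql.types.StructField

val attr = NumericAttribute.defaultAttr.withName("age")
val field: StructField = attr.toStructField()          // attribute encoded in column metadata
val recovered: Attribute = Attribute.fromStructField(field)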
@@ -23,7 +23,7 @@ import org.apache.spark.sql.DataFrame
/**
* ==ML attributes==
*
* The ML pipeline API uses [[DataFrame]]s as ML datasets.
* The ML pipeline API uses `DataFrame`s as ML datasets.
* Each dataset consists of typed columns, e.g., string, double, vector, etc.
* However, knowing only the column type may not be sufficient to handle the data properly.
* For instance, a double column with values 0.0, 1.0, 2.0, ... may represent some label indices,
@@ -71,7 +71,7 @@ abstract class Classifier[
* and put it in an RDD with strong types.
*
* @param dataset DataFrame with columns for labels ([[org.apache.spark.sql.types.NumericType]])
* and features ([[Vector]]).
* and features (`Vector`).
* @param numClasses Number of classes label can take. Labels must be integers in the range
* [0, numClasses).
* @throws SparkException if any label is not an integer >= 0
@@ -94,14 +94,14 @@ abstract class Classifier[
* by finding the maximum label value.
*
* Label validation (ensuring all labels are integers >= 0) needs to be handled elsewhere,
* such as in [[extractLabeledPoints()]].
* such as in `extractLabeledPoints()`.
*
* @param dataset Dataset which contains a column [[labelCol]]
* @param maxNumClasses Maximum number of classes allowed when inferred from data. If numClasses
* is specified in the metadata, then maxNumClasses is ignored.
* @return number of classes
* @throws IllegalArgumentException if metadata does not specify numClasses, and the
* actual numClasses exceeds maxNumClasses
* @note Throws `IllegalArgumentException` if metadata does not specify numClasses, and the
* actual numClasses exceeds maxNumClasses
*/
protected def getNumClasses(dataset: Dataset[_], maxNumClasses: Int = 100): Int = {
MetadataUtils.getNumClasses(dataset.schema($(labelCol))) match {
@@ -150,7 +150,7 @@ abstract class ClassificationModel[FeaturesType, M <: ClassificationModel[Featur
/**
* Transforms dataset by reading from [[featuresCol]], and appending new columns as specified by
* parameters:
* - predicted labels as [[predictionCol]] of type [[Double]]
* - predicted labels as [[predictionCol]] of type `Double`
* - raw predictions (confidences) as [[rawPredictionCol]] of type `Vector`.
*
* @param dataset input dataset
@@ -192,10 +192,10 @@ abstract class ClassificationModel[FeaturesType, M <: ClassificationModel[Featur

/**
* Predict label for the given features.
* This internal method is used to implement [[transform()]] and output [[predictionCol]].
* This internal method is used to implement `transform()` and output [[predictionCol]].
*
* This default implementation for classification predicts the index of the maximum value
* from [[predictRaw()]].
* from `predictRaw()`.
*/
override protected def predict(features: FeaturesType): Double = {
raw2prediction(predictRaw(features))
@@ -205,7 +205,7 @@ abstract class ClassificationModel[FeaturesType, M <: ClassificationModel[Featur
* Raw prediction for each possible label.
* The meaning of a "raw" prediction may vary between algorithms, but it intuitively gives
* a measure of confidence in each possible label (where larger = more confident).
* This internal method is used to implement [[transform()]] and output [[rawPredictionCol]].
* This internal method is used to implement `transform()` and output [[rawPredictionCol]].
*
* @return vector where element i is the raw prediction for label i.
* This raw prediction may be any real number, where a larger value indicates greater
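As background for getNumClasses above: label columns are expected to be indexed 0, 1, 2, ..., and the class count is read from column metadata when present (for example, metadata written by StringIndexer) before falling back to scanning for the maximum label. A sketch, with `rawDf` assumed:

import org.apache.spark.ml.feature.StringIndexer

val indexed = new StringIndexer()
  .setInputCol("category")
  .setOutputCol("label")
  .fit(rawDf)
  .transform(rawDf)   // the label column now carries nominal metadata incl. the number of values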
@@ -106,7 +106,8 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas
* Otherwise, returns `threshold` if set, or its default value if unset.
*
* @group getParam
* @throws IllegalArgumentException if `thresholds` is set to an array of length other than 2.
* @note Throws `IllegalArgumentException` if `thresholds` is set to an array of length
* other than 2.
*/
override def getThreshold: Double = {
checkThresholdConsistency()
@@ -161,7 +162,7 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas
/**
* If `threshold` and `thresholds` are both set, ensures they are consistent.
*
* @throws IllegalArgumentException if `threshold` and `thresholds` are not equivalent
* @note Throws `IllegalArgumentException` if `threshold` and `thresholds` are not equivalent
*/
protected def checkThresholdConsistency(): Unit = {
if (isSet(threshold) && isSet(thresholds)) {
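For the binary case, `threshold` and `thresholds` are two encodings of the same decision rule, which is what `checkThresholdConsistency` guards; a sketch:

import org.apache.spark.ml.classification.LogisticRegression

val lr = new LogisticRegression()
lr.setThresholds(Array(0.3, 0.7))
// Equivalent scalar form: t = 1 / (1 + thresholds(0) / thresholds(1)) = 0.7
lr.getThreshold                       // 0.7
// Setting threshold and thresholds to inconsistent values makes the
// consistency check throw IllegalArgumentException.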
@@ -225,7 +225,7 @@ class MultilayerPerceptronClassifier @Since("1.5.0") (

/**
* Train a model using the given dataset and parameters.
* Developers can implement this instead of [[fit()]] to avoid dealing with schema validation
* Developers can implement this instead of `fit()` to avoid dealing with schema validation
* and copying parameters into the model.
*
* @param dataset Training dataset
@@ -321,7 +321,7 @@ class MultilayerPerceptronClassificationModel private[ml] (

/**
* Predict label for the given features.
* This internal method is used to implement [[transform()]] and output [[predictionCol]].
* This internal method is used to implement `transform()` and output [[predictionCol]].
*/
override protected def predict(features: Vector): Double = {
LabelConverter.decodeLabel(mlpModel.predict(features))
@@ -93,7 +93,7 @@ abstract class ProbabilisticClassificationModel[
/**
* Transforms dataset by reading from [[featuresCol]], and appending new columns as specified by
* parameters:
* - predicted labels as [[predictionCol]] of type [[Double]]
* - predicted labels as [[predictionCol]] of type `Double`
* - raw predictions (confidences) as [[rawPredictionCol]] of type `Vector`
* - probability of each class as [[probabilityCol]] of type `Vector`.
*
@@ -158,13 +158,15 @@ abstract class ProbabilisticClassificationModel[
* doing the computation in-place.
* These predictions are also called class conditional probabilities.
*
* This internal method is used to implement [[transform()]] and output [[probabilityCol]].
* This internal method is used to implement `transform()` and output [[probabilityCol]].
*
* @return Estimated class conditional probabilities (modified input vector)
*/
protected def raw2probabilityInPlace(rawPrediction: Vector): Vector

/** Non-in-place version of [[raw2probabilityInPlace()]] */
/**
* Non-in-place version of `raw2probabilityInPlace()`
*/
protected def raw2probability(rawPrediction: Vector): Vector = {
val probs = rawPrediction.copy
raw2probabilityInPlace(probs)
@@ -182,7 +184,7 @@
* Predict the probability of each class given the features.
* These predictions are also called class conditional probabilities.
*
* This internal method is used to implement [[transform()]] and output [[probabilityCol]].
* This internal method is used to implement `transform()` and output [[probabilityCol]].
*
* @return Estimated class conditional probabilities
*/
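Putting the three output columns described above together (again with assumed `trainingDf` / `testDf`):

import org.apache.spark.ml.classification.LogisticRegression

val model = new LogisticRegression().fit(trainingDf)
model.transform(testDf)
  .select("rawPrediction", "probability", "prediction")  // Vector, Vector, Double
  .show()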
@@ -84,7 +84,7 @@ private[feature] trait MinMaxScalerParams extends Params with HasInputCol with H
* $$
* </blockquote>
*
* For the case $E_{max} == E_{min}$, $Rescaled(e_i) = 0.5 * (max + min)$.
* For the case \(E_{max} == E_{min}\), \(Rescaled(e_i) = 0.5 * (max + min)\).
*
* @note Since zero values will probably be transformed to non-zero values, output of the
* transformer will be DenseVector even for sparse input.
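A sketch of the transformer whose rescaling formula is documented above (`df` is an assumed DataFrame with a `features` Vector column); with the default range [0, 1], a constant feature maps to 0.5:

import org.apache.spark.ml.feature.MinMaxScaler

val scaler = new MinMaxScaler()
  .setInputCol("features")
  .setOutputCol("scaledFeatures")
  .setMin(0.0)
  .setMax(1.0)
val scalerModel = scaler.fit(df)            // learns E_min / E_max per feature
val rescaled = scalerModel.transform(df)    // output column is a DenseVector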
@@ -25,13 +25,13 @@ import org.apache.spark.sql.DataFrame
*
* The `ml.feature` package provides common feature transformers that help convert raw data or
* features into more suitable forms for model fitting.
* Most feature transformers are implemented as [[Transformer]]s, which transform one [[DataFrame]]
* Most feature transformers are implemented as [[Transformer]]s, which transform one `DataFrame`
* into another, e.g., [[HashingTF]].
* Some feature transformers are implemented as [[Estimator]]s, because the transformation requires
* some aggregated information of the dataset, e.g., document frequencies in [[IDF]].
* For those feature transformers, calling [[Estimator!.fit]] is required to obtain the model first,
* For those feature transformers, calling `Estimator.fit` is required to obtain the model first,
* e.g., [[IDFModel]], in order to apply transformation.
* The transformation is usually done by appending new columns to the input [[DataFrame]], so all
* The transformation is usually done by appending new columns to the input `DataFrame`, so all
* input columns are carried over.
*
* We try to make each transformer minimal, so it becomes flexible to assemble feature
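Finally, a sketch of the Transformer-vs-Estimator distinction drawn in this package doc, using HashingTF and IDF (`tokenizedDf` is assumed to have a `words` array column):

import org.apache.spark.ml.feature.{HashingTF, IDF}

val hashingTF = new HashingTF().setInputCol("words").setOutputCol("rawFeatures")
val featurized = hashingTF.transform(tokenizedDf)   // Transformer: no fitting needed

val idf = new IDF().setInputCol("rawFeatures").setOutputCol("features")
val idfModel = idf.fit(featurized)                  // Estimator: aggregates document frequencies
val result = idfModel.transform(featurized)         // new columns appended, inputs carried over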