From 27b07ef168fcaeb26b45968d4a82f7df349a2f65 Mon Sep 17 00:00:00 2001 From: Yanbo Liang Date: Tue, 29 Nov 2016 21:05:20 -0800 Subject: [PATCH] Revert back since tag for traits and fix docs. --- docs/ml-features.md | 4 ++-- .../scala/org/apache/spark/ml/feature/ChiSqSelector.scala | 8 ++++++++ .../org/apache/spark/ml/feature/QuantileDiscretizer.scala | 6 ++++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/docs/ml-features.md b/docs/ml-features.md index ba318fdf633a9..53c822c335f51 100644 --- a/docs/ml-features.md +++ b/docs/ml-features.md @@ -1189,8 +1189,8 @@ that the number of buckets used will be smaller than this value, for example, if distinct values of the input to create enough distinct quantiles. NaN values: -NaN values will be removed from the column when `QuantileDiscretizer` fitting. This will produce -a `Bucketizer` model for making prediction and transformation. During the transformation, `Bucketizer` +NaN values will be removed from the column during `QuantileDiscretizer` fitting. This will produce +a `Bucketizer` model for making predictions. During the transformation, `Bucketizer` will raise an error when it finds NaN values in the dataset, but the user can also choose to either keep or remove NaN values within the dataset by setting `handleInvalid`. If the user chooses to keep NaN values, they will be handled specially and placed into their own bucket, for example, if 4 buckets diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala index e978fb8c151c6..9725125b6cb06 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala @@ -49,6 +49,7 @@ private[feature] trait ChiSqSelectorParams extends Params * * @group param */ + @Since("1.6.0") final val numTopFeatures = new IntParam(this, "numTopFeatures", "Number of features that selector will select, ordered by ascending p-value. If the" + " number of features is < numTopFeatures, then this will select all features.", @@ -56,6 +57,7 @@ private[feature] trait ChiSqSelectorParams extends Params setDefault(numTopFeatures -> 50) /** @group getParam */ + @Since("1.6.0") def getNumTopFeatures: Int = $(numTopFeatures) /** @@ -64,12 +66,14 @@ private[feature] trait ChiSqSelectorParams extends Params * Default value is 0.1. * @group param */ + @Since("2.1.0") final val percentile = new DoubleParam(this, "percentile", "Percentile of features that selector will select, ordered by ascending p-value.", ParamValidators.inRange(0, 1)) setDefault(percentile -> 0.1) /** @group getParam */ + @Since("2.1.0") def getPercentile: Double = $(percentile) /** @@ -78,11 +82,13 @@ private[feature] trait ChiSqSelectorParams extends Params * Default value is 0.05. * @group param */ + @Since("2.1.0") final val fpr = new DoubleParam(this, "fpr", "The highest p-value for features to be kept.", ParamValidators.inRange(0, 1)) setDefault(fpr -> 0.05) /** @group getParam */ + @Since("2.1.0") def getFpr: Double = $(fpr) /** @@ -90,6 +96,7 @@ private[feature] trait ChiSqSelectorParams extends Params * Supported options: "numTopFeatures" (default), "percentile", "fpr". * @group param */ + @Since("2.1.0") final val selectorType = new Param[String](this, "selectorType", "The selector type of the ChisqSelector. " + "Supported options: " + OldChiSqSelector.supportedSelectorTypes.mkString(", "), @@ -97,6 +104,7 @@ private[feature] trait ChiSqSelectorParams extends Params setDefault(selectorType -> OldChiSqSelector.NumTopFeatures) /** @group getParam */ + @Since("2.1.0") def getSelectorType: String = $(selectorType) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala index c195b514f4529..b2ec37bf935fe 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala @@ -73,6 +73,7 @@ private[feature] trait QuantileDiscretizerBase extends Params * @group param */ // TODO: SPARK-18619 Make QuantileDiscretizer inherit from HasHandleInvalid. + @Since("2.1.0") val handleInvalid: Param[String] = new Param[String](this, "handleInvalid", "how to handle " + "invalid entries. Options are skip (filter out rows with invalid values), " + "error (throw an error), or keep (keep invalid values in a special additional bucket).", @@ -80,6 +81,7 @@ private[feature] trait QuantileDiscretizerBase extends Params setDefault(handleInvalid, Bucketizer.ERROR_INVALID) /** @group getParam */ + @Since("2.1.0") def getHandleInvalid: String = $(handleInvalid) } @@ -91,8 +93,8 @@ private[feature] trait QuantileDiscretizerBase extends Params * are too few distinct values of the input to create enough distinct quantiles. * * NaN handling: - * NaN values will be removed from the column when `QuantileDiscretizer` fitting. This will produce - * a `Bucketizer` model for making prediction and transformation. During the transformation, + * NaN values will be removed from the column during `QuantileDiscretizer` fitting. This will + * produce a `Bucketizer` model for making predictions. During the transformation, * `Bucketizer` will raise an error when it finds NaN values in the dataset, but the user can * also choose to either keep or remove NaN values within the dataset by setting `handleInvalid`. * If the user chooses to keep NaN values, they will be handled specially and placed into their own