Renamed kappa, tau0 to learningDecay, learningOffset
jkbradley committed Nov 10, 2015
1 parent a55de6d commit 8eaa596
Showing 2 changed files with 27 additions and 22 deletions.
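In user code, this rename only changes the expert setter/getter names on org.apache.spark.ml.clustering.LDA; the online optimizer's behavior is unchanged. A minimal, hedged sketch of the new-style usage (the input dataset and its word-count Vector column are assumed and not part of this commit; the values shown are the defaults set in the diff below):

import org.apache.spark.ml.clustering.LDA

// Before this commit: .setTau0(1024).setKappa(0.51)
val lda = new LDA()
  .setK(10)
  .setOptimizer("online")
  .setLearningOffset(1024)  // formerly tau0: downweights early iterations
  .setLearningDecay(0.51)   // formerly kappa: decay rate, should be in (0.5, 1.0]
  .setSubsamplingRate(0.05)
// val model = lda.fit(dataset)  // dataset with a "features" Vector column is assumed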
33 changes: 19 additions & 14 deletions mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
@@ -134,6 +134,7 @@ private[clustering] trait LDAParams extends Params with HasFeaturesCol with HasM
}

/** Supported values for Param [[optimizer]]. */
@Since("1.6.0")
final val supportedOptimizers: Array[String] = Array("online", "em")

/**
@@ -186,32 +187,34 @@ private[clustering] trait LDAParams extends Params with HasFeaturesCol with HasM
/**
* A (positive) learning parameter that downweights early iterations. Larger values make early
* iterations count less.
- * Default: 1024, following the Online LDA paper (Hoffman et al., 2010).
+ * This is called "tau0" in the Online LDA paper (Hoffman et al., 2010)
+ * Default: 1024, following Hoffman et al.
* @group expertParam
*/
@Since("1.6.0")
- final val tau0 = new DoubleParam(this, "tau0", "A (positive) learning parameter that" +
-   " downweights early iterations. Larger values make early iterations count less.",
+ final val learningOffset = new DoubleParam(this, "learningOffset", "A (positive) learning" +
+   " parameter that downweights early iterations. Larger values make early iterations count less.",
ParamValidators.gt(0))

/** @group expertGetParam */
@Since("1.6.0")
- def getTau0: Double = $(tau0)
+ def getLearningOffset: Double = $(learningOffset)

/**
* Learning rate, set as an exponential decay rate.
* This should be between (0.5, 1.0] to guarantee asymptotic convergence.
- * Default: 0.51, based on the Online LDA paper (Hoffman et al., 2010).
+ * This is called "kappa" in the Online LDA paper (Hoffman et al., 2010).
+ * Default: 0.51, based on Hoffman et al.
* @group expertParam
*/
@Since("1.6.0")
- final val kappa = new DoubleParam(this, "kappa", "Learning rate, set as an exponential decay" +
-   " rate. This should be between (0.5, 1.0] to guarantee asymptotic convergence.",
-   ParamValidators.gt(0))
+ final val learningDecay = new DoubleParam(this, "learningDecay", "Learning rate, set as an" +
+   " exponential decay rate. This should be between (0.5, 1.0] to guarantee asymptotic" +
+   " convergence.", ParamValidators.gt(0))

/** @group expertGetParam */
@Since("1.6.0")
- def getKappa: Double = $(kappa)
+ def getLearningDecay: Double = $(learningDecay)

/**
* Fraction of the corpus to be sampled and used in each iteration of mini-batch gradient descent,
@@ -262,6 +265,7 @@ private[clustering] trait LDAParams extends Params with HasFeaturesCol with HasM
SchemaUtils.appendColumn(schema, $(topicDistributionCol), new VectorUDT)
}

@Since("1.6.0")
override def validateParams(): Unit = {
if (isSet(docConcentration)) {
if (getDocConcentration.length != 1) {
@@ -295,8 +299,8 @@ private[clustering] trait LDAParams extends Params with HasFeaturesCol with HasM
private[clustering] def getOldOptimizer: OldLDAOptimizer = getOptimizer match {
case "online" =>
new OldOnlineLDAOptimizer()
-   .setTau0($(tau0))
-   .setKappa($(kappa))
+   .setTau0($(learningOffset))
+   .setKappa($(learningDecay))
.setMiniBatchFraction($(subsamplingRate))
.setOptimizeDocConcentration($(optimizeDocConcentration))
case "em" =>
@@ -587,7 +591,8 @@ class LDA @Since("1.6.0") (
def this() = this(Identifiable.randomUID("lda"))

setDefault(maxIter -> 20, k -> 10, optimizer -> "online", checkpointInterval -> 10,
-   tau0 -> 1024, kappa -> 0.51, subsamplingRate -> 0.05, optimizeDocConcentration -> true)
+   learningOffset -> 1024, learningDecay -> 0.51, subsamplingRate -> 0.05,
+   optimizeDocConcentration -> true)

/**
* The features for LDA should be a [[Vector]] representing the word counts in a document.
@@ -635,11 +640,11 @@ class LDA @Since("1.6.0") (

/** @group expertSetParam */
@Since("1.6.0")
- def setTau0(value: Double): this.type = set(tau0, value)
+ def setLearningOffset(value: Double): this.type = set(learningOffset, value)

/** @group expertSetParam */
@Since("1.6.0")
- def setKappa(value: Double): this.type = set(kappa, value)
+ def setLearningDecay(value: Double): this.type = set(learningDecay, value)

/** @group setParam */
@Since("1.6.0")
16 changes: 8 additions & 8 deletions mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala
@@ -64,8 +64,8 @@ class LDASuite extends SparkFunSuite with MLlibTestSparkContext {
assert(!lda.isSet(lda.docConcentration))
assert(!lda.isSet(lda.topicConcentration))
assert(lda.getOptimizer === "online")
- assert(lda.getKappa === 0.51)
- assert(lda.getTau0 === 1024)
+ assert(lda.getLearningDecay === 0.51)
+ assert(lda.getLearningOffset === 1024)
assert(lda.getSubsamplingRate === 0.05)
assert(lda.getOptimizeDocConcentration)
assert(lda.getTopicDistributionCol === "topicDistribution")
@@ -95,10 +95,10 @@ class LDASuite extends SparkFunSuite with MLlibTestSparkContext {
assert(lda.getOptimizer === "em")
lda.setOptimizer("online")
assert(lda.getOptimizer === "online")
- lda.setKappa(0.53)
- assert(lda.getKappa === 0.53)
- lda.setTau0(1027)
- assert(lda.getTau0 === 1027)
+ lda.setLearningDecay(0.53)
+ assert(lda.getLearningDecay === 0.53)
+ lda.setLearningOffset(1027)
+ assert(lda.getLearningOffset === 1027)
lda.setSubsamplingRate(0.06)
assert(lda.getSubsamplingRate === 0.06)
lda.setOptimizeDocConcentration(false)
@@ -137,10 +137,10 @@ class LDASuite extends SparkFunSuite with MLlibTestSparkContext {

// Online LDA
intercept[IllegalArgumentException] {
-   new LDA().setTau0(0)
+   new LDA().setLearningOffset(0)
}
intercept[IllegalArgumentException] {
-   new LDA().setKappa(0)
+   new LDA().setLearningDecay(0)
}
intercept[IllegalArgumentException] {
new LDA().setSubsamplingRate(0)

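For background (not part of this commit): the two renamed parameters set the step-size schedule of online variational Bayes from Hoffman et al. (2010), which the doc comments above cite. At mini-batch t the update weight is roughly (learningOffset + t)^(-learningDecay), so a larger offset downweights early iterations and a decay in (0.5, 1.0] gives asymptotic convergence. A small illustrative sketch; the exact weighting inside Spark's OnlineLDAOptimizer may differ in detail:

// Step-size schedule from Hoffman et al. (2010); illustration only,
// not the code changed in this commit.
def onlineLdaWeight(iteration: Int, learningOffset: Double, learningDecay: Double): Double =
  math.pow(learningOffset + iteration, -learningDecay)

// With the defaults (1024, 0.51) the first mini-batches are heavily damped:
// onlineLdaWeight(1, 1024, 0.51)    ~ 0.029
// onlineLdaWeight(1000, 1024, 0.51) ~ 0.021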