Renamed kappa, tau0 to learningDecay, learningOffset
jkbradley committed Nov 10, 2015
1 parent a55de6d commit 8eaa596
Showing 2 changed files with 27 additions and 22 deletions.
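In user code, this rename only changes the expert setter/getter names on org.apache.spark.ml.clustering.LDA; the online optimizer's behavior is unchanged. A minimal, hedged sketch of the new-style usage (the input dataset and its word-count Vector column are assumed and not part of this commit; the values shown are the defaults set in the diff below):

import org.apache.spark.ml.clustering.LDA

// Before this commit: .setTau0(1024).setKappa(0.51)
val lda = new LDA()
  .setK(10)
  .setOptimizer("online")
  .setLearningOffset(1024)  // formerly tau0: downweights early iterations
  .setLearningDecay(0.51)   // formerly kappa: decay rate, should be in (0.5, 1.0]
  .setSubsamplingRate(0.05)
// val model = lda.fit(dataset)  // dataset with a "features" Vector column is assumed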
33 changes: 19 additions & 14 deletions mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
@@ -134,6 +134,7 @@ private[clustering] trait LDAParams extends Params with HasFeaturesCol with HasM
}

/** Supported values for Param [[optimizer]]. */
@Since("1.6.0")
final val supportedOptimizers: Array[String] = Array("online", "em")

/**
@@ -186,32 +187,34 @@ private[clustering] trait LDAParams extends Params with HasFeaturesCol with HasM
/**
* A (positive) learning parameter that downweights early iterations. Larger values make early
* iterations count less.
- * Default: 1024, following the Online LDA paper (Hoffman et al., 2010).
+ * This is called "tau0" in the Online LDA paper (Hoffman et al., 2010)
+ * Default: 1024, following Hoffman et al.
* @group expertParam
*/
@Since("1.6.0")
- final val tau0 = new DoubleParam(this, "tau0", "A (positive) learning parameter that" +
-   " downweights early iterations. Larger values make early iterations count less.",
+ final val learningOffset = new DoubleParam(this, "learningOffset", "A (positive) learning" +
+   " parameter that downweights early iterations. Larger values make early iterations count less.",
ParamValidators.gt(0))

/** @group expertGetParam */
@Since("1.6.0")
- def getTau0: Double = $(tau0)
+ def getLearningOffset: Double = $(learningOffset)

/**
* Learning rate, set as an exponential decay rate.
* This should be between (0.5, 1.0] to guarantee asymptotic convergence.
- * Default: 0.51, based on the Online LDA paper (Hoffman et al., 2010).
+ * This is called "kappa" in the Online LDA paper (Hoffman et al., 2010).
+ * Default: 0.51, based on Hoffman et al.
* @group expertParam
*/
@Since("1.6.0")
- final val kappa = new DoubleParam(this, "kappa", "Learning rate, set as an exponential decay" +
-   " rate. This should be between (0.5, 1.0] to guarantee asymptotic convergence.",
-   ParamValidators.gt(0))
+ final val learningDecay = new DoubleParam(this, "learningDecay", "Learning rate, set as an" +
+   " exponential decay rate. This should be between (0.5, 1.0] to guarantee asymptotic" +
+   " convergence.", ParamValidators.gt(0))

/** @group expertGetParam */
@Since("1.6.0")
- def getKappa: Double = $(kappa)
+ def getLearningDecay: Double = $(learningDecay)

/**
* Fraction of the corpus to be sampled and used in each iteration of mini-batch gradient descent,
@@ -262,6 +265,7 @@ private[clustering] trait LDAParams extends Params with HasFeaturesCol with HasM
SchemaUtils.appendColumn(schema, $(topicDistributionCol), new VectorUDT)
}

@Since("1.6.0")
override def validateParams(): Unit = {
if (isSet(docConcentration)) {
if (getDocConcentration.length != 1) {
@@ -295,8 +299,8 @@ private[clustering] trait LDAParams extends Params with HasFeaturesCol with HasM
private[clustering] def getOldOptimizer: OldLDAOptimizer = getOptimizer match {
case "online" =>
new OldOnlineLDAOptimizer()
-   .setTau0($(tau0))
-   .setKappa($(kappa))
+   .setTau0($(learningOffset))
+   .setKappa($(learningDecay))
.setMiniBatchFraction($(subsamplingRate))
.setOptimizeDocConcentration($(optimizeDocConcentration))
case "em" =>
@@ -587,7 +591,8 @@ class LDA @Since("1.6.0") (
def this() = this(Identifiable.randomUID("lda"))

setDefault(maxIter -> 20, k -> 10, optimizer -> "online", checkpointInterval -> 10,
-   tau0 -> 1024, kappa -> 0.51, subsamplingRate -> 0.05, optimizeDocConcentration -> true)
+   learningOffset -> 1024, learningDecay -> 0.51, subsamplingRate -> 0.05,
+   optimizeDocConcentration -> true)

/**
* The features for LDA should be a [[Vector]] representing the word counts in a document.
@@ -635,11 +640,11 @@ class LDA @Since("1.6.0") (

/** @group expertSetParam */
@Since("1.6.0")
- def setTau0(value: Double): this.type = set(tau0, value)
+ def setLearningOffset(value: Double): this.type = set(learningOffset, value)

/** @group expertSetParam */
@Since("1.6.0")
- def setKappa(value: Double): this.type = set(kappa, value)
+ def setLearningDecay(value: Double): this.type = set(learningDecay, value)

/** @group setParam */
@Since("1.6.0")
16 changes: 8 additions & 8 deletions mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala
@@ -64,8 +64,8 @@ class LDASuite extends SparkFunSuite with MLlibTestSparkContext {
assert(!lda.isSet(lda.docConcentration))
assert(!lda.isSet(lda.topicConcentration))
assert(lda.getOptimizer === "online")
- assert(lda.getKappa === 0.51)
- assert(lda.getTau0 === 1024)
+ assert(lda.getLearningDecay === 0.51)
+ assert(lda.getLearningOffset === 1024)
assert(lda.getSubsamplingRate === 0.05)
assert(lda.getOptimizeDocConcentration)
assert(lda.getTopicDistributionCol === "topicDistribution")
@@ -95,10 +95,10 @@ class LDASuite extends SparkFunSuite with MLlibTestSparkContext {
assert(lda.getOptimizer === "em")
lda.setOptimizer("online")
assert(lda.getOptimizer === "online")
- lda.setKappa(0.53)
- assert(lda.getKappa === 0.53)
- lda.setTau0(1027)
- assert(lda.getTau0 === 1027)
+ lda.setLearningDecay(0.53)
+ assert(lda.getLearningDecay === 0.53)
+ lda.setLearningOffset(1027)
+ assert(lda.getLearningOffset === 1027)
lda.setSubsamplingRate(0.06)
assert(lda.getSubsamplingRate === 0.06)
lda.setOptimizeDocConcentration(false)
@@ -137,10 +137,10 @@ class LDASuite extends SparkFunSuite with MLlibTestSparkContext {

// Online LDA
intercept[IllegalArgumentException] {
-   new LDA().setTau0(0)
+   new LDA().setLearningOffset(0)
}
intercept[IllegalArgumentException] {
-   new LDA().setKappa(0)
+   new LDA().setLearningDecay(0)
}
intercept[IllegalArgumentException] {
new LDA().setSubsamplingRate(0)

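For background (not part of this commit): the two renamed parameters set the step-size schedule of online variational Bayes from Hoffman et al. (2010), which the doc comments above cite. At mini-batch t the update weight is roughly (learningOffset + t)^(-learningDecay), so a larger offset downweights early iterations and a decay in (0.5, 1.0] gives asymptotic convergence. A small illustrative sketch; the exact weighting inside Spark's OnlineLDAOptimizer may differ in detail:

// Step-size schedule from Hoffman et al. (2010); illustration only,
// not the code changed in this commit.
def onlineLdaWeight(iteration: Int, learningOffset: Double, learningDecay: Double): Double =
  math.pow(learningOffset + iteration, -learningDecay)

// With the defaults (1024, 0.51) the first mini-batches are heavily damped:
// onlineLdaWeight(1, 1024, 0.51)    ~ 0.029
// onlineLdaWeight(1000, 1024, 0.51) ~ 0.021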