Skip to content

Commit

Permalink
Addressing reviewers' comments.
Browse files Browse the repository at this point in the history
  • Loading branch information
avulanov committed Jul 31, 2015
1 parent a7e7951 commit 4806b6f
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 23 deletions.
2 changes: 1 addition & 1 deletion mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -776,7 +776,7 @@ private[ml] class FeedForwardTrainer(

// TODO: what if we need to pass random seed?
private var _weights = topology.getInstance(11L).weights()
private var _stackSize = 100
private var _stackSize = 128
private var dataStacker = new DataStacker(_stackSize, inputSize, outputSize)
private var _gradient: Gradient = new ANNGradient(topology, dataStacker)
private var _updater: Updater = new ANNUpdater()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ private[ml] trait MultilayerPerceptronParams extends PredictorParams
*/
def setSeed(value: Long): this.type = set(seed, value)

setDefault(maxIter -> 100, tol -> 1e-4, layers -> Array(1, 1), blockSize -> 100)
setDefault(maxIter -> 100, tol -> 1e-4, layers -> Array(1, 1), blockSize -> 128)
}

/** Label to vector converter. */
Expand Down Expand Up @@ -153,8 +153,8 @@ class MultilayerPerceptronClassifier(override val uid: String)
val data = lpData.map(lp => LabelConverter.encodeLabeledPoint(lp, labels))
val topology = FeedForwardTopology.multiLayerPerceptron(myLayers, true)
val FeedForwardTrainer = new FeedForwardTrainer(topology, myLayers(0), myLayers.last)
FeedForwardTrainer.LBFGSOptimizer.setConvergenceTol(getTol).setNumIterations(getMaxIter)
FeedForwardTrainer.setStackSize(getBlockSize)
FeedForwardTrainer.LBFGSOptimizer.setConvergenceTol($(tol)).setNumIterations($(maxIter))
FeedForwardTrainer.setStackSize($(blockSize))
val mlpModel = FeedForwardTrainer.train(data)
new MultilayerPerceptronClassifierModel(uid, myLayers, mlpModel.weights())
}
Expand All @@ -166,11 +166,11 @@ class MultilayerPerceptronClassifier(override val uid: String)
* Each layer has sigmoid activation function, output layer has softmax.
* @param uid uid
* @param layers array of layer sizes including input and output layers
* @param weights vector of initial weights for the model
* @param weights vector of initial weights for the model, made up of the weights of its layers
* @return prediction model
*/
@Experimental
class MultilayerPerceptronClassifierModel private[ml](
class MultilayerPerceptronClassifierModel private[ml] (
override val uid: String,
layers: Array[Int],
weights: Vector)
Expand Down
11 changes: 2 additions & 9 deletions mllib/src/main/scala/org/apache/spark/ml/param/params.scala
Original file line number Diff line number Diff line change
Expand Up @@ -168,15 +168,8 @@ object ParamValidators {
}

/** Check that the array length is greater than lowerBound. */
def arrayLengthGt[T](lowerBound: Double): T => Boolean = { (value: T) =>
val array: Array[_] = value match {
case x: Array[_] => x
case _ =>
// The type should be checked before this is ever called.
throw new IllegalArgumentException("Array Param validation failed because" +
s" of unexpected input type: ${value.getClass}")
}
array.length > lowerBound
def arrayLengthGt[T](lowerBound: Double): Array[T] => Boolean = { (value: Array[T]) =>
value.length > lowerBound
}
}

Expand Down
20 changes: 13 additions & 7 deletions mllib/src/test/scala/org/apache/spark/ml/ann/ANNSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,22 @@ package org.apache.spark.ml.ann
import org.apache.spark.SparkFunSuite
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.util.MLlibTestSparkContext
import org.apache.spark.mllib.util.TestingUtils._


class ANNSuite extends SparkFunSuite with MLlibTestSparkContext {

// TODO: add a test that compares the weights against Weka's MLP
test("ANN with Sigmoid learns XOR function with LBFGS optimizer") {
val inputs = Array[Array[Double]](
val inputs = Array(
Array(0.0, 0.0),
Array(0.0, 1.0),
Array(1.0, 0.0),
Array(1.0, 1.0)
)
val outputs = Array(0.0, 1.0, 1.0, 0.0)
val data = inputs.zip(outputs).map { case (features, label) =>
(Vectors.dense(features), Vectors.dense(Array(label)))
(Vectors.dense(features), Vectors.dense(label))
}
val rddData = sc.parallelize(data, 1)
val hiddenLayersTopology = Array(5)
Expand All @@ -48,17 +50,19 @@ class ANNSuite extends SparkFunSuite with MLlibTestSparkContext {
val predictionAndLabels = rddData.map { case (input, label) =>
(model.predict(input)(0), label(0))
}.collect()
assert(predictionAndLabels.forall { case (p, l) => (math.round(p) - l) == 0})
predictionAndLabels.foreach { case (p, l) =>
assert(math.round(p) === l)
}
}

test("ANN with SoftMax learns XOR function with 2-bit output and batch GD optimizer") {
val inputs = Array[Array[Double]](
val inputs = Array(
Array(0.0, 0.0),
Array(0.0, 1.0),
Array(1.0, 0.0),
Array(1.0, 1.0)
)
val outputs = Array[Array[Double]](
val outputs = Array(
Array(1.0, 0.0),
Array(0.0, 1.0),
Array(0.0, 1.0),
Expand All @@ -78,8 +82,10 @@ class ANNSuite extends SparkFunSuite with MLlibTestSparkContext {
trainer.setWeights(initialWeights)
val model = trainer.train(rddData)
val predictionAndLabels = rddData.map { case (input, label) =>
(model.predict(input).toArray.map(math.round(_)), label.toArray)
(model.predict(input), label)
}.collect()
assert(predictionAndLabels.forall { case (p, l) => p.deep == l.deep})
predictionAndLabels.foreach { case (p, l) =>
assert(p ~== l absTol 0.5)
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ class MultilayerPerceptronClassifierSuite extends SparkFunSuite with MLlibTestSp
val result = model.transform(dataFrame)
val predictionAndLabels = result.select("prediction", "label").collect()
predictionAndLabels.foreach { case Row(p: Double, l: Double) =>
assert(p == l) }
assert(p == l)
}
}

// TODO: implement a more rigorous test
Expand Down

0 comments on commit 4806b6f

Please sign in to comment.