diff --git a/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala b/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala index 45e3da51a6c5d..b5258ff348477 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala @@ -776,7 +776,7 @@ private[ml] class FeedForwardTrainer( // TODO: what if we need to pass random seed? private var _weights = topology.getInstance(11L).weights() - private var _stackSize = 100 + private var _stackSize = 128 private var dataStacker = new DataStacker(_stackSize, inputSize, outputSize) private var _gradient: Gradient = new ANNGradient(topology, dataStacker) private var _updater: Updater = new ANNUpdater() diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala index f92baf41617f0..8cd2103d7d5e6 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala @@ -88,7 +88,7 @@ private[ml] trait MultilayerPerceptronParams extends PredictorParams */ def setSeed(value: Long): this.type = set(seed, value) - setDefault(maxIter -> 100, tol -> 1e-4, layers -> Array(1, 1), blockSize -> 100) + setDefault(maxIter -> 100, tol -> 1e-4, layers -> Array(1, 1), blockSize -> 128) } /** Label to vector converter. */ @@ -153,8 +153,8 @@ class MultilayerPerceptronClassifier(override val uid: String) val data = lpData.map(lp => LabelConverter.encodeLabeledPoint(lp, labels)) val topology = FeedForwardTopology.multiLayerPerceptron(myLayers, true) val FeedForwardTrainer = new FeedForwardTrainer(topology, myLayers(0), myLayers.last) - FeedForwardTrainer.LBFGSOptimizer.setConvergenceTol(getTol).setNumIterations(getMaxIter) - FeedForwardTrainer.setStackSize(getBlockSize) + FeedForwardTrainer.LBFGSOptimizer.setConvergenceTol($(tol)).setNumIterations($(maxIter)) + FeedForwardTrainer.setStackSize($(blockSize)) val mlpModel = FeedForwardTrainer.train(data) new MultilayerPerceptronClassifierModel(uid, myLayers, mlpModel.weights()) } @@ -166,11 +166,11 @@ class MultilayerPerceptronClassifier(override val uid: String) * Each layer has sigmoid activation function, output layer has softmax. * @param uid uid * @param layers array of layer sizes including input and output layers - * @param weights vector of initial weights for the model + * @param weights vector of initial weights for the model that consists of the weights of layers * @return prediction model */ @Experimental -class MultilayerPerceptronClassifierModel private[ml]( +class MultilayerPerceptronClassifierModel private[ml] ( override val uid: String, layers: Array[Int], weights: Vector) diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala index 5e1855d6a50b9..cbff804c806fc 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala @@ -168,15 +168,8 @@ object ParamValidators { } /** Check that the array length is greater than lowerBound. */ - def arrayLengthGt[T](lowerBound: Double): T => Boolean = { (value: T) => - val array: Array[_] = value match { - case x: Array[_] => x - case _ => - // The type should be checked before this is ever called. - throw new IllegalArgumentException("Array Param validation failed because" + - s" of unexpected input type: ${value.getClass}") - } - array.length > lowerBound + def arrayLengthGt[T](lowerBound: Double): Array[T] => Boolean = { (value: Array[T]) => + value.length > lowerBound } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/ann/ANNSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/ann/ANNSuite.scala index 449288a48da65..1292e57d7c01a 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/ann/ANNSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/ann/ANNSuite.scala @@ -20,12 +20,14 @@ package org.apache.spark.ml.ann import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.util.MLlibTestSparkContext +import org.apache.spark.mllib.util.TestingUtils._ + class ANNSuite extends SparkFunSuite with MLlibTestSparkContext { // TODO: test for weights comparison with Weka MLP test("ANN with Sigmoid learns XOR function with LBFGS optimizer") { - val inputs = Array[Array[Double]]( + val inputs = Array( Array(0.0, 0.0), Array(0.0, 1.0), Array(1.0, 0.0), @@ -33,7 +35,7 @@ class ANNSuite extends SparkFunSuite with MLlibTestSparkContext { ) val outputs = Array(0.0, 1.0, 1.0, 0.0) val data = inputs.zip(outputs).map { case (features, label) => - (Vectors.dense(features), Vectors.dense(Array(label))) + (Vectors.dense(features), Vectors.dense(label)) } val rddData = sc.parallelize(data, 1) val hiddenLayersTopology = Array(5) @@ -48,17 +50,19 @@ class ANNSuite extends SparkFunSuite with MLlibTestSparkContext { val predictionAndLabels = rddData.map { case (input, label) => (model.predict(input)(0), label(0)) }.collect() - assert(predictionAndLabels.forall { case (p, l) => (math.round(p) - l) == 0}) + predictionAndLabels.foreach { case (p, l) => + assert(math.round(p) === l) + } } test("ANN with SoftMax learns XOR function with 2-bit output and batch GD optimizer") { - val inputs = Array[Array[Double]]( + val inputs = Array( Array(0.0, 0.0), Array(0.0, 1.0), Array(1.0, 0.0), Array(1.0, 1.0) ) - val outputs = Array[Array[Double]]( + val outputs = Array( Array(1.0, 0.0), Array(0.0, 1.0), Array(0.0, 1.0), @@ -78,8 +82,10 @@ class ANNSuite extends SparkFunSuite with MLlibTestSparkContext { trainer.setWeights(initialWeights) val model = trainer.train(rddData) val predictionAndLabels = rddData.map { case (input, label) => - (model.predict(input).toArray.map(math.round(_)), label.toArray) + (model.predict(input), label) }.collect() - assert(predictionAndLabels.forall { case (p, l) => p.deep == l.deep}) + predictionAndLabels.foreach { case (p, l) => + assert(p ~== l absTol 0.5) + } } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala index a42b0b362345f..ddc948f65df45 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala @@ -45,7 +45,8 @@ class MultilayerPerceptronClassifierSuite extends SparkFunSuite with MLlibTestSp val result = model.transform(dataFrame) val predictionAndLabels = result.select("prediction", "label").collect() predictionAndLabels.foreach { case Row(p: Double, l: Double) => - assert(p == l) } + assert(p == l) + } } // TODO: implement a more rigorous test