From d535d62e4518e5054a2784ee29ea478f471a67b9 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 19 Jun 2014 19:39:03 +0400 Subject: [PATCH 01/14] Multiclass evaluation --- .../evaluation/MulticlassEvaluator.scala | 112 ++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassEvaluator.scala diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassEvaluator.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassEvaluator.scala new file mode 100644 index 0000000000000..4f52d5e7c301f --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassEvaluator.scala @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.evaluation + +import org.apache.spark.rdd.RDD +import org.apache.spark.Logging +import org.apache.spark.SparkContext._ + +/** + * Evaluator for multiclass classification. + * + * @param scoreAndLabels an RDD of (score, label) pairs. + */ +class MulticlassEvaluator(scoreAndLabels: RDD[(Double, Double)]) extends Logging { + + /* class = category; label = instance of class; prediction = instance of class */ + + private lazy val labelCountByClass = scoreAndLabels.values.countByValue() + private lazy val labelCount = labelCountByClass.foldLeft(0L){case(sum, (_, count)) => sum + count} + private lazy val tpByClass = scoreAndLabels.map{ case (prediction, label) => (label, if(label == prediction) 1 else 0) }.reduceByKey{_ + _}.collectAsMap + private lazy val fpByClass = scoreAndLabels.map{ case (prediction, label) => (prediction, if(prediction != label) 1 else 0) }.reduceByKey{_ + _}.collectAsMap + + /** + * Returns Precision for a given label (category) + * @param label the label. + * @return Precision. + */ + def precision(label: Double): Double = if(tpByClass(label) + fpByClass.getOrElse(label, 0) == 0) 0 + else tpByClass(label).toDouble / (tpByClass(label) + fpByClass.getOrElse(label, 0)).toDouble + + /** + * Returns Recall for a given label (category) + * @param label the label. + * @return Recall. + */ + def recall(label: Double): Double = tpByClass(label).toDouble / labelCountByClass(label).toDouble + + /** + * Returns F1-measure for a given label (category) + * @param label the label. + * @return F1-measure.*/ + def f1Measure(label: Double): Double = 2 * precision(label) * recall(label) / (precision(label) + recall(label)) + + /** + * Returns micro-averaged Recall (equals to microPrecision and microF1measure for multiclass classifier) + * @return microRecall. 
+ */ + def microRecall: Double = tpByClass.foldLeft(0L){case (sum,(_, tp)) => sum + tp}.toDouble / labelCount.toDouble + + /** + * Returns micro-averaged Precision (equals to microPrecision and microF1measure for multiclass classifier) + * @return microPrecision. + */ + def microPrecision: Double = microRecall + + /** + * Returns micro-averaged F1-measure (equals to microPrecision and microRecall for multiclass classifier) + * @return microF1measure. + */ + def microF1Measure: Double = microRecall + + /** + * Returns weighted averaged Recall + * @return weightedRecall. + */ + def weightedRecall: Double = labelCountByClass.foldLeft(0.0){case(wRecall, (category, count)) => wRecall + recall(category) * count.toDouble / labelCount.toDouble} + + /** + * Returns weighted averaged Precision + * @return weightedPrecision. + */ + def weightedPrecision: Double = labelCountByClass.foldLeft(0.0){case(wPrecision, (category, count)) => wPrecision + precision(category) * count.toDouble / labelCount.toDouble} + + /** + * Returns weighted averaged F1-measure + * @return weightedF1Measure. + */ + def weightedF1Measure: Double = 2 * weightedPrecision * weightedRecall / (weightedPrecision + weightedRecall) + + /** + * Returns map with Precisions for individual classes + * @return precisionPerClass. + */ + def precisionPerClass = labelCountByClass.map{case (category, _) => (category, precision(category))}.toMap + + /** + * Returns map with Recalls for individual classes + * @return recallPerClass. + */ + def recallPerClass = labelCountByClass.map{case (category, _) => (category, recall(category))}.toMap + + /** + * Returns map with F1-measures for individual classes + * @return f1MeasurePerClass. + */ + def f1MeasurePerClass = labelCountByClass.map{case (category, _) => (category, f1Measure(category))}.toMap +} \ No newline at end of file From fcee82d0b99efc5f3416ac43c98d7914a31f40e0 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 20 Jun 2014 17:42:28 +0400 Subject: [PATCH 02/14] Unit tests. Class rename --- .../evaluation/MulticlassEvaluator.scala | 112 ------------------ 1 file changed, 112 deletions(-) delete mode 100644 mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassEvaluator.scala diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassEvaluator.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassEvaluator.scala deleted file mode 100644 index 4f52d5e7c301f..0000000000000 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassEvaluator.scala +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.mllib.evaluation - -import org.apache.spark.rdd.RDD -import org.apache.spark.Logging -import org.apache.spark.SparkContext._ - -/** - * Evaluator for multiclass classification. - * - * @param scoreAndLabels an RDD of (score, label) pairs. - */ -class MulticlassEvaluator(scoreAndLabels: RDD[(Double, Double)]) extends Logging { - - /* class = category; label = instance of class; prediction = instance of class */ - - private lazy val labelCountByClass = scoreAndLabels.values.countByValue() - private lazy val labelCount = labelCountByClass.foldLeft(0L){case(sum, (_, count)) => sum + count} - private lazy val tpByClass = scoreAndLabels.map{ case (prediction, label) => (label, if(label == prediction) 1 else 0) }.reduceByKey{_ + _}.collectAsMap - private lazy val fpByClass = scoreAndLabels.map{ case (prediction, label) => (prediction, if(prediction != label) 1 else 0) }.reduceByKey{_ + _}.collectAsMap - - /** - * Returns Precision for a given label (category) - * @param label the label. - * @return Precision. - */ - def precision(label: Double): Double = if(tpByClass(label) + fpByClass.getOrElse(label, 0) == 0) 0 - else tpByClass(label).toDouble / (tpByClass(label) + fpByClass.getOrElse(label, 0)).toDouble - - /** - * Returns Recall for a given label (category) - * @param label the label. - * @return Recall. - */ - def recall(label: Double): Double = tpByClass(label).toDouble / labelCountByClass(label).toDouble - - /** - * Returns F1-measure for a given label (category) - * @param label the label. - * @return F1-measure.*/ - def f1Measure(label: Double): Double = 2 * precision(label) * recall(label) / (precision(label) + recall(label)) - - /** - * Returns micro-averaged Recall (equals to microPrecision and microF1measure for multiclass classifier) - * @return microRecall. - */ - def microRecall: Double = tpByClass.foldLeft(0L){case (sum,(_, tp)) => sum + tp}.toDouble / labelCount.toDouble - - /** - * Returns micro-averaged Precision (equals to microPrecision and microF1measure for multiclass classifier) - * @return microPrecision. - */ - def microPrecision: Double = microRecall - - /** - * Returns micro-averaged F1-measure (equals to microPrecision and microRecall for multiclass classifier) - * @return microF1measure. - */ - def microF1Measure: Double = microRecall - - /** - * Returns weighted averaged Recall - * @return weightedRecall. - */ - def weightedRecall: Double = labelCountByClass.foldLeft(0.0){case(wRecall, (category, count)) => wRecall + recall(category) * count.toDouble / labelCount.toDouble} - - /** - * Returns weighted averaged Precision - * @return weightedPrecision. - */ - def weightedPrecision: Double = labelCountByClass.foldLeft(0.0){case(wPrecision, (category, count)) => wPrecision + precision(category) * count.toDouble / labelCount.toDouble} - - /** - * Returns weighted averaged F1-measure - * @return weightedF1Measure. - */ - def weightedF1Measure: Double = 2 * weightedPrecision * weightedRecall / (weightedPrecision + weightedRecall) - - /** - * Returns map with Precisions for individual classes - * @return precisionPerClass. - */ - def precisionPerClass = labelCountByClass.map{case (category, _) => (category, precision(category))}.toMap - - /** - * Returns map with Recalls for individual classes - * @return recallPerClass. - */ - def recallPerClass = labelCountByClass.map{case (category, _) => (category, recall(category))}.toMap - - /** - * Returns map with F1-measures for individual classes - * @return f1MeasurePerClass. 
- */ - def f1MeasurePerClass = labelCountByClass.map{case (category, _) => (category, f1Measure(category))}.toMap -} \ No newline at end of file From a5c8ba46689ed13080eb0681814a1d7c1a0cf497 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 20 Jun 2014 18:02:47 +0400 Subject: [PATCH 03/14] Unit tests. Class rename --- .../mllib/evaluation/MulticlassMetrics.scala | 128 ++++++++++++++++++ .../evaluation/MulticlassMetricsSuite.scala | 71 ++++++++++ 2 files changed, 199 insertions(+) create mode 100644 mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala create mode 100644 mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala new file mode 100644 index 0000000000000..76fe96a5938c0 --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.evaluation + +import org.apache.spark.rdd.RDD +import org.apache.spark.Logging +import org.apache.spark.SparkContext._ + +/** + * Evaluator for multiclass classification. + * + * @param scoreAndLabels an RDD of (score, label) pairs. + */ +class MulticlassMetrics(scoreAndLabels: RDD[(Double, Double)]) extends Logging { + + /* class = category; label = instance of class; prediction = instance of class */ + + private lazy val labelCountByClass = scoreAndLabels.values.countByValue() + private lazy val labelCount = labelCountByClass.foldLeft(0L){case(sum, (_, count)) => sum + count} + private lazy val tpByClass = scoreAndLabels.map{ case (prediction, label) => + (label, if(label == prediction) 1 else 0) }.reduceByKey{_ + _}.collectAsMap + private lazy val fpByClass = scoreAndLabels.map{ case (prediction, label) => + (prediction, if(prediction != label) 1 else 0) }.reduceByKey{_ + _}.collectAsMap + + /** + * Returns Precision for a given label (category) + * @param label the label. + * @return Precision. + */ + def precision(label: Double): Double = if(tpByClass(label) + fpByClass.getOrElse(label, 0) == 0) 0 + else tpByClass(label).toDouble / (tpByClass(label) + fpByClass.getOrElse(label, 0)).toDouble + + /** + * Returns Recall for a given label (category) + * @param label the label. + * @return Recall. + */ + def recall(label: Double): Double = tpByClass(label).toDouble / labelCountByClass(label).toDouble + + /** + * Returns F1-measure for a given label (category) + * @param label the label. + * @return F1-measure. 
+ */ + def f1Measure(label: Double): Double = + 2 * precision(label) * recall(label) / (precision(label) + recall(label)) + + /** + * Returns micro-averaged Recall + * (equals to microPrecision and microF1measure for multiclass classifier) + * @return microRecall. + */ + def microRecall: Double = + tpByClass.foldLeft(0L){case (sum,(_, tp)) => sum + tp}.toDouble / labelCount.toDouble + + /** + * Returns micro-averaged Precision + * (equals to microPrecision and microF1measure for multiclass classifier) + * @return microPrecision. + */ + def microPrecision: Double = microRecall + + /** + * Returns micro-averaged F1-measure + * (equals to microPrecision and microRecall for multiclass classifier) + * @return microF1measure. + */ + def microF1Measure: Double = microRecall + + /** + * Returns weighted averaged Recall + * @return weightedRecall. + */ + def weightedRecall: Double = labelCountByClass.foldLeft(0.0){case(wRecall, (category, count)) => + wRecall + recall(category) * count.toDouble / labelCount.toDouble} + + /** + * Returns weighted averaged Precision + * @return weightedPrecision. + */ + def weightedPrecision: Double = + labelCountByClass.foldLeft(0.0){case(wPrecision, (category, count)) => + wPrecision + precision(category) * count.toDouble / labelCount.toDouble} + + /** + * Returns weighted averaged F1-measure + * @return weightedF1Measure. + */ + def weightedF1Measure: Double = + labelCountByClass.foldLeft(0.0){case(wF1measure, (category, count)) => + wF1measure + f1Measure(category) * count.toDouble / labelCount.toDouble} + + /** + * Returns map with Precisions for individual classes + * @return precisionPerClass. + */ + def precisionPerClass = + labelCountByClass.map{case (category, _) => (category, precision(category))}.toMap + + /** + * Returns map with Recalls for individual classes + * @return recallPerClass. + */ + def recallPerClass = + labelCountByClass.map{case (category, _) => (category, recall(category))}.toMap + + /** + * Returns map with F1-measures for individual classes + * @return f1MeasurePerClass. + */ + def f1MeasurePerClass = + labelCountByClass.map{case (category, _) => (category, f1Measure(category))}.toMap +} diff --git a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala new file mode 100644 index 0000000000000..b4e3664ab7916 --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.mllib.evaluation + +import org.apache.spark.mllib.util.LocalSparkContext +import org.scalatest.FunSuite + +class MulticlassMetricsSuite extends FunSuite with LocalSparkContext { + test("Multiclass evaluation metrics") { + /* + * Confusion matrix for 3-class classification with total 9 instances: + * |2|1|1| true class0 (4 instances) + * |1|3|0| true class1 (4 instances) + * |0|0|1| true class2 (1 instance) + * + */ + val scoreAndLabels = sc.parallelize( + Seq((0.0, 0.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0), (1.0, 1.0), + (1.0, 1.0), (1.0, 1.0), (2.0, 2.0), (2.0, 0.0)), 2) + val metrics = new MulticlassMetrics(scoreAndLabels) + + val delta = 0.00001 + val precision0 = 2.0 / (2.0 + 1.0) + val precision1 = 3.0 / (3.0 + 1.0) + val precision2 = 1.0 / (1.0 + 1.0) + val recall0 = 2.0 / (2.0 + 2.0) + val recall1 = 3.0 / (3.0 + 1.0) + val recall2 = 1.0 / (1.0 + 0.0) + val f1measure0 = 2 * precision0 * recall0 / (precision0 + recall0) + val f1measure1 = 2 * precision1 * recall1 / (precision1 + recall1) + val f1measure2 = 2 * precision2 * recall2 / (precision2 + recall2) + + assert(math.abs(metrics.precision(0.0) - precision0) < delta) + assert(math.abs(metrics.precision(1.0) - precision1) < delta) + assert(math.abs(metrics.precision(2.0) - precision2) < delta) + assert(math.abs(metrics.recall(0.0) - recall0) < delta) + assert(math.abs(metrics.recall(1.0) - recall1) < delta) + assert(math.abs(metrics.recall(2.0) - recall2) < delta) + assert(math.abs(metrics.f1Measure(0.0) - f1measure0) < delta) + assert(math.abs(metrics.f1Measure(1.0) - f1measure1) < delta) + assert(math.abs(metrics.f1Measure(2.0) - f1measure2) < delta) + + assert(math.abs(metrics.microRecall - + (2.0 + 3.0 + 1.0) / ((2.0 + 3.0 + 1.0) + (1.0 + 1.0 + 1.0))) < delta) + assert(math.abs(metrics.microRecall - metrics.microPrecision) < delta) + assert(math.abs(metrics.microRecall - metrics.microF1Measure) < delta) + assert(math.abs(metrics.microRecall - metrics.weightedRecall) < delta) + assert(math.abs(metrics.weightedPrecision - + ((4.0 / 9.0) * precision0 + (4.0 / 9.0) * precision1 + (1.0 / 9.0) * precision2)) < delta) + assert(math.abs(metrics.weightedRecall - + ((4.0 / 9.0) * recall0 + (4.0 / 9.0) * recall1 + (1.0 / 9.0) * recall2)) < delta) + assert(math.abs(metrics.weightedF1Measure - + ((4.0 / 9.0) * f1measure0 + (4.0 / 9.0) * f1measure1 + (1.0 / 9.0) * f1measure2)) < delta) + + } +} From d5ce98103ddb97c8c83f07a2a71b9a22053c2cde Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 20 Jun 2014 18:18:51 +0400 Subject: [PATCH 04/14] Comments about Double --- .../spark/mllib/evaluation/MulticlassMetrics.scala | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala index 76fe96a5938c0..dff84224a0e31 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala @@ -23,18 +23,21 @@ import org.apache.spark.SparkContext._ /** * Evaluator for multiclass classification. + * NB: type Double both for prediction and label is retained + * for compatibility with model.predict that returns Double + * and MLUtils.loadLibSVMFile that loads class labels as Double * - * @param scoreAndLabels an RDD of (score, label) pairs. + * @param predictionsAndLabels an RDD of (prediction, label) pairs. 
*/ -class MulticlassMetrics(scoreAndLabels: RDD[(Double, Double)]) extends Logging { +class MulticlassMetrics(predictionsAndLabels: RDD[(Double, Double)]) extends Logging { /* class = category; label = instance of class; prediction = instance of class */ - private lazy val labelCountByClass = scoreAndLabels.values.countByValue() + private lazy val labelCountByClass = predictionsAndLabels.values.countByValue() private lazy val labelCount = labelCountByClass.foldLeft(0L){case(sum, (_, count)) => sum + count} - private lazy val tpByClass = scoreAndLabels.map{ case (prediction, label) => + private lazy val tpByClass = predictionsAndLabels.map{ case (prediction, label) => (label, if(label == prediction) 1 else 0) }.reduceByKey{_ + _}.collectAsMap - private lazy val fpByClass = scoreAndLabels.map{ case (prediction, label) => + private lazy val fpByClass = predictionsAndLabels.map{ case (prediction, label) => (prediction, if(prediction != label) 1 else 0) }.reduceByKey{_ + _}.collectAsMap /** From e2c91c37dff6b6d9ae002a9095ed969955d11cac Mon Sep 17 00:00:00 2001 From: Alexander Ulanov Date: Mon, 30 Jun 2014 13:51:04 +0400 Subject: [PATCH 05/14] Fixes to mutliclass metics --- .../mllib/evaluation/MulticlassMetrics.scala | 33 ++++++++++--------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala index dff84224a0e31..ebcee86acedfd 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala @@ -60,72 +60,75 @@ class MulticlassMetrics(predictionsAndLabels: RDD[(Double, Double)]) extends Log * @param label the label. * @return F1-measure. */ - def f1Measure(label: Double): Double = - 2 * precision(label) * recall(label) / (precision(label) + recall(label)) + def f1Measure(label: Double): Double ={ + val p = precision(label) + val r = recall(label) + if((p + r) == 0) 0 else 2 * p * r / (p + r) + } /** * Returns micro-averaged Recall * (equals to microPrecision and microF1measure for multiclass classifier) * @return microRecall. */ - def microRecall: Double = - tpByClass.foldLeft(0L){case (sum,(_, tp)) => sum + tp}.toDouble / labelCount.toDouble + lazy val microRecall: Double = + tpByClass.foldLeft(0L){case (sum,(_, tp)) => sum + tp}.toDouble / labelCount /** * Returns micro-averaged Precision * (equals to microPrecision and microF1measure for multiclass classifier) * @return microPrecision. */ - def microPrecision: Double = microRecall + lazy val microPrecision: Double = microRecall /** * Returns micro-averaged F1-measure * (equals to microPrecision and microRecall for multiclass classifier) * @return microF1measure. */ - def microF1Measure: Double = microRecall + lazy val microF1Measure: Double = microRecall /** * Returns weighted averaged Recall * @return weightedRecall. */ - def weightedRecall: Double = labelCountByClass.foldLeft(0.0){case(wRecall, (category, count)) => - wRecall + recall(category) * count.toDouble / labelCount.toDouble} + lazy val weightedRecall: Double = labelCountByClass.foldLeft(0.0){case(wRecall, (category, count)) => + wRecall + recall(category) * count.toDouble / labelCount} /** * Returns weighted averaged Precision * @return weightedPrecision. 
*/ - def weightedPrecision: Double = + lazy val weightedPrecision: Double = labelCountByClass.foldLeft(0.0){case(wPrecision, (category, count)) => - wPrecision + precision(category) * count.toDouble / labelCount.toDouble} + wPrecision + precision(category) * count.toDouble / labelCount} /** * Returns weighted averaged F1-measure * @return weightedF1Measure. */ - def weightedF1Measure: Double = + lazy val weightedF1Measure: Double = labelCountByClass.foldLeft(0.0){case(wF1measure, (category, count)) => - wF1measure + f1Measure(category) * count.toDouble / labelCount.toDouble} + wF1measure + f1Measure(category) * count.toDouble / labelCount} /** * Returns map with Precisions for individual classes * @return precisionPerClass. */ - def precisionPerClass = + lazy val precisionPerClass = labelCountByClass.map{case (category, _) => (category, precision(category))}.toMap /** * Returns map with Recalls for individual classes * @return recallPerClass. */ - def recallPerClass = + lazy val recallPerClass = labelCountByClass.map{case (category, _) => (category, recall(category))}.toMap /** * Returns map with F1-measures for individual classes * @return f1MeasurePerClass. */ - def f1MeasurePerClass = + lazy val f1MeasurePerClass = labelCountByClass.map{case (category, _) => (category, f1Measure(category))}.toMap } From c3a77ad78247491433009afd9d0f81161fee9592 Mon Sep 17 00:00:00 2001 From: Alexander Ulanov Date: Fri, 4 Jul 2014 15:38:02 +0400 Subject: [PATCH 06/14] Addressing reviewers comments mengxr --- .../mllib/evaluation/MulticlassMetrics.scala | 109 ++++++++---------- .../evaluation/MulticlassMetricsSuite.scala | 25 ++-- 2 files changed, 59 insertions(+), 75 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala index ebcee86acedfd..7a70117f36d4c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala @@ -17,118 +17,103 @@ package org.apache.spark.mllib.evaluation +import org.apache.spark.annotation.Experimental import org.apache.spark.rdd.RDD import org.apache.spark.Logging import org.apache.spark.SparkContext._ /** * Evaluator for multiclass classification. - * NB: type Double both for prediction and label is retained - * for compatibility with model.predict that returns Double - * and MLUtils.loadLibSVMFile that loads class labels as Double * * @param predictionsAndLabels an RDD of (prediction, label) pairs. 
*/ +@Experimental class MulticlassMetrics(predictionsAndLabels: RDD[(Double, Double)]) extends Logging { - /* class = category; label = instance of class; prediction = instance of class */ - private lazy val labelCountByClass = predictionsAndLabels.values.countByValue() - private lazy val labelCount = labelCountByClass.foldLeft(0L){case(sum, (_, count)) => sum + count} - private lazy val tpByClass = predictionsAndLabels.map{ case (prediction, label) => - (label, if(label == prediction) 1 else 0) }.reduceByKey{_ + _}.collectAsMap - private lazy val fpByClass = predictionsAndLabels.map{ case (prediction, label) => - (prediction, if(prediction != label) 1 else 0) }.reduceByKey{_ + _}.collectAsMap + private lazy val labelCount = labelCountByClass.values.sum + private lazy val tpByClass = predictionsAndLabels + .map{ case (prediction, label) => + (label, if (label == prediction) 1 else 0) + }.reduceByKey(_ + _) + .collectAsMap() + private lazy val fpByClass = predictionsAndLabels + .map{ case (prediction, label) => + (prediction, if (prediction != label) 1 else 0) + }.reduceByKey(_ + _) + .collectAsMap() /** - * Returns Precision for a given label (category) + * Returns precision for a given label (category) * @param label the label. - * @return Precision. */ - def precision(label: Double): Double = if(tpByClass(label) + fpByClass.getOrElse(label, 0) == 0) 0 - else tpByClass(label).toDouble / (tpByClass(label) + fpByClass.getOrElse(label, 0)).toDouble + def precision(label: Double): Double = { + val tp = tpByClass(label) + val fp = fpByClass.getOrElse(label, 0) + if (tp + fp == 0) 0 else tp.toDouble / (tp + fp) + } /** - * Returns Recall for a given label (category) + * Returns recall for a given label (category) * @param label the label. - * @return Recall. */ - def recall(label: Double): Double = tpByClass(label).toDouble / labelCountByClass(label).toDouble + def recall(label: Double): Double = tpByClass(label).toDouble / labelCountByClass(label) /** - * Returns F1-measure for a given label (category) + * Returns f-measure for a given label (category) * @param label the label. - * @return F1-measure. */ - def f1Measure(label: Double): Double ={ + def fMeasure(label: Double, beta:Double = 1.0): Double = { val p = precision(label) val r = recall(label) - if((p + r) == 0) 0 else 2 * p * r / (p + r) + val betaSqrd = beta * beta + if (p + r == 0) 0 else (1 + betaSqrd) * p * r / (betaSqrd * p + r) } /** - * Returns micro-averaged Recall + * Returns micro-averaged recall * (equals to microPrecision and microF1measure for multiclass classifier) - * @return microRecall. */ - lazy val microRecall: Double = - tpByClass.foldLeft(0L){case (sum,(_, tp)) => sum + tp}.toDouble / labelCount + lazy val recall: Double = + tpByClass.values.sum.toDouble / labelCount /** - * Returns micro-averaged Precision + * Returns micro-averaged precision * (equals to microPrecision and microF1measure for multiclass classifier) - * @return microPrecision. */ - lazy val microPrecision: Double = microRecall + lazy val precision: Double = recall /** - * Returns micro-averaged F1-measure + * Returns micro-averaged f-measure * (equals to microPrecision and microRecall for multiclass classifier) - * @return microF1measure. */ - lazy val microF1Measure: Double = microRecall + lazy val fMeasure: Double = recall /** - * Returns weighted averaged Recall - * @return weightedRecall. 
+ * Returns weighted averaged recall + * (equals to micro-averaged precision, recall and f-measure) */ - lazy val weightedRecall: Double = labelCountByClass.foldLeft(0.0){case(wRecall, (category, count)) => - wRecall + recall(category) * count.toDouble / labelCount} + lazy val weightedRecall: Double = labelCountByClass.map { case (category, count) => + recall(category) * count.toDouble / labelCount + }.sum /** - * Returns weighted averaged Precision - * @return weightedPrecision. + * Returns weighted averaged precision */ - lazy val weightedPrecision: Double = - labelCountByClass.foldLeft(0.0){case(wPrecision, (category, count)) => - wPrecision + precision(category) * count.toDouble / labelCount} + lazy val weightedPrecision: Double = labelCountByClass.map { case (category, count) => + precision(category) * count.toDouble / labelCount + }.sum /** - * Returns weighted averaged F1-measure - * @return weightedF1Measure. + * Returns weighted averaged f1-measure */ - lazy val weightedF1Measure: Double = - labelCountByClass.foldLeft(0.0){case(wF1measure, (category, count)) => - wF1measure + f1Measure(category) * count.toDouble / labelCount} + lazy val weightedF1Measure: Double = labelCountByClass.map { case (category, count) => + fMeasure(category) * count.toDouble / labelCount + }.sum /** - * Returns map with Precisions for individual classes - * @return precisionPerClass. + * Returns the sequence of labels in ascending order */ - lazy val precisionPerClass = - labelCountByClass.map{case (category, _) => (category, precision(category))}.toMap + lazy val labels = tpByClass.unzip._1.toSeq.sorted - /** - * Returns map with Recalls for individual classes - * @return recallPerClass. - */ - lazy val recallPerClass = - labelCountByClass.map{case (category, _) => (category, recall(category))}.toMap - - /** - * Returns map with F1-measures for individual classes - * @return f1MeasurePerClass. 
- */ - lazy val f1MeasurePerClass = - labelCountByClass.map{case (category, _) => (category, f1Measure(category))}.toMap } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala index b4e3664ab7916..4b959b2d542ac 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala @@ -17,9 +17,10 @@ package org.apache.spark.mllib.evaluation -import org.apache.spark.mllib.util.LocalSparkContext import org.scalatest.FunSuite +import org.apache.spark.mllib.util.LocalSparkContext + class MulticlassMetricsSuite extends FunSuite with LocalSparkContext { test("Multiclass evaluation metrics") { /* @@ -29,12 +30,12 @@ class MulticlassMetricsSuite extends FunSuite with LocalSparkContext { * |0|0|1| true class2 (1 instance) * */ + val labels = Seq(0.0, 1.0, 2.0) val scoreAndLabels = sc.parallelize( Seq((0.0, 0.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (2.0, 2.0), (2.0, 0.0)), 2) val metrics = new MulticlassMetrics(scoreAndLabels) - - val delta = 0.00001 + val delta = 0.0000001 val precision0 = 2.0 / (2.0 + 1.0) val precision1 = 3.0 / (3.0 + 1.0) val precision2 = 1.0 / (1.0 + 1.0) @@ -44,28 +45,26 @@ class MulticlassMetricsSuite extends FunSuite with LocalSparkContext { val f1measure0 = 2 * precision0 * recall0 / (precision0 + recall0) val f1measure1 = 2 * precision1 * recall1 / (precision1 + recall1) val f1measure2 = 2 * precision2 * recall2 / (precision2 + recall2) - assert(math.abs(metrics.precision(0.0) - precision0) < delta) assert(math.abs(metrics.precision(1.0) - precision1) < delta) assert(math.abs(metrics.precision(2.0) - precision2) < delta) assert(math.abs(metrics.recall(0.0) - recall0) < delta) assert(math.abs(metrics.recall(1.0) - recall1) < delta) assert(math.abs(metrics.recall(2.0) - recall2) < delta) - assert(math.abs(metrics.f1Measure(0.0) - f1measure0) < delta) - assert(math.abs(metrics.f1Measure(1.0) - f1measure1) < delta) - assert(math.abs(metrics.f1Measure(2.0) - f1measure2) < delta) - - assert(math.abs(metrics.microRecall - + assert(math.abs(metrics.fMeasure(0.0) - f1measure0) < delta) + assert(math.abs(metrics.fMeasure(1.0) - f1measure1) < delta) + assert(math.abs(metrics.fMeasure(2.0) - f1measure2) < delta) + assert(math.abs(metrics.recall - (2.0 + 3.0 + 1.0) / ((2.0 + 3.0 + 1.0) + (1.0 + 1.0 + 1.0))) < delta) - assert(math.abs(metrics.microRecall - metrics.microPrecision) < delta) - assert(math.abs(metrics.microRecall - metrics.microF1Measure) < delta) - assert(math.abs(metrics.microRecall - metrics.weightedRecall) < delta) + assert(math.abs(metrics.recall - metrics.precision) < delta) + assert(math.abs(metrics.recall - metrics.fMeasure) < delta) + assert(math.abs(metrics.recall - metrics.weightedRecall) < delta) assert(math.abs(metrics.weightedPrecision - ((4.0 / 9.0) * precision0 + (4.0 / 9.0) * precision1 + (1.0 / 9.0) * precision2)) < delta) assert(math.abs(metrics.weightedRecall - ((4.0 / 9.0) * recall0 + (4.0 / 9.0) * recall1 + (1.0 / 9.0) * recall2)) < delta) assert(math.abs(metrics.weightedF1Measure - ((4.0 / 9.0) * f1measure0 + (4.0 / 9.0) * f1measure1 + (1.0 / 9.0) * f1measure2)) < delta) - + assert(metrics.labels == labels) } } From a7e8bf0cde8d4a92a42c6d6fdf811713768dffac Mon Sep 17 00:00:00 2001 From: Alexander Ulanov Date: Tue, 8 Jul 2014 13:22:04 +0400 Subject: [PATCH 07/14] Addressing 
reviewers comments mengxr --- .../mllib/evaluation/MulticlassMetrics.scala | 52 +++++++++++-------- .../evaluation/MulticlassMetricsSuite.scala | 4 +- 2 files changed, 32 insertions(+), 24 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala index 7a70117f36d4c..8f25a3d0020d0 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala @@ -22,7 +22,10 @@ import org.apache.spark.rdd.RDD import org.apache.spark.Logging import org.apache.spark.SparkContext._ +import scala.collection.Map + /** + * ::Experimental:: * Evaluator for multiclass classification. * * @param predictionsAndLabels an RDD of (prediction, label) pairs. @@ -30,16 +33,16 @@ import org.apache.spark.SparkContext._ @Experimental class MulticlassMetrics(predictionsAndLabels: RDD[(Double, Double)]) extends Logging { - private lazy val labelCountByClass = predictionsAndLabels.values.countByValue() - private lazy val labelCount = labelCountByClass.values.sum - private lazy val tpByClass = predictionsAndLabels - .map{ case (prediction, label) => - (label, if (label == prediction) 1 else 0) + private lazy val labelCountByClass: Map[Double, Long] = predictionsAndLabels.values.countByValue() + private lazy val labelCount: Long = labelCountByClass.values.sum + private lazy val tpByClass: Map[Double, Int] = predictionsAndLabels + .map { case (prediction, label) => + (label, if (label == prediction) 1 else 0) }.reduceByKey(_ + _) .collectAsMap() - private lazy val fpByClass = predictionsAndLabels - .map{ case (prediction, label) => - (prediction, if (prediction != label) 1 else 0) + private lazy val fpByClass: Map[Double, Int] = predictionsAndLabels + .map { case (prediction, label) => + (prediction, if (prediction != label) 1 else 0) }.reduceByKey(_ + _) .collectAsMap() @@ -63,7 +66,7 @@ class MulticlassMetrics(predictionsAndLabels: RDD[(Double, Double)]) extends Log * Returns f-measure for a given label (category) * @param label the label. */ - def fMeasure(label: Double, beta:Double = 1.0): Double = { + def fMeasure(label: Double, beta: Double): Double = { val p = precision(label) val r = recall(label) val betaSqrd = beta * beta @@ -71,27 +74,33 @@ class MulticlassMetrics(predictionsAndLabels: RDD[(Double, Double)]) extends Log } /** - * Returns micro-averaged recall - * (equals to microPrecision and microF1measure for multiclass classifier) + * Returns f1-measure for a given label (category) + * @param label the label. 
+ */ + def fMeasure(label: Double): Double = fMeasure(label, 1.0) + + /** + * Returns precision */ - lazy val recall: Double = - tpByClass.values.sum.toDouble / labelCount + lazy val precision: Double = tpByClass.values.sum.toDouble / labelCount /** - * Returns micro-averaged precision - * (equals to microPrecision and microF1measure for multiclass classifier) + * Returns recall + * (equals to precision for multiclass classifier + * because sum of all false positives is equal to sum + * of all false negatives) */ - lazy val precision: Double = recall + lazy val recall: Double = precision /** - * Returns micro-averaged f-measure - * (equals to microPrecision and microRecall for multiclass classifier) + * Returns f-measure + * (equals to precision and recall because precision equals recall) */ - lazy val fMeasure: Double = recall + lazy val fMeasure: Double = precision /** * Returns weighted averaged recall - * (equals to micro-averaged precision, recall and f-measure) + * (equals to precision, recall and f-measure) */ lazy val weightedRecall: Double = labelCountByClass.map { case (category, count) => recall(category) * count.toDouble / labelCount @@ -114,6 +123,5 @@ class MulticlassMetrics(predictionsAndLabels: RDD[(Double, Double)]) extends Log /** * Returns the sequence of labels in ascending order */ - lazy val labels = tpByClass.unzip._1.toSeq.sorted - + lazy val labels:Array[Double] = tpByClass.keys.toArray.sorted } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala index 4b959b2d542ac..9bdd5745677aa 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala @@ -30,7 +30,7 @@ class MulticlassMetricsSuite extends FunSuite with LocalSparkContext { * |0|0|1| true class2 (1 instance) * */ - val labels = Seq(0.0, 1.0, 2.0) + val labels = Array(0.0, 1.0, 2.0) val scoreAndLabels = sc.parallelize( Seq((0.0, 0.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (2.0, 2.0), (2.0, 0.0)), 2) @@ -65,6 +65,6 @@ class MulticlassMetricsSuite extends FunSuite with LocalSparkContext { ((4.0 / 9.0) * recall0 + (4.0 / 9.0) * recall1 + (1.0 / 9.0) * recall2)) < delta) assert(math.abs(metrics.weightedF1Measure - ((4.0 / 9.0) * f1measure0 + (4.0 / 9.0) * f1measure1 + (1.0 / 9.0) * f1measure2)) < delta) - assert(metrics.labels == labels) + assert(metrics.labels.sameElements(labels)) } } From e3db56998d2915421e46b159b293c5abc9cc90d0 Mon Sep 17 00:00:00 2001 From: Alexander Ulanov Date: Wed, 9 Jul 2014 15:40:57 +0400 Subject: [PATCH 08/14] Addressing reviewers comments mengxr. Added true positive rate and false positive rate. Test suite code style. 
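
The new rates follow the usual per-label definitions: truePositiveRate(label) is simply recall(label), and falsePositiveRate(label) divides the false positives recorded for that label by the number of instances that do not carry it. As a quick sanity check against the 9-instance confusion matrix used in the test suite (arithmetic only, not additional test code):

// class 0: 1 false positive out of 9 - 4 = 5 negatives
falsePositiveRate(0.0) == 1.0 / (9 - 4)  // 0.2
// class 2: 1 false positive out of 9 - 1 = 8 negatives
falsePositiveRate(2.0) == 1.0 / (9 - 1)  // 0.125
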
--- .../mllib/evaluation/MulticlassMetrics.scala | 32 ++++++++++++++++--- .../evaluation/MulticlassMetricsSuite.scala | 28 +++++++++------- 2 files changed, 45 insertions(+), 15 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala index 8f25a3d0020d0..df30fe601604d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala @@ -43,9 +43,24 @@ class MulticlassMetrics(predictionsAndLabels: RDD[(Double, Double)]) extends Log private lazy val fpByClass: Map[Double, Int] = predictionsAndLabels .map { case (prediction, label) => (prediction, if (prediction != label) 1 else 0) - }.reduceByKey(_ + _) + }.reduceByKey(_ + _) .collectAsMap() + /** + * Returns true positive rate for a given label (category) + * @param label the label. + */ + def truePositiveRate(label: Double): Double = recall(label) + + /** + * Returns false positive rate for a given label (category) + * @param label the label. + */ + def falsePositiveRate(label: Double): Double = { + val fp = fpByClass.getOrElse(label, 0) + fp.toDouble / (labelCount - labelCountByClass(label)) + } + /** * Returns precision for a given label (category) * @param label the label. @@ -65,6 +80,7 @@ class MulticlassMetrics(predictionsAndLabels: RDD[(Double, Double)]) extends Log /** * Returns f-measure for a given label (category) * @param label the label. + * @param beta the beta parameter. */ def fMeasure(label: Double, beta: Double): Double = { val p = precision(label) @@ -113,15 +129,23 @@ class MulticlassMetrics(predictionsAndLabels: RDD[(Double, Double)]) extends Log precision(category) * count.toDouble / labelCount }.sum + /** + * Returns weighted averaged f-measure + * @param beta the beta parameter. 
+ */ + def weightedFMeasure(beta: Double): Double = labelCountByClass.map { case (category, count) => + fMeasure(category, beta) * count.toDouble / labelCount + }.sum + /** * Returns weighted averaged f1-measure */ - lazy val weightedF1Measure: Double = labelCountByClass.map { case (category, count) => - fMeasure(category) * count.toDouble / labelCount + lazy val weightedFMeasure: Double = labelCountByClass.map { case (category, count) => + fMeasure(category, 1.0) * count.toDouble / labelCount }.sum /** * Returns the sequence of labels in ascending order */ - lazy val labels:Array[Double] = tpByClass.keys.toArray.sorted + lazy val labels: Array[Double] = tpByClass.keys.toArray.sorted } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala index 9bdd5745677aa..e2dd57d698141 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala @@ -36,15 +36,21 @@ class MulticlassMetricsSuite extends FunSuite with LocalSparkContext { (1.0, 1.0), (1.0, 1.0), (2.0, 2.0), (2.0, 0.0)), 2) val metrics = new MulticlassMetrics(scoreAndLabels) val delta = 0.0000001 - val precision0 = 2.0 / (2.0 + 1.0) - val precision1 = 3.0 / (3.0 + 1.0) - val precision2 = 1.0 / (1.0 + 1.0) - val recall0 = 2.0 / (2.0 + 2.0) - val recall1 = 3.0 / (3.0 + 1.0) - val recall2 = 1.0 / (1.0 + 0.0) + val fpRate0 = 1.0 / (9 - 4) + val fpRate1 = 1.0 / (9 - 4) + val fpRate2 = 1.0 / (9 - 1) + val precision0 = 2.0 / (2 + 1) + val precision1 = 3.0 / (3 + 1) + val precision2 = 1.0 / (1 + 1) + val recall0 = 2.0 / (2 + 2) + val recall1 = 3.0 / (3 + 1) + val recall2 = 1.0 / (1 + 0) val f1measure0 = 2 * precision0 * recall0 / (precision0 + recall0) val f1measure1 = 2 * precision1 * recall1 / (precision1 + recall1) val f1measure2 = 2 * precision2 * recall2 / (precision2 + recall2) + assert(math.abs(metrics.falsePositiveRate(0.0) - fpRate0) < delta) + assert(math.abs(metrics.falsePositiveRate(1.0) - fpRate1) < delta) + assert(math.abs(metrics.falsePositiveRate(2.0) - fpRate2) < delta) assert(math.abs(metrics.precision(0.0) - precision0) < delta) assert(math.abs(metrics.precision(1.0) - precision1) < delta) assert(math.abs(metrics.precision(2.0) - precision2) < delta) @@ -55,16 +61,16 @@ class MulticlassMetricsSuite extends FunSuite with LocalSparkContext { assert(math.abs(metrics.fMeasure(1.0) - f1measure1) < delta) assert(math.abs(metrics.fMeasure(2.0) - f1measure2) < delta) assert(math.abs(metrics.recall - - (2.0 + 3.0 + 1.0) / ((2.0 + 3.0 + 1.0) + (1.0 + 1.0 + 1.0))) < delta) + (2.0 + 3.0 + 1.0) / ((2 + 3 + 1) + (1 + 1 + 1))) < delta) assert(math.abs(metrics.recall - metrics.precision) < delta) assert(math.abs(metrics.recall - metrics.fMeasure) < delta) assert(math.abs(metrics.recall - metrics.weightedRecall) < delta) assert(math.abs(metrics.weightedPrecision - - ((4.0 / 9.0) * precision0 + (4.0 / 9.0) * precision1 + (1.0 / 9.0) * precision2)) < delta) + ((4.0 / 9) * precision0 + (4.0 / 9) * precision1 + (1.0 / 9) * precision2)) < delta) assert(math.abs(metrics.weightedRecall - - ((4.0 / 9.0) * recall0 + (4.0 / 9.0) * recall1 + (1.0 / 9.0) * recall2)) < delta) - assert(math.abs(metrics.weightedF1Measure - - ((4.0 / 9.0) * f1measure0 + (4.0 / 9.0) * f1measure1 + (1.0 / 9.0) * f1measure2)) < delta) + ((4.0 / 9) * recall0 + (4.0 / 9) * recall1 + (1.0 / 9) * recall2)) < delta) + 
assert(math.abs(metrics.weightedFMeasure - + ((4.0 / 9) * f1measure0 + (4.0 / 9) * f1measure1 + (1.0 / 9) * f1measure2)) < delta) assert(metrics.labels.sameElements(labels)) } } From 87fb11fc17d5d3747e38b9d6649b116369130c63 Mon Sep 17 00:00:00 2001 From: Alexander Ulanov Date: Thu, 10 Jul 2014 14:35:41 +0400 Subject: [PATCH 09/14] Addressing reviewers comments mengxr. Added confusion matrix --- .../mllib/evaluation/MulticlassMetrics.scala | 57 ++++++++++++++----- .../evaluation/MulticlassMetricsSuite.scala | 32 +++++++---- 2 files changed, 66 insertions(+), 23 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala index df30fe601604d..b9b783bd47cf4 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala @@ -17,10 +17,10 @@ package org.apache.spark.mllib.evaluation -import org.apache.spark.annotation.Experimental -import org.apache.spark.rdd.RDD import org.apache.spark.Logging import org.apache.spark.SparkContext._ +import org.apache.spark.annotation.Experimental +import org.apache.spark.rdd.RDD import scala.collection.Map @@ -28,23 +28,41 @@ import scala.collection.Map * ::Experimental:: * Evaluator for multiclass classification. * - * @param predictionsAndLabels an RDD of (prediction, label) pairs. + * @param predictionAndLabels an RDD of (prediction, label) pairs. */ @Experimental -class MulticlassMetrics(predictionsAndLabels: RDD[(Double, Double)]) extends Logging { +class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) extends Logging { - private lazy val labelCountByClass: Map[Double, Long] = predictionsAndLabels.values.countByValue() + private lazy val labelCountByClass: Map[Double, Long] = predictionAndLabels.values.countByValue() private lazy val labelCount: Long = labelCountByClass.values.sum - private lazy val tpByClass: Map[Double, Int] = predictionsAndLabels + private lazy val tpByClass: Map[Double, Int] = predictionAndLabels .map { case (prediction, label) => - (label, if (label == prediction) 1 else 0) - }.reduceByKey(_ + _) + (label, if (label == prediction) 1 else 0) + }.reduceByKey(_ + _) .collectAsMap() - private lazy val fpByClass: Map[Double, Int] = predictionsAndLabels + private lazy val fpByClass: Map[Double, Int] = predictionAndLabels .map { case (prediction, label) => - (prediction, if (prediction != label) 1 else 0) - }.reduceByKey(_ + _) + (prediction, if (prediction != label) 1 else 0) + }.reduceByKey(_ + _) .collectAsMap() + private lazy val confusions = predictionAndLabels.map { + case (prediction, label) => ((prediction, label), 1) + }.reduceByKey(_ + _).collectAsMap() + + /** + * Returns confusion matrix: + * predicted classes are in columns, + * they are ordered by class label ascending, + * as in "labels" + */ + lazy val confusionMatrix: Array[Array[Int]] = { + val matrix = Array.ofDim[Int](labels.size, labels.size) + println(matrix.length, matrix(0).length) + for (i <- 0 to labels.size - 1; j <- 0 to labels.size - 1) { + matrix(j)(i) = confusions.getOrElse((labels(i), labels(j)), 0) + } + matrix + } /** * Returns true positive rate for a given label (category) @@ -103,8 +121,8 @@ class MulticlassMetrics(predictionsAndLabels: RDD[(Double, Double)]) extends Log /** * Returns recall * (equals to precision for multiclass classifier - * because sum of all false positives is equal to sum - * of all 
false negatives) + * because sum of all false positives is equal to sum + * of all false negatives) */ lazy val recall: Double = precision @@ -114,6 +132,19 @@ class MulticlassMetrics(predictionsAndLabels: RDD[(Double, Double)]) extends Log */ lazy val fMeasure: Double = precision + /** + * Returns weighted true positive rate + * (equals to precision, recall and f-measure) + */ + lazy val weightedTruePositiveRate: Double = weightedRecall + + /** + * Returns weighted false positive rate + */ + lazy val weightedFalsePositiveRate: Double = labelCountByClass.map { case (category, count) => + falsePositiveRate(category) * count.toDouble / labelCount + }.sum + /** * Returns weighted averaged recall * (equals to precision, recall and f-measure) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala index e2dd57d698141..c7b01f0135251 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala @@ -17,24 +17,23 @@ package org.apache.spark.mllib.evaluation -import org.scalatest.FunSuite - import org.apache.spark.mllib.util.LocalSparkContext +import org.scalatest.FunSuite class MulticlassMetricsSuite extends FunSuite with LocalSparkContext { test("Multiclass evaluation metrics") { /* - * Confusion matrix for 3-class classification with total 9 instances: - * |2|1|1| true class0 (4 instances) - * |1|3|0| true class1 (4 instances) - * |0|0|1| true class2 (1 instance) - * - */ + * Confusion matrix for 3-class classification with total 9 instances: + * |2|1|1| true class0 (4 instances) + * |1|3|0| true class1 (4 instances) + * |0|0|1| true class2 (1 instance) + */ + val confusionMatrix = Array(Array(2, 1, 1), Array(1, 3, 0), Array(0, 0, 1)) val labels = Array(0.0, 1.0, 2.0) - val scoreAndLabels = sc.parallelize( + val predictionAndLabels = sc.parallelize( Seq((0.0, 0.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (2.0, 2.0), (2.0, 0.0)), 2) - val metrics = new MulticlassMetrics(scoreAndLabels) + val metrics = new MulticlassMetrics(predictionAndLabels) val delta = 0.0000001 val fpRate0 = 1.0 / (9 - 4) val fpRate1 = 1.0 / (9 - 4) @@ -48,6 +47,11 @@ class MulticlassMetricsSuite extends FunSuite with LocalSparkContext { val f1measure0 = 2 * precision0 * recall0 / (precision0 + recall0) val f1measure1 = 2 * precision1 * recall1 / (precision1 + recall1) val f1measure2 = 2 * precision2 * recall2 / (precision2 + recall2) + val f2measure0 = (1 + 2 * 2) * precision0 * recall0 / (2 * 2 * precision0 + recall0) + val f2measure1 = (1 + 2 * 2) * precision1 * recall1 / (2 * 2 * precision1 + recall1) + val f2measure2 = (1 + 2 * 2) * precision2 * recall2 / (2 * 2 * precision2 + recall2) + + assert(metrics.confusionMatrix.deep == confusionMatrix.deep) assert(math.abs(metrics.falsePositiveRate(0.0) - fpRate0) < delta) assert(math.abs(metrics.falsePositiveRate(1.0) - fpRate1) < delta) assert(math.abs(metrics.falsePositiveRate(2.0) - fpRate2) < delta) @@ -60,17 +64,25 @@ class MulticlassMetricsSuite extends FunSuite with LocalSparkContext { assert(math.abs(metrics.fMeasure(0.0) - f1measure0) < delta) assert(math.abs(metrics.fMeasure(1.0) - f1measure1) < delta) assert(math.abs(metrics.fMeasure(2.0) - f1measure2) < delta) + assert(math.abs(metrics.fMeasure(0.0, 2.0) - f2measure0) < delta) + assert(math.abs(metrics.fMeasure(1.0, 2.0) - f2measure1) < 
delta) + assert(math.abs(metrics.fMeasure(2.0, 2.0) - f2measure2) < delta) + assert(math.abs(metrics.recall - (2.0 + 3.0 + 1.0) / ((2 + 3 + 1) + (1 + 1 + 1))) < delta) assert(math.abs(metrics.recall - metrics.precision) < delta) assert(math.abs(metrics.recall - metrics.fMeasure) < delta) assert(math.abs(metrics.recall - metrics.weightedRecall) < delta) + assert(math.abs(metrics.weightedFalsePositiveRate - + ((4.0 / 9) * fpRate0 + (4.0 / 9) * fpRate1 + (1.0 / 9) * fpRate2)) < delta) assert(math.abs(metrics.weightedPrecision - ((4.0 / 9) * precision0 + (4.0 / 9) * precision1 + (1.0 / 9) * precision2)) < delta) assert(math.abs(metrics.weightedRecall - ((4.0 / 9) * recall0 + (4.0 / 9) * recall1 + (1.0 / 9) * recall2)) < delta) assert(math.abs(metrics.weightedFMeasure - ((4.0 / 9) * f1measure0 + (4.0 / 9) * f1measure1 + (1.0 / 9) * f1measure2)) < delta) + assert(math.abs(metrics.weightedFMeasure(2.0) - + ((4.0 / 9) * f2measure0 + (4.0 / 9) * f2measure1 + (1.0 / 9) * f2measure2)) < delta) assert(metrics.labels.sameElements(labels)) } } From 481137814990b16c9e9399f0db79ae08c1f8d940 Mon Sep 17 00:00:00 2001 From: Alexander Ulanov Date: Thu, 10 Jul 2014 16:39:32 +0400 Subject: [PATCH 10/14] Removing println --- .../org/apache/spark/mllib/evaluation/MulticlassMetrics.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala index b9b783bd47cf4..8b30da69ec86a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala @@ -57,7 +57,6 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) extends Logg */ lazy val confusionMatrix: Array[Array[Int]] = { val matrix = Array.ofDim[Int](labels.size, labels.size) - println(matrix.length, matrix(0).length) for (i <- 0 to labels.size - 1; j <- 0 to labels.size - 1) { matrix(j)(i) = confusions.getOrElse((labels(i), labels(j)), 0) } From f0dadc9e8105040ac72ccd1465814200889f73c3 Mon Sep 17 00:00:00 2001 From: Alexander Ulanov Date: Fri, 11 Jul 2014 14:01:31 +0400 Subject: [PATCH 11/14] Addressing reviewers comments mengxr --- .../mllib/evaluation/MulticlassMetrics.scala | 20 ++++++++++--------- .../evaluation/MulticlassMetricsSuite.scala | 5 +++-- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala index 8b30da69ec86a..3b7ba7288c0f3 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala @@ -20,6 +20,7 @@ package org.apache.spark.mllib.evaluation import org.apache.spark.Logging import org.apache.spark.SparkContext._ import org.apache.spark.annotation.Experimental +import org.apache.spark.mllib.linalg.{Matrices, Matrix} import org.apache.spark.rdd.RDD import scala.collection.Map @@ -31,19 +32,19 @@ import scala.collection.Map * @param predictionAndLabels an RDD of (prediction, label) pairs. 
*/ @Experimental -class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) extends Logging { +class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) { private lazy val labelCountByClass: Map[Double, Long] = predictionAndLabels.values.countByValue() private lazy val labelCount: Long = labelCountByClass.values.sum private lazy val tpByClass: Map[Double, Int] = predictionAndLabels .map { case (prediction, label) => - (label, if (label == prediction) 1 else 0) - }.reduceByKey(_ + _) + (label, if (label == prediction) 1 else 0) + }.reduceByKey(_ + _) .collectAsMap() private lazy val fpByClass: Map[Double, Int] = predictionAndLabels .map { case (prediction, label) => - (prediction, if (prediction != label) 1 else 0) - }.reduceByKey(_ + _) + (prediction, if (prediction != label) 1 else 0) + }.reduceByKey(_ + _) .collectAsMap() private lazy val confusions = predictionAndLabels.map { case (prediction, label) => ((prediction, label), 1) @@ -55,12 +56,13 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) extends Logg * they are ordered by class label ascending, * as in "labels" */ - lazy val confusionMatrix: Array[Array[Int]] = { - val matrix = Array.ofDim[Int](labels.size, labels.size) + lazy val confusionMatrix: Matrix = { + val transposedMatrix = Array.ofDim[Double](labels.size, labels.size) for (i <- 0 to labels.size - 1; j <- 0 to labels.size - 1) { - matrix(j)(i) = confusions.getOrElse((labels(i), labels(j)), 0) + transposedMatrix(i)(j) = confusions.getOrElse((labels(i), labels(j)), 0).toDouble } - matrix + val flatMatrix = transposedMatrix.flatMap(arr => arr) + Matrices.dense(transposedMatrix.length, transposedMatrix(0).length, flatMatrix) } /** diff --git a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala index c7b01f0135251..555343d7cdb21 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.mllib.evaluation +import org.apache.spark.mllib.linalg.Matrices import org.apache.spark.mllib.util.LocalSparkContext import org.scalatest.FunSuite @@ -28,7 +29,7 @@ class MulticlassMetricsSuite extends FunSuite with LocalSparkContext { * |1|3|0| true class1 (4 instances) * |0|0|1| true class2 (1 instance) */ - val confusionMatrix = Array(Array(2, 1, 1), Array(1, 3, 0), Array(0, 0, 1)) + val confusionMatrix = Matrices.dense(3, 3, Array(2, 1, 0, 1, 3, 0, 1, 0, 1)) val labels = Array(0.0, 1.0, 2.0) val predictionAndLabels = sc.parallelize( Seq((0.0, 0.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0), (1.0, 1.0), @@ -51,7 +52,7 @@ class MulticlassMetricsSuite extends FunSuite with LocalSparkContext { val f2measure1 = (1 + 2 * 2) * precision1 * recall1 / (2 * 2 * precision1 + recall1) val f2measure2 = (1 + 2 * 2) * precision2 * recall2 / (2 * 2 * precision2 + recall2) - assert(metrics.confusionMatrix.deep == confusionMatrix.deep) + assert(metrics.confusionMatrix.toArray.sameElements(confusionMatrix.toArray)) assert(math.abs(metrics.falsePositiveRate(0.0) - fpRate0) < delta) assert(math.abs(metrics.falsePositiveRate(1.0) - fpRate1) < delta) assert(math.abs(metrics.falsePositiveRate(2.0) - fpRate2) < delta) From 0fa9511991951249d66fcda0b30ecde7e35134bb Mon Sep 17 00:00:00 2001 From: Alexander Ulanov Date: Mon, 14 Jul 2014 13:53:22 +0400 Subject: [PATCH 12/14] Addressing reviewers 

From 0fa9511991951249d66fcda0b30ecde7e35134bb Mon Sep 17 00:00:00 2001
From: Alexander Ulanov
Date: Mon, 14 Jul 2014 13:53:22 +0400
Subject: [PATCH 12/14] Addressing reviewers comments mengxr

---
 .../spark/mllib/evaluation/MulticlassMetrics.scala | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
index 3b7ba7288c0f3..d48068719a851 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
@@ -17,14 +17,13 @@ package org.apache.spark.mllib.evaluation
-import org.apache.spark.Logging
+import scala.collection.Map
+
 import org.apache.spark.SparkContext._
 import org.apache.spark.annotation.Experimental
 import org.apache.spark.mllib.linalg.{Matrices, Matrix}
 import org.apache.spark.rdd.RDD
 
-import scala.collection.Map
-
 /**
  * ::Experimental::
  * Evaluator for multiclass classification.
@@ -57,12 +56,12 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
    * as in "labels"
    */
   lazy val confusionMatrix: Matrix = {
-    val transposedMatrix = Array.ofDim[Double](labels.size, labels.size)
+    val transposedFlatMatrix = Array.ofDim[Double](labels.size * labels.size)
     for (i <- 0 to labels.size - 1; j <- 0 to labels.size - 1) {
-      transposedMatrix(i)(j) = confusions.getOrElse((labels(i), labels(j)), 0).toDouble
+      transposedFlatMatrix(i * labels.size + j)
+        = confusions.getOrElse((labels(i), labels(j)), 0).toDouble
     }
-    val flatMatrix = transposedMatrix.flatMap(arr => arr)
-    Matrices.dense(transposedMatrix.length, transposedMatrix(0).length, flatMatrix)
+    Matrices.dense(labels.size, labels.size, transposedFlatMatrix)
   }

From 79c35550447ccb914256c1b96702dd76ab065a53 Mon Sep 17 00:00:00 2001
From: Alexander Ulanov
Date: Tue, 15 Jul 2014 13:30:44 +0400
Subject: [PATCH 13/14] Addressing reviewers comments mengxr

---
 .../mllib/evaluation/MulticlassMetrics.scala | 20 +++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
index d48068719a851..292cbaf639f90 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
@@ -45,8 +45,9 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
       (prediction, if (prediction != label) 1 else 0)
     }.reduceByKey(_ + _)
     .collectAsMap()
-  private lazy val confusions = predictionAndLabels.map {
-    case (prediction, label) => ((prediction, label), 1)
+  private lazy val confusions = predictionAndLabels
+    .map { case (prediction, label) =>
+      ((prediction, label), 1)
     }.reduceByKey(_ + _).collectAsMap()
 
   /**
@@ -55,11 +56,18 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
    * they are ordered by class label ascending,
    * as in "labels"
    */
-  lazy val confusionMatrix: Matrix = {
+  def confusionMatrix: Matrix = {
     val transposedFlatMatrix = Array.ofDim[Double](labels.size * labels.size)
-    for (i <- 0 to labels.size - 1; j <- 0 to labels.size - 1) {
-      transposedFlatMatrix(i * labels.size + j)
-        = confusions.getOrElse((labels(i), labels(j)), 0).toDouble
+    val n = labels.size
+    var i, j = 0
+    while(i < n){
+      j = 0
+      while(j < n){
+        transposedFlatMatrix(i * labels.size + j)
+          = confusions.getOrElse((labels(i), labels(j)), 0).toDouble
+        j += 1
+      }
+      i += 1
     }
     Matrices.dense(labels.size, labels.size, transposedFlatMatrix)
   }

From 5ebeb0849354b7086e334e05529f819092336f21 Mon Sep 17 00:00:00 2001
From: Xiangrui Meng
Date: Tue, 15 Jul 2014 02:51:26 -0700
Subject: [PATCH 14/14] minor updates

---
 .../mllib/evaluation/MulticlassMetrics.scala  | 20 ++++++++++----------
 .../evaluation/MulticlassMetricsSuite.scala   |  3 ++-
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
index 292cbaf639f90..666362ae6739a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
@@ -47,8 +47,9 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
     .collectAsMap()
   private lazy val confusions = predictionAndLabels
     .map { case (prediction, label) =>
-      ((prediction, label), 1)
-    }.reduceByKey(_ + _).collectAsMap()
+      ((label, prediction), 1)
+    }.reduceByKey(_ + _)
+    .collectAsMap()
 
   /**
    * Returns confusion matrix:
@@ -57,19 +58,18 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
    * as in "labels"
    */
   def confusionMatrix: Matrix = {
-    val transposedFlatMatrix = Array.ofDim[Double](labels.size * labels.size)
     val n = labels.size
-    var i, j = 0
-    while(i < n){
-      j = 0
-      while(j < n){
-        transposedFlatMatrix(i * labels.size + j)
-          = confusions.getOrElse((labels(i), labels(j)), 0).toDouble
+    val values = Array.ofDim[Double](n * n)
+    var i = 0
+    while (i < n) {
+      var j = 0
+      while (j < n) {
+        values(i + j * n) = confusions.getOrElse((labels(i), labels(j)), 0).toDouble
         j += 1
       }
       i += 1
     }
-    Matrices.dense(labels.size, labels.size, transposedFlatMatrix)
+    Matrices.dense(n, n, values)
   }
 
   /**
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala
index 555343d7cdb21..1ea503971c864 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala
@@ -17,9 +17,10 @@ package org.apache.spark.mllib.evaluation
+import org.scalatest.FunSuite
+
 import org.apache.spark.mllib.linalg.Matrices
 import org.apache.spark.mllib.util.LocalSparkContext
-import org.scalatest.FunSuite
 
 class MulticlassMetricsSuite extends FunSuite with LocalSparkContext {
   test("Multiclass evaluation metrics") {
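
Reviewer note, illustration only and not part of the patch series. A minimal sketch of how the class might be exercised after patch 14, assuming a local SparkContext; the input pairs are made-up predictions and labels, not data from the test suite:

    import org.apache.spark.SparkContext
    import org.apache.spark.mllib.evaluation.MulticlassMetrics

    val sc = new SparkContext("local", "MulticlassMetricsSketch")
    // (prediction, label) pairs, e.g. the output of a classifier on a labelled set.
    val predictionAndLabels = sc.parallelize(Seq(
      (0.0, 0.0), (1.0, 1.0), (1.0, 0.0), (2.0, 2.0), (0.0, 1.0), (1.0, 1.0)), 2)

    val metrics = new MulticlassMetrics(predictionAndLabels)

    println(metrics.confusionMatrix)          // true classes in rows, predictions in columns
    println(metrics.precision)                // micro-averaged precision
    println(metrics.recall)                   // equal to the micro-averaged precision
    println(metrics.fMeasure(1.0, 1.0))       // F1 for class 1.0
    println(metrics.falsePositiveRate(0.0))   // false positive rate for class 0.0
    println(metrics.weightedRecall)           // recall weighted by class frequency
    println(metrics.weightedFMeasure)
    println(metrics.labels.mkString(", "))    // distinct class labels, ascending

    sc.stop()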