change style and use alternative builtin methods
coderxiang committed Oct 6, 2014
1 parent 3a5a6ff commit e443fee
Showing 2 changed files with 15 additions and 23 deletions.
RankingMetrics.scala
@@ -38,7 +38,7 @@ class RankingMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]) {
   lazy val precAtK: RDD[Array[Double]] = predictionAndLabels.map {case (pred, lab)=>
     val labSet : Set[Double] = lab.toSet
     val n = pred.length
-    val topkPrec = Array.fill[Double](n)(.0)
+    val topkPrec = Array.fill[Double](n)(0.0)
     var (i, cnt) = (0, 0)
 
     while (i < n) {
@@ -56,7 +56,7 @@ class RankingMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]) {
    */
   lazy val avePrec: RDD[Double] = predictionAndLabels.map {case (pred, lab) =>
     val labSet: Set[Double] = lab.toSet
-    var (i, cnt, precSum) = (0, 0, .0)
+    var (i, cnt, precSum) = (0, 0, 0.0)
     val n = pred.length
 
     while (i < n) {
@@ -72,15 +72,15 @@ class RankingMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]) {
   /**
    * Returns the mean average precision (MAP) of all the queries
    */
-  lazy val meanAvePrec: Double = computeMean(avePrec)
+  lazy val meanAvePrec: Double = avePrec.mean
 
   /**
    * Returns the normalized discounted cumulative gain for each query
    */
   lazy val ndcg: RDD[Double] = predictionAndLabels.map {case (pred, lab) =>
     val labSet = lab.toSet
     val n = math.min(pred.length, labSet.size)
-    var (maxDcg, dcg, i) = (.0, .0, 0)
+    var (maxDcg, dcg, i) = (0.0, 0.0, 0)
     while (i < n) {
       /* Calculate 1/log2(i + 2) */
       val gain = 1.0 / (math.log(i + 2) / math.log(2))
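Side note, not part of the diff: the gain term relies on the change-of-base identity log2(x) = ln(x) / ln(2), which is why `math.log(i + 2) / math.log(2)` appears instead of a dedicated log2 call. A minimal standalone check (the object name is illustrative only):

```scala
// Standalone sketch verifying the change-of-base identity used for the gain term.
object Log2GainCheck extends App {
  def log2(x: Double): Double = math.log(x) / math.log(2)

  // Gain at rank i is 1 / log2(i + 2): 1.0 at the top rank, 0.5 two ranks down.
  assert(math.abs(1.0 / log2(0 + 2) - 1.0) < 1e-12)
  assert(math.abs(1.0 / log2(2 + 2) - 0.5) < 1e-12)
}
```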
@@ -96,13 +96,5 @@ class RankingMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]) {
   /**
    * Returns the mean NDCG of all the queries
    */
-  lazy val meanNdcg: Double = computeMean(ndcg)
-
-  private def computeMean(data: RDD[Double]): Double = {
-    val stat = data.aggregate((.0, 0))(
-      seqOp = (c, v) => (c, v) match {case ((sum, cnt), a) => (sum + a, cnt + 1)},
-      combOp = (c1, c2) => (c1, c2) match {case (x, y) => (x._1 + y._1, x._2 + y._2)}
-    )
-    stat._1 / stat._2
-  }
+  lazy val meanNdcg: Double = ndcg.mean
 }
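The substantive change in this file is dropping the hand-rolled computeMean helper in favor of the mean() that Spark already provides on RDD[Double] (via DoubleRDDFunctions), matching the commit title. A minimal sketch of the equivalence on made-up values; the object name and the local SparkContext setup are illustrative, not part of the commit:

```scala
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.SparkContext._ // Double RDD implicits on older Spark versions

// Illustrative sketch: the builtin mean() matches the removed aggregate-based helper.
object MeanEquivalenceSketch extends App {
  val sc = new SparkContext(new SparkConf().setAppName("mean-sketch").setMaster("local[2]"))
  val values = sc.parallelize(Seq(0.25, 0.5, 0.75))

  // What the removed computeMean did: fold a (sum, count) pair, then divide.
  val (sum, cnt) = values.aggregate((0.0, 0))(
    seqOp = { case ((s, c), v) => (s + v, c + 1) },
    combOp = { case ((s1, c1), (s2, c2)) => (s1 + s2, c1 + c2) }
  )
  val byAggregate = sum / cnt

  // What the commit switches to: the builtin mean on RDD[Double].
  val byBuiltin = values.mean()

  assert(math.abs(byAggregate - byBuiltin) < 1e-12) // both are 0.5
  sc.stop()
}
```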
RankingMetricsSuite.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.mllib.evaluation
 
 import org.scalatest.FunSuite
-
+import org.apache.spark.mllib.util.TestingUtils._
 import org.apache.spark.mllib.util.LocalSparkContext
 
 class RankingMetricsSuite extends FunSuite with LocalSparkContext {
@@ -28,7 +28,7 @@ class RankingMetricsSuite extends FunSuite with LocalSparkContext {
       (Array[Double](1, 6, 2, 7, 8, 3, 9, 10, 4, 5), Array[Double](1, 2, 3, 4, 5)),
       (Array[Double](4, 1, 5, 6, 2, 7, 3, 8, 9, 10), Array[Double](1, 2, 3))
     ), 2)
-    val eps: Double = 1e-5
+    val eps: Double = 1E-5
 
     val metrics = new RankingMetrics(predictionAndLabels)
     val precAtK = metrics.precAtK.collect()
@@ -37,13 +37,13 @@ class RankingMetricsSuite extends FunSuite with LocalSparkContext {
     val ndcg = metrics.ndcg.collect()
     val aveNdcg = metrics.meanNdcg
 
-    assert(math.abs(precAtK(0)(4) - 0.4) < eps)
-    assert(math.abs(precAtK(1)(6) - 3.0/7) < eps)
-    assert(math.abs(avePrec(0) - 0.622222) < eps)
-    assert(math.abs(avePrec(1) - 0.442857) < eps)
-    assert(math.abs(map - 0.532539) < eps)
-    assert(math.abs(ndcg(0) - 0.508740) < eps)
-    assert(math.abs(ndcg(1) - 0.296082) < eps)
-    assert(math.abs(aveNdcg - 0.402411) < eps)
+    assert(precAtK(0)(4) ~== 0.4 absTol eps)
+    assert(precAtK(1)(6) ~== 3.0/7 absTol eps)
+    assert(avePrec(0) ~== 0.622222 absTol eps)
+    assert(avePrec(1) ~== 0.442857 absTol eps)
+    assert(map ~== 0.532539 absTol eps)
+    assert(ndcg(0) ~== 0.508740 absTol eps)
+    assert(ndcg(1) ~== 0.296082 absTol eps)
+    assert(aveNdcg ~== 0.402411 absTol eps)
   }
 }
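The new assertions use the ~== matcher with absTol from TestingUtils, which expresses the same |a - b| < eps tolerance that the removed math.abs assertions spelled out. For readers checking where the expected constants come from, here is a hedged, plain-Scala recomputation (no Spark; the object name is illustrative) of three of the first query's expected values, following the definitions visible in the diff above: precision at 5, average precision, and NDCG truncated at min(#predictions, #labels):

```scala
// Plain-Scala recomputation of the first query's expected values (sketch only).
object ExpectedValuesSketch extends App {
  val pred = Array[Double](1, 6, 2, 7, 8, 3, 9, 10, 4, 5)
  val labSet = Set[Double](1, 2, 3, 4, 5)

  // Precision at k = 5: relevant items among the first five predictions.
  println(pred.take(5).count(labSet.contains) / 5.0) // 0.4

  // Average precision: precision at each relevant rank, averaged over |labSet|.
  var (cnt, precSum) = (0, 0.0)
  for (i <- pred.indices) {
    if (labSet.contains(pred(i))) {
      cnt += 1
      precSum += cnt.toDouble / (i + 1)
    }
  }
  println(precSum / labSet.size) // ~0.622222

  // NDCG over the first n = min(pred.length, labSet.size) ranks,
  // with gain 1 / log2(i + 2) at rank i.
  def log2(x: Double): Double = math.log(x) / math.log(2)
  val n = math.min(pred.length, labSet.size)
  var (dcg, maxDcg) = (0.0, 0.0)
  for (i <- 0 until n) {
    val gain = 1.0 / log2(i + 2)
    if (labSet.contains(pred(i))) dcg += gain
    maxDcg += gain
  }
  println(dcg / maxDcg) // ~0.508740
}
```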
