Skip to content

Commit

Permalink
Bug fixes for examples SampledRDDs.scala and sampled_rdds.py: Check for division by 0 and for missing key in maps.
Browse files Browse the repository at this point in the history
  • Loading branch information
jkbradley committed Aug 18, 2014
1 parent 8d1e555 commit dafebe2
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 3 deletions.
10 changes: 9 additions & 1 deletion examples/src/main/python/mllib/sampled_rdds.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@

examples = MLUtils.loadLibSVMFile(sc, datapath)
numExamples = examples.count()
if numExamples == 0:
print >> sys.stderr, "Error: Data file had no samples to load."
exit(1)
print 'Loaded data with %d examples from file: %s' % (numExamples, datapath)

# Example: RDD.sample() and RDD.takeSample()
Expand Down Expand Up @@ -73,6 +76,11 @@
print ' \tFractions of examples with key'
print 'Key\tOrig\tSample'
for k in sorted(keyCountsA.keys()):
print '%d\t%g\t%g' % (k, keyCountsA[k] / float(numExamples), keyCountsB[k] / float(sizeB))
fracA = keyCountsA[k] / float(numExamples)
if sizeB != 0:
fracB = keyCountsB.get(k, 0) / float(sizeB)
else:
fracB = 0
print '%d\t%g\t%g' % (k, fracA, fracB)

sc.stop()
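15 changes: 13 additions & 2 deletions examples/src/main/scala/org/apache/spark/examples/mllib/SampledRDDs.scala
Original file line number Diff line number Diff line change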
Expand Up @@ -66,6 +66,9 @@ object SampledRDDs {

val examples = MLUtils.loadLibSVMFile(sc, params.input)
val numExamples = examples.count()
if (numExamples == 0) {
throw new RuntimeException("Error: Data file had no samples to load.")
}
println(s"Loaded data with $numExamples examples from file: ${params.input}")

// Example: RDD.sample() and RDD.takeSample()
Expand Down Expand Up @@ -105,8 +108,16 @@ object SampledRDDs {
println(s"Key\tOrig\tApprox Sample\tExact Sample")
keyCounts.keys.toSeq.sorted.foreach { key =>
val origFrac = keyCounts(key) / numExamples.toDouble
val approxFrac = keyCountsB(key) / sizeB.toDouble
val exactFrac = keyCountsBExact(key) / sizeBExact.toDouble
val approxFrac = if (sizeB != 0) {
keyCountsB.getOrElse(key, 0L) / sizeB.toDouble
} else {
0
}
val exactFrac = if (sizeBExact != 0) {
keyCountsBExact.getOrElse(key, 0L) / sizeBExact.toDouble
} else {
0
}
println(s"$key\t$origFrac\t$approxFrac\t$exactFrac")
}

Expand Down

0 comments on commit dafebe2

Please sign in to comment.