From b40bae770c850e9a8a5e742af850100a659c7949 Mon Sep 17 00:00:00 2001 From: Davies Liu Date: Sat, 16 Aug 2014 00:44:18 -0700 Subject: [PATCH] bugfix --- python/pyspark/rdd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 05a3570a9b8ba..a843646f6657a 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -1627,7 +1627,7 @@ def groupByKey(it): BatchedSerializer(PickleSerializer(), 1024), 10) sorter = ExternalSorter(memory * 0.9, ser) it = sorter.sorted(it, key=operator.itemgetter(0)) - return imap(lambda (k, v): ResultIterable(v), GroupByKey(it)) + return imap(lambda (k, v): (k, ResultIterable(v)), GroupByKey(it)) else: # this is faster than sort based