Skip to content

Commit

Permalink
Fix python style warnings
Browse files Browse the repository at this point in the history
  • Loading branch information
sryza committed Sep 8, 2014
1 parent f147634 commit e5381cd
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 3 deletions.
4 changes: 2 additions & 2 deletions python/pyspark/rdd.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,7 +525,7 @@ def repartitionAndSortWithinPartition(self, ascending=True, numPartitions=None,
"""
Repartition the RDD according to the given partitioner and, within each resulting partition,
sort records by their keys.
>>> rdd = sc.parallelize([(0, 5), (3, 8), (2, 6), (0, 8), (3, 8), (1, 3)])
>>> rdd2 = rdd.repartitionAndSortWithinPartition(True, lambda x: x % 2, 2)
>>> rdd2.glom().collect()
Expand All @@ -541,7 +541,7 @@ def repartitionAndSortWithinPartition(self, ascending=True, numPartitions=None,
def sortPartition(iterator):
sort = ExternalSorter(memory * 0.9, serializer).sorted if spill else sorted
return iter(sort(iterator, key=lambda (k, v): keyfunc(k), reverse=(not ascending)))

return self.partitionBy(numPartitions, partitionFunc).mapPartitions(sortPartition, True)

def sortByKey(self, ascending=True, numPartitions=None, keyfunc=lambda x: x):
Expand Down
2 changes: 1 addition & 1 deletion python/pyspark/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,7 +547,7 @@ def test_histogram(self):

def test_repartitionAndSortWithinPartition(self):
rdd = self.sc.parallelize([(0, 5), (3, 8), (2, 6), (0, 8), (3, 8), (1, 3)], 2)

repartitioned = rdd.repartitionAndSortWithinPartition(True, 2, lambda key: key % 2)
partitions = repartitioned.glom().collect()
self.assertEquals(partitions[0], [(0, 5), (0, 8), (2, 6)])
Expand Down

0 comments on commit e5381cd

Please sign in to comment.