Skip to content

Commit

Permalink
fix accumulator with reused worker
Browse files Browse the repository at this point in the history
  • Loading branch information
davies committed Sep 10, 2014
1 parent 760ab1f commit 3133a60
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 0 deletions.
11 changes: 11 additions & 0 deletions python/pyspark/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -1106,6 +1106,17 @@ def test_after_jvm_exception(self):
rdd = self.sc.parallelize(range(100), 1)
self.assertEqual(100, rdd.map(str).count())

def test_accumulator_when_reuse_worker(self):
from pyspark.accumulators import INT_ACCUMULATOR_PARAM
acc1 = self.sc.accumulator(0, INT_ACCUMULATOR_PARAM)
self.sc.parallelize(range(100), 20).foreach(lambda x: acc1.add(x))
self.assertEqual(sum(range(100)), acc1.value)

acc2 = self.sc.accumulator(0, INT_ACCUMULATOR_PARAM)
self.sc.parallelize(range(100), 20).foreach(lambda x: acc2.add(x))
self.assertEqual(sum(range(100)), acc2.value)
self.assertEqual(sum(range(100)), acc1.value)


class TestSparkSubmit(unittest.TestCase):

Expand Down
1 change: 1 addition & 0 deletions python/pyspark/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def main(infile, outfile):
bid = - bid - 1
_broadcastRegistry.pop(bid, None)

_accumulatorRegistry.clear()
command = pickleSer._read_with_length(infile)
(func, deserializer, serializer) = command
init_time = time.time()
Expand Down

0 comments on commit 3133a60

Please sign in to comment.