Skip to content

Commit

Permalink
Wrap FPGrowthModel.freqItemsets and make it consistent with Java API
Browse files Browse the repository at this point in the history
  • Loading branch information
yanboliang committed Apr 21, 2015
1 parent 1f2f723 commit 5532e78
Showing 1 changed file with 6 additions and 3 deletions.
9 changes: 6 additions & 3 deletions python/pyspark/mllib/fpm.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
# limitations under the License.
#

import numpy
from numpy import array

from pyspark import SparkContext
from pyspark.rdd import ignore_unicode_prefix
from pyspark.mllib.common import JavaModelWrapper, callMLlibFunc, inherit_doc
Expand All @@ -35,15 +38,15 @@ class FPGrowthModel(JavaModelWrapper):
>>> data = [["a", "b", "c"], ["a", "b", "d", "e"], ["a", "c", "e"], ["a", "c", "f"]]
>>> rdd = sc.parallelize(data, 2)
>>> model = FPGrowth.train(rdd, 0.6, 2)
>>> sorted(model.freqItemsets().collect())
[([u'a'], 4), ([u'c'], 3), ([u'c', u'a'], 3)]
>>> model.freqItemsets().collect()
[(array([u'a'], ...), 4), (array([u'c'], ...), 3), (array([u'c', u'a'], ...), 3)]
"""

def freqItemsets(self):
"""
Get the frequent itemsets of this model.

Fetches the itemsets by invoking ``getFreqItemsets`` through
``self.call``. The second ``return`` below additionally maps every
element ``x`` of that result to the pair ``(numpy.array(x[0]), x[1])``.
"""
# NOTE(review): two consecutive return statements follow. This looks like
# the removed and the added line of a diff rendered without +/- markers --
# confirm which one is current. Read literally, only the first would run.
return self.call("getFreqItemsets")
return self.call("getFreqItemsets").map(lambda x: (numpy.array(x[0]), x[1]))


class FPGrowth(object):
Expand Down

0 comments on commit 5532e78

Please sign in to comment.