diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala index 678d5e8f84ef9..d6845c9e8510c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala @@ -345,9 +345,7 @@ private[python] class PythonMLLibAPI extends Serializable { val model = new GaussianMixtureModel(weight, gaussians) model.predictSoft(data) } - - - + /** * Java stub for Python mllib ALS.train(). This stub returns a handle * to the Java object instead of the content of the Java object. Extra care @@ -408,10 +406,11 @@ private[python] class PythonMLLibAPI extends Serializable { } /** - * A Wrapper of FPGrowthModel to provide helpfer method for Python + * A Wrapper of FPGrowthModel to provide helper method for Python */ private[python] class FPGrowthModelWrapper(model: FPGrowthModel[Any]) extends FPGrowthModel(model.freqItemsets) { + def getFreqItemsets: RDD[Array[Any]] = { SerDe.fromTuple2RDD(model.freqItemsets.map(x => (x.javaItems, x.freq))) } @@ -423,12 +422,13 @@ private[python] class PythonMLLibAPI extends Serializable { * needs to be taken in the Python code to ensure it gets freed on exit; see * the Py4J documentation. */ - def trainFPGrowthModel(data: JavaRDD[java.lang.Iterable[Any]], + def trainFPGrowthModel( + data: JavaRDD[java.lang.Iterable[Any]], minSupport: Double, - numPartition: Int): FPGrowthModel[Any] = { + numPartitions: Int): FPGrowthModel[Any] = { val fpm = new FPGrowth() .setMinSupport(minSupport) - .setNumPartitions(numPartition) + .setNumPartitions(numPartitions) val model = fpm.run(data.rdd.map(_.asScala.toArray)) new FPGrowthModelWrapper(model) diff --git a/python/docs/pyspark.mllib.rst b/python/docs/pyspark.mllib.rst index 15101470afc07..26ece4c2c389a 100644 --- a/python/docs/pyspark.mllib.rst +++ b/python/docs/pyspark.mllib.rst @@ -31,6 +31,13 @@ pyspark.mllib.feature module :undoc-members: :show-inheritance: +pyspark.mllib.fpm module +------------------------ + +.. automodule:: pyspark.mllib.fpm + :members: + :undoc-members: + pyspark.mllib.linalg module --------------------------- diff --git a/python/pyspark/mllib/fpm.py b/python/pyspark/mllib/fpm.py index b09dfd4fcc603..423fc7187521e 100644 --- a/python/pyspark/mllib/fpm.py +++ b/python/pyspark/mllib/fpm.py @@ -54,8 +54,8 @@ def freqItemsets(self): class FPGrowth(object): @classmethod - def train(cls, data, minSupport=0.3, numPartition=-1): - model = callMLlibFunc("trainFPGrowthModel", data, float(minSupport), int(numPartition)) + def train(cls, data, minSupport=0.3, numPartitions=-1): + model = callMLlibFunc("trainFPGrowthModel", data, float(minSupport), int(numPartitions)) return FPGrowthModel(model)