Skip to content

Commit

Permalink
add python doc
Browse files Browse the repository at this point in the history
  • Loading branch information
yanboliang committed Mar 31, 2015
1 parent b18fd07 commit dcf7d73
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -345,9 +345,7 @@ private[python] class PythonMLLibAPI extends Serializable {
val model = new GaussianMixtureModel(weight, gaussians)
model.predictSoft(data)
}




/**
* Java stub for Python mllib ALS.train(). This stub returns a handle
* to the Java object instead of the content of the Java object. Extra care
Expand Down Expand Up @@ -408,10 +406,11 @@ private[python] class PythonMLLibAPI extends Serializable {
}

/**
- * A Wrapper of FPGrowthModel to provide helpfer method for Python
+ * A Wrapper of FPGrowthModel to provide helper method for Python
*/
private[python] class FPGrowthModelWrapper(model: FPGrowthModel[Any])
extends FPGrowthModel(model.freqItemsets) {

def getFreqItemsets: RDD[Array[Any]] = {
SerDe.fromTuple2RDD(model.freqItemsets.map(x => (x.javaItems, x.freq)))
}
Expand All @@ -423,12 +422,13 @@ private[python] class PythonMLLibAPI extends Serializable {
* needs to be taken in the Python code to ensure it gets freed on exit; see
* the Py4J documentation.
*/
- def trainFPGrowthModel(data: JavaRDD[java.lang.Iterable[Any]],
+ def trainFPGrowthModel(
+ data: JavaRDD[java.lang.Iterable[Any]],
minSupport: Double,
- numPartition: Int): FPGrowthModel[Any] = {
+ numPartitions: Int): FPGrowthModel[Any] = {
val fpm = new FPGrowth()
.setMinSupport(minSupport)
- .setNumPartitions(numPartition)
+ .setNumPartitions(numPartitions)

val model = fpm.run(data.rdd.map(_.asScala.toArray))
new FPGrowthModelWrapper(model)
Expand Down
7 changes: 7 additions & 0 deletions python/docs/pyspark.mllib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,13 @@ pyspark.mllib.feature module
:undoc-members:
:show-inheritance:

pyspark.mllib.fpm module
------------------------

.. automodule:: pyspark.mllib.fpm
:members:
:undoc-members:

pyspark.mllib.linalg module
---------------------------

Expand Down
4 changes: 2 additions & 2 deletions python/pyspark/mllib/fpm.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ def freqItemsets(self):
class FPGrowth(object):

@classmethod
- def train(cls, data, minSupport=0.3, numPartition=-1):
- model = callMLlibFunc("trainFPGrowthModel", data, float(minSupport), int(numPartition))
+ def train(cls, data, minSupport=0.3, numPartitions=-1):
+ model = callMLlibFunc("trainFPGrowthModel", data, float(minSupport), int(numPartitions))
return FPGrowthModel(model)


Expand Down

0 comments on commit dcf7d73

Please sign in to comment.