Skip to content

Commit

Permalink
Merge pull request apache#117 from sun-rui/keyBy
Browse files Browse the repository at this point in the history
Add keyBy() to the RDD class.
  • Loading branch information
shivaram committed Dec 11, 2014
2 parents d0347ce + 09083d9 commit 0fa48d1
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 0 deletions.
1 change: 1 addition & 0 deletions pkg/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ exportMethods(
"foreachPartition",
"groupByKey",
"join",
"keyBy",
"keys",
"length",
"lapply",
Expand Down
25 changes: 25 additions & 0 deletions pkg/R/RDD.R
Original file line number Diff line number Diff line change
Expand Up @@ -1036,6 +1036,31 @@ setMethod("takeSample", signature(rdd = "RDD", withReplacement = "logical",
sample(samples)[1:total]
})

#' Pair each element of an RDD with a key computed from it.
#'
#' Applies \code{func} to every element \code{x} of the RDD and produces the
#' two-element list \code{list(func(x), x)}: the computed key comes first and
#' the original element second.
#'
#' @param rdd The RDD whose elements are to be keyed.
#' @param func A function that takes one element and returns its key.
#' @return One \code{list(key, element)} pair for each element of \code{rdd}.
#' @rdname keyBy
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' rdd <- parallelize(sc, list(1, 2, 3))
#' collect(keyBy(rdd, function(x) { x*x })) # list(list(1, 1), list(4, 2), list(9, 3))
#'}
setGeneric(
  name = "keyBy",
  def = function(rdd, func) {
    standardGeneric("keyBy")
  }
)

#' @rdname keyBy
#' @aliases keyBy,RDD
setMethod(
  "keyBy",
  signature(rdd = "RDD", func = "function"),
  function(rdd, func) {
    # Map every element to list(key, element); the key is whatever `func`
    # returns for that element and is placed first in the pair.
    lapply(rdd, function(elem) {
      list(func(elem), elem)
    })
  }
)

#' Return an RDD with the keys of each tuple.
#'
#' @param rdd The RDD from which the key of each tuple is returned.
Expand Down
7 changes: 7 additions & 0 deletions pkg/inst/tests/test_rdd.R
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,13 @@ test_that("minimum() on RDDs", {
expect_equal(min, 1)
})

test_that("keyBy on RDDs", {
  square <- function(x) {
    x * x
  }
  # Each element should come back as list(key, element), with the key
  # produced by the same function that was handed to keyBy().
  expected <- lapply(nums, function(n) list(square(n), n))
  keyed <- collect(keyBy(rdd, square))
  expect_equal(keyed, expected)
})

test_that("keys() on RDDs", {
keys <- keys(intRdd)
actual <- collect(keys)
Expand Down
28 changes: 28 additions & 0 deletions pkg/man/keyBy.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
% Generated by roxygen2 (4.0.2): do not edit by hand
\docType{methods}
\name{keyBy}
\alias{keyBy}
\alias{keyBy,RDD}
\alias{keyBy,RDD,function-method}
\title{Creates tuples of the elements in this RDD by applying a function.}
\usage{
keyBy(rdd, func)

\S4method{keyBy}{RDD,`function`}(rdd, func)
}
\arguments{
\item{rdd}{The RDD.}

\item{func}{The function to be applied.}
}
\description{
Creates tuples of the elements in this RDD by applying a function.
}
\examples{
\dontrun{
sc <- sparkR.init()
rdd <- parallelize(sc, list(1, 2, 3))
collect(keyBy(rdd, function(x) { x*x })) # list(list(1, 1), list(4, 2), list(9, 3))
}
}

0 comments on commit 0fa48d1

Please sign in to comment.