
[SPARK-22063][R] Fixes lint check failures in R by latest commit sha1 ID of lint-r #19290

Closed
wants to merge 7 commits
2 changes: 1 addition & 1 deletion R/pkg/.lintr
@@ -1,2 +1,2 @@
-linters: with_defaults(line_length_linter(100), multiple_dots_linter = NULL, camel_case_linter = NULL, open_curly_linter(allow_single_line = TRUE), closed_curly_linter(allow_single_line = TRUE))
+linters: with_defaults(line_length_linter(100), multiple_dots_linter = NULL, object_name_linter = NULL, camel_case_linter = NULL, open_curly_linter(allow_single_line = TRUE), closed_curly_linter(allow_single_line = TRUE))
Member Author comment:
object_name_linter = NULL looks required. Otherwise, it complains about inconsistent naming styles.

exclusions: list("inst/profile/general.R" = 1, "inst/profile/shell.R")
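A rough illustration of why (a hedged sketch, not from the PR): SparkR mixes several naming styles, so any single default style enforced by object_name_linter would flag some of the codebase. Function names below are hypothetical stand-ins:

# hypothetical file run through lint-r; all three styles occur in SparkR,
# so a default object_name_linter configuration would flag whichever
# styles it is not set up to accept
createTableColumn <- function(x) {}   # camelCase
spark.add.file <- function(path) {}   # dot.case
print_usage <- function() {}          # snake_case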
30 changes: 16 additions & 14 deletions R/pkg/R/DataFrame.R
@@ -1923,13 +1923,15 @@ setMethod("[", signature(x = "SparkDataFrame"),
#' @param i,subset (Optional) a logical expression to filter on rows.
#' For extract operator [[ and replacement operator [[<-, the indexing parameter for
#' a single Column.
-#' @param j,select expression for the single Column or a list of columns to select from the SparkDataFrame.
+#' @param j,select expression for the single Column or a list of columns to select from the
+#' SparkDataFrame.
#' @param drop if TRUE, a Column will be returned if the resulting dataset has only one column.
#' Otherwise, a SparkDataFrame will always be returned.
#' @param value a Column or an atomic vector in the length of 1 as literal value, or \code{NULL}.
#' If \code{NULL}, the specified Column is dropped.
#' @param ... currently not used.
-#' @return A new SparkDataFrame containing only the rows that meet the condition with selected columns.
+#' @return A new SparkDataFrame containing only the rows that meet the condition with selected
+#' columns.
#' @export
#' @family SparkDataFrame functions
#' @aliases subset,SparkDataFrame-method
@@ -2608,12 +2610,12 @@ setMethod("merge",
} else {
# if by or both by.x and by.y have length 0, use Cartesian Product
joinRes <- crossJoin(x, y)
-return (joinRes)
+return(joinRes)
}

# sets alias for making colnames unique in dataframes 'x' and 'y'
-colsX <- generateAliasesForIntersectedCols(x, by, suffixes[1])
-colsY <- generateAliasesForIntersectedCols(y, by, suffixes[2])
+colsX <- genAliasesForIntersectedCols(x, by, suffixes[1])
+colsY <- genAliasesForIntersectedCols(y, by, suffixes[2])

# selects columns with their aliases from dataframes
# in case same column names are present in both data frames
@@ -2661,17 +2663,16 @@ setMethod("merge",
#' @param intersectedColNames a list of intersected column names of the SparkDataFrame
#' @param suffix a suffix for the column name
#' @return list of columns
-#'
-#' @note generateAliasesForIntersectedCols since 1.6.0
-generateAliasesForIntersectedCols <- function (x, intersectedColNames, suffix) {
+#' @noRd
+genAliasesForIntersectedCols <- function(x, intersectedColNames, suffix) {
allColNames <- names(x)
# sets alias for making colnames unique in dataframe 'x'
cols <- lapply(allColNames, function(colName) {
col <- getColumn(x, colName)
if (colName %in% intersectedColNames) {
newJoin <- paste(colName, suffix, sep = "")
if (newJoin %in% allColNames){
stop ("The following column name: ", newJoin, " occurs more than once in the 'DataFrame'.",
stop("The following column name: ", newJoin, " occurs more than once in the 'DataFrame'.",
"Please use different suffixes for the intersected columns.")
}
col <- alias(col, newJoin)
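For context, a hedged usage sketch of what the renamed helper does inside merge (assumes an active session from sparkR.session(); the data frames are illustrative):

df1 <- createDataFrame(data.frame(id = 1:3, value = c("a", "b", "c")))
df2 <- createDataFrame(data.frame(id = 2:4, value = c("x", "y", "z")))
# "value" exists in both inputs, so merge aliases it with the suffixes,
# producing columns such as value_x and value_y via genAliasesForIntersectedCols
merged <- merge(df1, df2, by = "id", suffixes = c("_x", "_y"))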
@@ -3058,7 +3059,8 @@ setMethod("describe",
#' summary(select(df, "age", "height"))
#' }
#' @note summary(SparkDataFrame) since 1.5.0
-#' @note The statistics provided by \code{summary} were change in 2.3.0 use \link{describe} for previous defaults.
+#' @note The statistics provided by \code{summary} were changed in 2.3.0. Use \link{describe} for
+#' previous defaults.
#' @seealso \link{describe}
setMethod("summary",
signature(object = "SparkDataFrame"),
@@ -3765,8 +3767,8 @@ setMethod("checkpoint",
#'
#' Create a multi-dimensional cube for the SparkDataFrame using the specified columns.
#'
-#' If grouping expression is missing \code{cube} creates a single global aggregate and is equivalent to
-#' direct application of \link{agg}.
+#' If grouping expression is missing \code{cube} creates a single global aggregate and is
+#' equivalent to direct application of \link{agg}.
#'
#' @param x a SparkDataFrame.
#' @param ... character name(s) or Column(s) to group on.
@@ -3800,8 +3802,8 @@ setMethod("cube",
#'
#' Create a multi-dimensional rollup for the SparkDataFrame using the specified columns.
#'
-#' If grouping expression is missing \code{rollup} creates a single global aggregate and is equivalent to
-#' direct application of \link{agg}.
+#' If grouping expression is missing \code{rollup} creates a single global aggregate and is
+#' equivalent to direct application of \link{agg}.
#'
#' @param x a SparkDataFrame.
#' @param ... character name(s) or Column(s) to group on.
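A hedged sketch contrasting the two, including the missing-grouping case both docs describe (assumes an active SparkR session):

df <- createDataFrame(mtcars)
head(agg(cube(df, "cyl", "gear"), avg(df$mpg)))    # every combination of the grouping columns
head(agg(rollup(df, "cyl", "gear"), avg(df$mpg)))  # hierarchical subtotals: (cyl, gear), (cyl), ()
head(agg(cube(df), avg(df$mpg)))                   # no grouping columns: one global aggregate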
6 changes: 3 additions & 3 deletions R/pkg/R/RDD.R
@@ -131,7 +131,7 @@ PipelinedRDD <- function(prev, func) {
# Return the serialization mode for an RDD.
setGeneric("getSerializedMode", function(rdd, ...) { standardGeneric("getSerializedMode") })
# For normal RDDs we can directly read the serializedMode
setMethod("getSerializedMode", signature(rdd = "RDD"), function(rdd) rdd@env$serializedMode )
setMethod("getSerializedMode", signature(rdd = "RDD"), function(rdd) rdd@env$serializedMode)
# For pipelined RDDs if jrdd_val is set then serializedMode should exist
# if not we return the defaultSerialization mode of "byte" as we don't know the serialization
# mode at this point in time.
@@ -145,7 +145,7 @@ setMethod("getSerializedMode", signature(rdd = "PipelinedRDD"),
})

# The jrdd accessor function.
setMethod("getJRDD", signature(rdd = "RDD"), function(rdd) rdd@jrdd )
setMethod("getJRDD", signature(rdd = "RDD"), function(rdd) rdd@jrdd)
setMethod("getJRDD", signature(rdd = "PipelinedRDD"),
function(rdd, serializedMode = "byte") {
if (!is.null(rdd@env$jrdd_val)) {
@@ -893,7 +893,7 @@ setMethod("sampleRDD",
if (withReplacement) {
count <- stats::rpois(1, fraction)
if (count > 0) {
-res[ (len + 1) : (len + count) ] <- rep(list(elem), count)
+res[(len + 1) : (len + count)] <- rep(list(elem), count)
len <- len + count
}
} else {
2 changes: 1 addition & 1 deletion R/pkg/R/WindowSpec.R
@@ -73,7 +73,7 @@ setMethod("show", "WindowSpec",
setMethod("partitionBy",
signature(x = "WindowSpec"),
function(x, col, ...) {
-stopifnot (class(col) %in% c("character", "Column"))
+stopifnot(class(col) %in% c("character", "Column"))

if (class(col) == "character") {
windowSpec(callJMethod(x@sws, "partitionBy", col, list(...)))
2 changes: 2 additions & 0 deletions R/pkg/R/column.R
@@ -238,8 +238,10 @@ setMethod("between", signature(x = "Column"),
#' @param x a Column.
#' @param dataType a character object describing the target data type.
#' See
+# nolint start
Member Author comment:
I just double checked the links.

#' \href{https://spark.apache.org/docs/latest/sparkr.html#data-type-mapping-between-r-and-spark}{
#' Spark Data Types} for available data types.
+# nolint end
#' @rdname cast
#' @name cast
#' @family colum_func
2 changes: 1 addition & 1 deletion R/pkg/R/context.R
@@ -329,7 +329,7 @@ spark.addFile <- function(path, recursive = FALSE) {
#' spark.getSparkFilesRootDirectory()
#'}
#' @note spark.getSparkFilesRootDirectory since 2.1.0
-spark.getSparkFilesRootDirectory <- function() {
+spark.getSparkFilesRootDirectory <- function() { # nolint
Member Author comment:
Ditto: it exceeds the 30-character identifier length limit, but the name is exposed in the docs.

Member comment:
this one is an API...

if (Sys.getenv("SPARKR_IS_RUNNING_ON_WORKER") == "") {
# Running on driver.
callJStatic("org.apache.spark.SparkFiles", "getRootDirectory")
2 changes: 1 addition & 1 deletion R/pkg/R/deserialize.R
@@ -43,7 +43,7 @@ readObject <- function(con) {
}

readTypedObject <- function(con, type) {
-switch (type,
+switch(type,
"i" = readInt(con),
"c" = readString(con),
"b" = readBoolean(con),
79 changes: 42 additions & 37 deletions R/pkg/R/functions.R
@@ -38,7 +38,8 @@ NULL
#'
#' Date time functions defined for \code{Column}.
#'
-#' @param x Column to compute on. In \code{window}, it must be a time Column of \code{TimestampType}.
+#' @param x Column to compute on. In \code{window}, it must be a time Column of
+#' \code{TimestampType}.
#' @param format For \code{to_date} and \code{to_timestamp}, it is the string to use to parse
#' Column \code{x} to DateType or TimestampType. For \code{trunc}, it is the string
#' to use to specify the truncation method. For example, "year", "yyyy", "yy" for
@@ -90,8 +91,8 @@ NULL
#'
#' Math functions defined for \code{Column}.
#'
-#' @param x Column to compute on. In \code{shiftLeft}, \code{shiftRight} and \code{shiftRightUnsigned},
-#' this is the number of bits to shift.
+#' @param x Column to compute on. In \code{shiftLeft}, \code{shiftRight} and
+#' \code{shiftRightUnsigned}, this is the number of bits to shift.
#' @param y Column to compute on.
#' @param ... additional argument(s).
#' @name column_math_functions
@@ -480,7 +481,7 @@ setMethod("ceiling",
setMethod("coalesce",
signature(x = "Column"),
function(x, ...) {
-jcols <- lapply(list(x, ...), function (x) {
+jcols <- lapply(list(x, ...), function(x) {
stopifnot(class(x) == "Column")
x@jc
})
@@ -676,7 +677,7 @@ setMethod("crc32",
setMethod("hash",
signature(x = "Column"),
function(x, ...) {
-jcols <- lapply(list(x, ...), function (x) {
+jcols <- lapply(list(x, ...), function(x) {
stopifnot(class(x) == "Column")
x@jc
})
@@ -1310,9 +1311,9 @@ setMethod("round",
#' Also known as Gaussian rounding or bankers' rounding that rounds to the nearest even number.
#' bround(2.5, 0) = 2, bround(3.5, 0) = 4.
#'
-#' @param scale round to \code{scale} digits to the right of the decimal point when \code{scale} > 0,
-#' the nearest even number when \code{scale} = 0, and \code{scale} digits to the left
-#' of the decimal point when \code{scale} < 0.
+#' @param scale round to \code{scale} digits to the right of the decimal point when
+#' \code{scale} > 0, the nearest even number when \code{scale} = 0, and \code{scale} digits
+#' to the left of the decimal point when \code{scale} < 0.
#' @rdname column_math_functions
#' @aliases bround bround,Column-method
#' @export
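A hedged sketch of two of the \code{scale} regimes described above (assumes an active SparkR session):

df <- createDataFrame(data.frame(x = c(2.5, 3.5, 25)))
head(select(df, bround(df$x, 0)))   # 2, 4, 25 -- ties round to the nearest even number
head(select(df, bround(df$x, -1)))  # 0, 0, 20 -- 25 is midway, so it rounds to the even ten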
@@ -2005,8 +2006,9 @@ setMethod("months_between", signature(y = "Column"),
})

#' @details
-#' \code{nanvl}: Returns the first column (\code{y}) if it is not NaN, or the second column (\code{x}) if
-#' the first column is NaN. Both inputs should be floating point columns (DoubleType or FloatType).
+#' \code{nanvl}: Returns the first column (\code{y}) if it is not NaN, or the second column
+#' (\code{x}) if the first column is NaN. Both inputs should be floating point columns
+#' (DoubleType or FloatType).
#'
#' @rdname column_nonaggregate_functions
#' @aliases nanvl nanvl,Column-method
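A hedged sketch of the fallback behavior (assumes an active SparkR session):

df <- createDataFrame(data.frame(y = c(1.0, NaN), x = c(0.5, 2.0)))
head(select(df, nanvl(df$y, df$x)))  # 1.0 (y is kept), then 2.0 (y is NaN, so x is used)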
@@ -2061,7 +2063,7 @@ setMethod("approxCountDistinct",
setMethod("countDistinct",
signature(x = "Column"),
function(x, ...) {
-jcols <- lapply(list(...), function (x) {
+jcols <- lapply(list(...), function(x) {
stopifnot(class(x) == "Column")
x@jc
})
@@ -2090,7 +2092,7 @@ setMethod("countDistinct",
setMethod("concat",
signature(x = "Column"),
function(x, ...) {
-jcols <- lapply(list(x, ...), function (x) {
+jcols <- lapply(list(x, ...), function(x) {
stopifnot(class(x) == "Column")
x@jc
})
@@ -2110,7 +2112,7 @@ setMethod("greatest",
signature(x = "Column"),
function(x, ...) {
stopifnot(length(list(...)) > 0)
-jcols <- lapply(list(x, ...), function (x) {
+jcols <- lapply(list(x, ...), function(x) {
stopifnot(class(x) == "Column")
x@jc
})
@@ -2130,7 +2132,7 @@ setMethod("least",
signature(x = "Column"),
function(x, ...) {
stopifnot(length(list(...)) > 0)
-jcols <- lapply(list(x, ...), function (x) {
+jcols <- lapply(list(x, ...), function(x) {
stopifnot(class(x) == "Column")
x@jc
})
@@ -2406,8 +2408,8 @@ setMethod("shiftLeft", signature(y = "Column", x = "numeric"),
})

#' @details
-#' \code{shiftRight}: (Signed) shifts the given value numBits right. If the given value is a long value,
-#' it will return a long value else it will return an integer value.
+#' \code{shiftRight}: (Signed) shifts the given value numBits right. If the given value is a long
+#' value, it will return a long value else it will return an integer value.
#'
#' @rdname column_math_functions
#' @aliases shiftRight shiftRight,Column,numeric-method
@@ -2505,9 +2507,10 @@ setMethod("format_string", signature(format = "character", x = "Column"),
})

#' @details
-#' \code{from_unixtime}: Converts the number of seconds from unix epoch (1970-01-01 00:00:00 UTC) to a
-#' string representing the timestamp of that moment in the current system time zone in the JVM in the
-#' given format. See \href{http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html}{
+#' \code{from_unixtime}: Converts the number of seconds from unix epoch (1970-01-01 00:00:00 UTC)
+#' to a string representing the timestamp of that moment in the current system time zone in the JVM
+#' in the given format.
+#' See \href{http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html}{
#' Customizing Formats} for available options.
#'
#' @rdname column_datetime_functions
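A hedged sketch of the default and a custom SimpleDateFormat pattern (assumes an active SparkR session):

df <- createDataFrame(data.frame(t = c(0, 1500000000)))
head(select(df, from_unixtime(df$t)))                # default format "yyyy-MM-dd HH:mm:ss"
head(select(df, from_unixtime(df$t, "yyyy-MM-dd")))  # custom pattern, date only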
@@ -2634,8 +2637,8 @@ setMethod("lpad", signature(x = "Column", len = "numeric", pad = "character"),
})

#' @details
-#' \code{rand}: Generates a random column with independent and identically distributed (i.i.d.) samples
-#' from U[0.0, 1.0].
+#' \code{rand}: Generates a random column with independent and identically distributed (i.i.d.)
+#' samples from U[0.0, 1.0].
#'
#' @rdname column_nonaggregate_functions
#' @param seed a random seed. Can be missing.
@@ -2664,8 +2667,8 @@ setMethod("rand", signature(seed = "numeric"),
})

#' @details
-#' \code{randn}: Generates a column with independent and identically distributed (i.i.d.) samples from
-#' the standard normal distribution.
+#' \code{randn}: Generates a column with independent and identically distributed (i.i.d.) samples
+#' from the standard normal distribution.
#'
#' @rdname column_nonaggregate_functions
#' @aliases randn randn,missing-method
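A hedged sketch of both generators, one sample per row; passing a seed makes runs reproducible (assumes an active SparkR session):

df <- createDataFrame(data.frame(id = 1:3))
head(select(df, rand(42), randn(42)))  # i.i.d. U[0.0, 1.0] and standard normal columns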
@@ -2831,8 +2834,8 @@ setMethod("unix_timestamp", signature(x = "Column", format = "character"),
})

#' @details
-#' \code{when}: Evaluates a list of conditions and returns one of multiple possible result expressions.
-#' For unmatched expressions null is returned.
+#' \code{when}: Evaluates a list of conditions and returns one of multiple possible result
+#' expressions. For unmatched expressions null is returned.
#'
#' @rdname column_nonaggregate_functions
#' @param condition the condition to test on. Must be a Column expression.
@@ -2859,8 +2862,8 @@ setMethod("when", signature(condition = "Column", value = "ANY"),
})

#' @details
-#' \code{ifelse}: Evaluates a list of conditions and returns \code{yes} if the conditions are satisfied.
-#' Otherwise \code{no} is returned for unmatched conditions.
+#' \code{ifelse}: Evaluates a list of conditions and returns \code{yes} if the conditions are
+#' satisfied. Otherwise \code{no} is returned for unmatched conditions.
#'
#' @rdname column_nonaggregate_functions
#' @param test a Column expression that describes the condition.
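A hedged sketch of the one-branch and two-branch forms (assumes an active SparkR session):

df <- createDataFrame(data.frame(x = c(-1, 0, 1)))
head(select(df, when(df$x > 0, "positive")))             # null for the unmatched rows
head(select(df, ifelse(df$x > 0, "positive", "other")))  # "other" for unmatched rows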
@@ -2990,7 +2993,8 @@ setMethod("ntile",
})

#' @details
-#' \code{percent_rank}: Returns the relative rank (i.e. percentile) of rows within a window partition.
+#' \code{percent_rank}: Returns the relative rank (i.e. percentile) of rows within a window
+#' partition.
#' This is computed by: (rank of row in its partition - 1) / (number of rows in the partition - 1).
#' This is equivalent to the \code{PERCENT_RANK} function in SQL.
#' The method should be used with no argument.
@@ -3160,7 +3164,8 @@ setMethod("posexplode",
})

#' @details
-#' \code{create_array}: Creates a new array column. The input columns must all have the same data type.
+#' \code{create_array}: Creates a new array column. The input columns must all have the same data
+#' type.
#'
#' @rdname column_nonaggregate_functions
#' @aliases create_array create_array,Column-method
@@ -3169,7 +3174,7 @@ setMethod("create_array",
setMethod("create_array",
signature(x = "Column"),
function(x, ...) {
-jcols <- lapply(list(x, ...), function (x) {
+jcols <- lapply(list(x, ...), function(x) {
stopifnot(class(x) == "Column")
x@jc
})
@@ -3178,8 +3183,8 @@ setMethod("create_array",
})

#' @details
-#' \code{create_map}: Creates a new map column. The input columns must be grouped as key-value pairs,
-#' e.g. (key1, value1, key2, value2, ...).
+#' \code{create_map}: Creates a new map column. The input columns must be grouped as key-value
+#' pairs, e.g. (key1, value1, key2, value2, ...).
#' The key columns must all have the same data type, and can't be null.
#' The value columns must all have the same data type.
#'
@@ -3190,7 +3195,7 @@ setMethod("create_map",
setMethod("create_map",
signature(x = "Column"),
function(x, ...) {
-jcols <- lapply(list(x, ...), function (x) {
+jcols <- lapply(list(x, ...), function(x) {
stopifnot(class(x) == "Column")
x@jc
})
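A hedged sketch of both constructors; note the keys for create_map are supplied as literal columns (assumes an active SparkR session):

df <- createDataFrame(data.frame(a = 1:2, b = 3:4))
head(select(df, create_array(df$a, df$b)))                    # array<int> column: [1, 3], [2, 4]
head(select(df, create_map(lit("a"), df$a, lit("b"), df$b)))  # map<string, int> column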
@@ -3352,9 +3357,9 @@ setMethod("not",
})

#' @details
-#' \code{grouping_bit}: Indicates whether a specified column in a GROUP BY list is aggregated or not,
-#' returns 1 for aggregated or 0 for not aggregated in the result set. Same as \code{GROUPING} in SQL
-#' and \code{grouping} function in Scala.
+#' \code{grouping_bit}: Indicates whether a specified column in a GROUP BY list is aggregated or
+#' not, returns 1 for aggregated or 0 for not aggregated in the result set. Same as \code{GROUPING}
+#' in SQL and \code{grouping} function in Scala.
#'
#' @rdname column_aggregate_functions
#' @aliases grouping_bit grouping_bit,Column-method
@@ -3412,7 +3417,7 @@ setMethod("grouping_bit",
setMethod("grouping_id",
signature(x = "Column"),
function(x, ...) {
-jcols <- lapply(list(x, ...), function (x) {
+jcols <- lapply(list(x, ...), function(x) {
stopifnot(class(x) == "Column")
x@jc
})
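A hedged sketch combining both indicators with cube, where grouping information is meaningful (assumes an active SparkR session):

df <- createDataFrame(mtcars)
head(agg(cube(df, "cyl", "gear"),
         grouping_bit(df$cyl),          # 1 where cyl was aggregated away, else 0
         grouping_id(df$cyl, df$gear),  # the grouping bits packed into one integer
         avg(df$mpg)))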