
[SPARK-22063][R] Fixes lint check failures in R by latest commit sha1 ID of lint-r #19290

Closed
wants to merge 7 commits
2 changes: 1 addition & 1 deletion R/pkg/.lintr
@@ -1,2 +1,2 @@
-linters: with_defaults(line_length_linter(100), multiple_dots_linter = NULL, camel_case_linter = NULL, open_curly_linter(allow_single_line = TRUE), closed_curly_linter(allow_single_line = TRUE))
+linters: with_defaults(line_length_linter(100), multiple_dots_linter = NULL, object_name_linter = NULL, camel_case_linter = NULL, open_curly_linter(allow_single_line = TRUE), closed_curly_linter(allow_single_line = TRUE))
Member Author comment:
object_name_linter = NULL looks required. Otherwise, it complains about inconsistent naming styles.

exclusions: list("inst/profile/general.R" = 1, "inst/profile/shell.R")
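A rough illustration of why (a hedged sketch, not from the PR): SparkR mixes several naming styles, so any single default style enforced by object_name_linter would flag some of the codebase. Function names below are hypothetical stand-ins:

# hypothetical file run through lint-r; all three styles occur in SparkR,
# so a default object_name_linter configuration would flag whichever
# styles it is not set up to accept
createTableColumn <- function(x) {}   # camelCase
spark.add.file <- function(path) {}   # dot.case
print_usage <- function() {}          # snake_case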
30 changes: 16 additions & 14 deletions R/pkg/R/DataFrame.R
@@ -1923,13 +1923,15 @@ setMethod("[", signature(x = "SparkDataFrame"),
#' @param i,subset (Optional) a logical expression to filter on rows.
#' For extract operator [[ and replacement operator [[<-, the indexing parameter for
#' a single Column.
-#' @param j,select expression for the single Column or a list of columns to select from the SparkDataFrame.
+#' @param j,select expression for the single Column or a list of columns to select from the
+#' SparkDataFrame.
#' @param drop if TRUE, a Column will be returned if the resulting dataset has only one column.
#' Otherwise, a SparkDataFrame will always be returned.
#' @param value a Column or an atomic vector in the length of 1 as literal value, or \code{NULL}.
#' If \code{NULL}, the specified Column is dropped.
#' @param ... currently not used.
-#' @return A new SparkDataFrame containing only the rows that meet the condition with selected columns.
+#' @return A new SparkDataFrame containing only the rows that meet the condition with selected
+#' columns.
#' @export
#' @family SparkDataFrame functions
#' @aliases subset,SparkDataFrame-method
@@ -2608,12 +2610,12 @@ setMethod("merge",
} else {
# if by or both by.x and by.y have length 0, use Cartesian Product
joinRes <- crossJoin(x, y)
-return (joinRes)
+return(joinRes)
}

# sets alias for making colnames unique in dataframes 'x' and 'y'
-colsX <- generateAliasesForIntersectedCols(x, by, suffixes[1])
-colsY <- generateAliasesForIntersectedCols(y, by, suffixes[2])
+colsX <- genAliasesForIntersectedCols(x, by, suffixes[1])
+colsY <- genAliasesForIntersectedCols(y, by, suffixes[2])

# selects columns with their aliases from dataframes
# in case same column names are present in both data frames
@@ -2661,17 +2663,16 @@ setMethod("merge",
#' @param intersectedColNames a list of intersected column names of the SparkDataFrame
#' @param suffix a suffix for the column name
#' @return list of columns
-#'
-#' @note generateAliasesForIntersectedCols since 1.6.0
-generateAliasesForIntersectedCols <- function (x, intersectedColNames, suffix) {
+#' @noRd
+genAliasesForIntersectedCols <- function(x, intersectedColNames, suffix) {
allColNames <- names(x)
# sets alias for making colnames unique in dataframe 'x'
cols <- lapply(allColNames, function(colName) {
col <- getColumn(x, colName)
if (colName %in% intersectedColNames) {
newJoin <- paste(colName, suffix, sep = "")
if (newJoin %in% allColNames){
stop ("The following column name: ", newJoin, " occurs more than once in the 'DataFrame'.",
stop("The following column name: ", newJoin, " occurs more than once in the 'DataFrame'.",
"Please use different suffixes for the intersected columns.")
}
col <- alias(col, newJoin)
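For context, a hedged usage sketch of what the renamed helper does inside merge (assumes an active session from sparkR.session(); the data frames are illustrative):

df1 <- createDataFrame(data.frame(id = 1:3, value = c("a", "b", "c")))
df2 <- createDataFrame(data.frame(id = 2:4, value = c("x", "y", "z")))
# "value" exists in both inputs, so merge aliases it with the suffixes,
# producing columns such as value_x and value_y via genAliasesForIntersectedCols
merged <- merge(df1, df2, by = "id", suffixes = c("_x", "_y"))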
@@ -3058,7 +3059,8 @@ setMethod("describe",
#' summary(select(df, "age", "height"))
#' }
#' @note summary(SparkDataFrame) since 1.5.0
-#' @note The statistics provided by \code{summary} were change in 2.3.0 use \link{describe} for previous defaults.
+#' @note The statistics provided by \code{summary} were changed in 2.3.0. Use \link{describe} for
+#' previous defaults.
#' @seealso \link{describe}
setMethod("summary",
signature(object = "SparkDataFrame"),
@@ -3765,8 +3767,8 @@ setMethod("checkpoint",
#'
#' Create a multi-dimensional cube for the SparkDataFrame using the specified columns.
#'
-#' If grouping expression is missing \code{cube} creates a single global aggregate and is equivalent to
-#' direct application of \link{agg}.
+#' If grouping expression is missing \code{cube} creates a single global aggregate and is
+#' equivalent to direct application of \link{agg}.
#'
#' @param x a SparkDataFrame.
#' @param ... character name(s) or Column(s) to group on.
@@ -3800,8 +3802,8 @@ setMethod("cube",
#'
#' Create a multi-dimensional rollup for the SparkDataFrame using the specified columns.
#'
-#' If grouping expression is missing \code{rollup} creates a single global aggregate and is equivalent to
-#' direct application of \link{agg}.
+#' If grouping expression is missing \code{rollup} creates a single global aggregate and is
+#' equivalent to direct application of \link{agg}.
#'
#' @param x a SparkDataFrame.
#' @param ... character name(s) or Column(s) to group on.
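A hedged sketch contrasting the two, including the missing-grouping case both docs describe (assumes an active SparkR session):

df <- createDataFrame(mtcars)
head(agg(cube(df, "cyl", "gear"), avg(df$mpg)))    # every combination of the grouping columns
head(agg(rollup(df, "cyl", "gear"), avg(df$mpg)))  # hierarchical subtotals: (cyl, gear), (cyl), ()
head(agg(cube(df), avg(df$mpg)))                   # no grouping columns: one global aggregate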
6 changes: 3 additions & 3 deletions R/pkg/R/RDD.R
@@ -131,7 +131,7 @@ PipelinedRDD <- function(prev, func) {
# Return the serialization mode for an RDD.
setGeneric("getSerializedMode", function(rdd, ...) { standardGeneric("getSerializedMode") })
# For normal RDDs we can directly read the serializedMode
setMethod("getSerializedMode", signature(rdd = "RDD"), function(rdd) rdd@env$serializedMode )
setMethod("getSerializedMode", signature(rdd = "RDD"), function(rdd) rdd@env$serializedMode)
# For pipelined RDDs if jrdd_val is set then serializedMode should exist
# if not we return the defaultSerialization mode of "byte" as we don't know the serialization
# mode at this point in time.
@@ -145,7 +145,7 @@ setMethod("getSerializedMode", signature(rdd = "PipelinedRDD"),
})

# The jrdd accessor function.
setMethod("getJRDD", signature(rdd = "RDD"), function(rdd) rdd@jrdd )
setMethod("getJRDD", signature(rdd = "RDD"), function(rdd) rdd@jrdd)
setMethod("getJRDD", signature(rdd = "PipelinedRDD"),
function(rdd, serializedMode = "byte") {
if (!is.null(rdd@env$jrdd_val)) {
@@ -893,7 +893,7 @@ setMethod("sampleRDD",
if (withReplacement) {
count <- stats::rpois(1, fraction)
if (count > 0) {
-res[ (len + 1) : (len + count) ] <- rep(list(elem), count)
+res[(len + 1) : (len + count)] <- rep(list(elem), count)
len <- len + count
}
} else {
2 changes: 1 addition & 1 deletion R/pkg/R/WindowSpec.R
@@ -73,7 +73,7 @@ setMethod("show", "WindowSpec",
setMethod("partitionBy",
signature(x = "WindowSpec"),
function(x, col, ...) {
-stopifnot (class(col) %in% c("character", "Column"))
+stopifnot(class(col) %in% c("character", "Column"))

if (class(col) == "character") {
windowSpec(callJMethod(x@sws, "partitionBy", col, list(...)))
2 changes: 2 additions & 0 deletions R/pkg/R/column.R
@@ -238,8 +238,10 @@ setMethod("between", signature(x = "Column"),
#' @param x a Column.
#' @param dataType a character object describing the target data type.
#' See
+# nolint start
Member Author comment:
I just double checked the links.

#' \href{https://spark.apache.org/docs/latest/sparkr.html#data-type-mapping-between-r-and-spark}{
#' Spark Data Types} for available data types.
+# nolint end
#' @rdname cast
#' @name cast
#' @family colum_func
2 changes: 1 addition & 1 deletion R/pkg/R/context.R
@@ -329,7 +329,7 @@ spark.addFile <- function(path, recursive = FALSE) {
#' spark.getSparkFilesRootDirectory()
#'}
#' @note spark.getSparkFilesRootDirectory since 2.1.0
-spark.getSparkFilesRootDirectory <- function() {
+spark.getSparkFilesRootDirectory <- function() { # nolint
Member Author comment:
Ditto: it exceeds the 30-character identifier length limit, but the name is exposed in the docs.

Member comment:
this one is an API...

if (Sys.getenv("SPARKR_IS_RUNNING_ON_WORKER") == "") {
# Running on driver.
callJStatic("org.apache.spark.SparkFiles", "getRootDirectory")
2 changes: 1 addition & 1 deletion R/pkg/R/deserialize.R
@@ -43,7 +43,7 @@ readObject <- function(con) {
}

readTypedObject <- function(con, type) {
-switch (type,
+switch(type,
"i" = readInt(con),
"c" = readString(con),
"b" = readBoolean(con),
79 changes: 42 additions & 37 deletions R/pkg/R/functions.R
@@ -38,7 +38,8 @@ NULL
#'
#' Date time functions defined for \code{Column}.
#'
-#' @param x Column to compute on. In \code{window}, it must be a time Column of \code{TimestampType}.
+#' @param x Column to compute on. In \code{window}, it must be a time Column of
+#' \code{TimestampType}.
#' @param format For \code{to_date} and \code{to_timestamp}, it is the string to use to parse
#' Column \code{x} to DateType or TimestampType. For \code{trunc}, it is the string
#' to use to specify the truncation method. For example, "year", "yyyy", "yy" for
@@ -90,8 +91,8 @@ NULL
#'
#' Math functions defined for \code{Column}.
#'
-#' @param x Column to compute on. In \code{shiftLeft}, \code{shiftRight} and \code{shiftRightUnsigned},
-#' this is the number of bits to shift.
+#' @param x Column to compute on. In \code{shiftLeft}, \code{shiftRight} and
+#' \code{shiftRightUnsigned}, this is the number of bits to shift.
#' @param y Column to compute on.
#' @param ... additional argument(s).
#' @name column_math_functions
@@ -480,7 +481,7 @@ setMethod("ceiling",
setMethod("coalesce",
signature(x = "Column"),
function(x, ...) {
-jcols <- lapply(list(x, ...), function (x) {
+jcols <- lapply(list(x, ...), function(x) {
stopifnot(class(x) == "Column")
x@jc
})
@@ -676,7 +677,7 @@ setMethod("crc32",
setMethod("hash",
signature(x = "Column"),
function(x, ...) {
-jcols <- lapply(list(x, ...), function (x) {
+jcols <- lapply(list(x, ...), function(x) {
stopifnot(class(x) == "Column")
x@jc
})
@@ -1310,9 +1311,9 @@ setMethod("round",
#' Also known as Gaussian rounding or bankers' rounding that rounds to the nearest even number.
#' bround(2.5, 0) = 2, bround(3.5, 0) = 4.
#'
-#' @param scale round to \code{scale} digits to the right of the decimal point when \code{scale} > 0,
-#' the nearest even number when \code{scale} = 0, and \code{scale} digits to the left
-#' of the decimal point when \code{scale} < 0.
+#' @param scale round to \code{scale} digits to the right of the decimal point when
+#' \code{scale} > 0, the nearest even number when \code{scale} = 0, and \code{scale} digits
+#' to the left of the decimal point when \code{scale} < 0.
#' @rdname column_math_functions
#' @aliases bround bround,Column-method
#' @export
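A hedged sketch of two of the \code{scale} regimes described above (assumes an active SparkR session):

df <- createDataFrame(data.frame(x = c(2.5, 3.5, 25)))
head(select(df, bround(df$x, 0)))   # 2, 4, 25 -- ties round to the nearest even number
head(select(df, bround(df$x, -1)))  # 0, 0, 20 -- 25 is midway, so it rounds to the even ten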
@@ -2005,8 +2006,9 @@ setMethod("months_between", signature(y = "Column"),
})

#' @details
-#' \code{nanvl}: Returns the first column (\code{y}) if it is not NaN, or the second column (\code{x}) if
-#' the first column is NaN. Both inputs should be floating point columns (DoubleType or FloatType).
+#' \code{nanvl}: Returns the first column (\code{y}) if it is not NaN, or the second column
+#' (\code{x}) if the first column is NaN. Both inputs should be floating point columns
+#' (DoubleType or FloatType).
#'
#' @rdname column_nonaggregate_functions
#' @aliases nanvl nanvl,Column-method
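A hedged sketch of the fallback behavior (assumes an active SparkR session):

df <- createDataFrame(data.frame(y = c(1.0, NaN), x = c(0.5, 2.0)))
head(select(df, nanvl(df$y, df$x)))  # 1.0 (y is kept), then 2.0 (y is NaN, so x is used)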
@@ -2061,7 +2063,7 @@ setMethod("approxCountDistinct",
setMethod("countDistinct",
signature(x = "Column"),
function(x, ...) {
-jcols <- lapply(list(...), function (x) {
+jcols <- lapply(list(...), function(x) {
stopifnot(class(x) == "Column")
x@jc
})
@@ -2090,7 +2092,7 @@ setMethod("countDistinct",
setMethod("concat",
signature(x = "Column"),
function(x, ...) {
-jcols <- lapply(list(x, ...), function (x) {
+jcols <- lapply(list(x, ...), function(x) {
stopifnot(class(x) == "Column")
x@jc
})
@@ -2110,7 +2112,7 @@ setMethod("greatest",
signature(x = "Column"),
function(x, ...) {
stopifnot(length(list(...)) > 0)
-jcols <- lapply(list(x, ...), function (x) {
+jcols <- lapply(list(x, ...), function(x) {
stopifnot(class(x) == "Column")
x@jc
})
@@ -2130,7 +2132,7 @@ setMethod("least",
signature(x = "Column"),
function(x, ...) {
stopifnot(length(list(...)) > 0)
-jcols <- lapply(list(x, ...), function (x) {
+jcols <- lapply(list(x, ...), function(x) {
stopifnot(class(x) == "Column")
x@jc
})
@@ -2406,8 +2408,8 @@ setMethod("shiftLeft", signature(y = "Column", x = "numeric"),
})

#' @details
-#' \code{shiftRight}: (Signed) shifts the given value numBits right. If the given value is a long value,
-#' it will return a long value else it will return an integer value.
+#' \code{shiftRight}: (Signed) shifts the given value numBits right. If the given value is a long
+#' value, it will return a long value else it will return an integer value.
#'
#' @rdname column_math_functions
#' @aliases shiftRight shiftRight,Column,numeric-method
@@ -2505,9 +2507,10 @@ setMethod("format_string", signature(format = "character", x = "Column"),
})

#' @details
-#' \code{from_unixtime}: Converts the number of seconds from unix epoch (1970-01-01 00:00:00 UTC) to a
-#' string representing the timestamp of that moment in the current system time zone in the JVM in the
-#' given format. See \href{http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html}{
+#' \code{from_unixtime}: Converts the number of seconds from unix epoch (1970-01-01 00:00:00 UTC)
+#' to a string representing the timestamp of that moment in the current system time zone in the JVM
+#' in the given format.
+#' See \href{http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html}{
#' Customizing Formats} for available options.
#'
#' @rdname column_datetime_functions
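A hedged sketch of the default and a custom SimpleDateFormat pattern (assumes an active SparkR session):

df <- createDataFrame(data.frame(t = c(0, 1500000000)))
head(select(df, from_unixtime(df$t)))                # default format "yyyy-MM-dd HH:mm:ss"
head(select(df, from_unixtime(df$t, "yyyy-MM-dd")))  # custom pattern, date only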
@@ -2634,8 +2637,8 @@ setMethod("lpad", signature(x = "Column", len = "numeric", pad = "character"),
})

#' @details
-#' \code{rand}: Generates a random column with independent and identically distributed (i.i.d.) samples
-#' from U[0.0, 1.0].
+#' \code{rand}: Generates a random column with independent and identically distributed (i.i.d.)
+#' samples from U[0.0, 1.0].
#'
#' @rdname column_nonaggregate_functions
#' @param seed a random seed. Can be missing.
@@ -2664,8 +2667,8 @@ setMethod("rand", signature(seed = "numeric"),
})

#' @details
-#' \code{randn}: Generates a column with independent and identically distributed (i.i.d.) samples from
-#' the standard normal distribution.
+#' \code{randn}: Generates a column with independent and identically distributed (i.i.d.) samples
+#' from the standard normal distribution.
#'
#' @rdname column_nonaggregate_functions
#' @aliases randn randn,missing-method
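A hedged sketch of both generators, one sample per row; passing a seed makes runs reproducible (assumes an active SparkR session):

df <- createDataFrame(data.frame(id = 1:3))
head(select(df, rand(42), randn(42)))  # i.i.d. U[0.0, 1.0] and standard normal columns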
@@ -2831,8 +2834,8 @@ setMethod("unix_timestamp", signature(x = "Column", format = "character"),
})

#' @details
-#' \code{when}: Evaluates a list of conditions and returns one of multiple possible result expressions.
-#' For unmatched expressions null is returned.
+#' \code{when}: Evaluates a list of conditions and returns one of multiple possible result
+#' expressions. For unmatched expressions null is returned.
#'
#' @rdname column_nonaggregate_functions
#' @param condition the condition to test on. Must be a Column expression.
@@ -2859,8 +2862,8 @@ setMethod("when", signature(condition = "Column", value = "ANY"),
})

#' @details
-#' \code{ifelse}: Evaluates a list of conditions and returns \code{yes} if the conditions are satisfied.
-#' Otherwise \code{no} is returned for unmatched conditions.
+#' \code{ifelse}: Evaluates a list of conditions and returns \code{yes} if the conditions are
+#' satisfied. Otherwise \code{no} is returned for unmatched conditions.
#'
#' @rdname column_nonaggregate_functions
#' @param test a Column expression that describes the condition.
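A hedged sketch of the one-branch and two-branch forms (assumes an active SparkR session):

df <- createDataFrame(data.frame(x = c(-1, 0, 1)))
head(select(df, when(df$x > 0, "positive")))             # null for the unmatched rows
head(select(df, ifelse(df$x > 0, "positive", "other")))  # "other" for unmatched rows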
@@ -2990,7 +2993,8 @@ setMethod("ntile",
})

#' @details
-#' \code{percent_rank}: Returns the relative rank (i.e. percentile) of rows within a window partition.
+#' \code{percent_rank}: Returns the relative rank (i.e. percentile) of rows within a window
+#' partition.
#' This is computed by: (rank of row in its partition - 1) / (number of rows in the partition - 1).
#' This is equivalent to the \code{PERCENT_RANK} function in SQL.
#' The method should be used with no argument.
@@ -3160,7 +3164,8 @@ setMethod("posexplode",
})

#' @details
-#' \code{create_array}: Creates a new array column. The input columns must all have the same data type.
+#' \code{create_array}: Creates a new array column. The input columns must all have the same data
+#' type.
#'
#' @rdname column_nonaggregate_functions
#' @aliases create_array create_array,Column-method
@@ -3169,7 +3174,7 @@ setMethod("create_array",
setMethod("create_array",
signature(x = "Column"),
function(x, ...) {
-jcols <- lapply(list(x, ...), function (x) {
+jcols <- lapply(list(x, ...), function(x) {
stopifnot(class(x) == "Column")
x@jc
})
@@ -3178,8 +3183,8 @@ setMethod("create_array",
})

#' @details
-#' \code{create_map}: Creates a new map column. The input columns must be grouped as key-value pairs,
-#' e.g. (key1, value1, key2, value2, ...).
+#' \code{create_map}: Creates a new map column. The input columns must be grouped as key-value
+#' pairs, e.g. (key1, value1, key2, value2, ...).
#' The key columns must all have the same data type, and can't be null.
#' The value columns must all have the same data type.
#'
@@ -3190,7 +3195,7 @@ setMethod("create_map",
setMethod("create_map",
signature(x = "Column"),
function(x, ...) {
-jcols <- lapply(list(x, ...), function (x) {
+jcols <- lapply(list(x, ...), function(x) {
stopifnot(class(x) == "Column")
x@jc
})
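A hedged sketch of both constructors; note the keys for create_map are supplied as literal columns (assumes an active SparkR session):

df <- createDataFrame(data.frame(a = 1:2, b = 3:4))
head(select(df, create_array(df$a, df$b)))                    # array<int> column: [1, 3], [2, 4]
head(select(df, create_map(lit("a"), df$a, lit("b"), df$b)))  # map<string, int> column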
@@ -3352,9 +3357,9 @@ setMethod("not",
})

#' @details
-#' \code{grouping_bit}: Indicates whether a specified column in a GROUP BY list is aggregated or not,
-#' returns 1 for aggregated or 0 for not aggregated in the result set. Same as \code{GROUPING} in SQL
-#' and \code{grouping} function in Scala.
+#' \code{grouping_bit}: Indicates whether a specified column in a GROUP BY list is aggregated or
+#' not, returns 1 for aggregated or 0 for not aggregated in the result set. Same as \code{GROUPING}
+#' in SQL and \code{grouping} function in Scala.
#'
#' @rdname column_aggregate_functions
#' @aliases grouping_bit grouping_bit,Column-method
@@ -3412,7 +3417,7 @@ setMethod("grouping_bit",
setMethod("grouping_id",
signature(x = "Column"),
function(x, ...) {
-jcols <- lapply(list(x, ...), function (x) {
+jcols <- lapply(list(x, ...), function(x) {
stopifnot(class(x) == "Column")
x@jc
})
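A hedged sketch combining both indicators with cube, where grouping information is meaningful (assumes an active SparkR session):

df <- createDataFrame(mtcars)
head(agg(cube(df, "cyl", "gear"),
         grouping_bit(df$cyl),          # 1 where cyl was aggregated away, else 0
         grouping_id(df$cyl, df$gear),  # the grouping bits packed into one integer
         avg(df$mpg)))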