Commit 7825bc3 (merge of parents e7b4d43 and cc986a7)
beliefer committed Jan 28, 2020
Showing 417 changed files with 29,829 additions and 26,771 deletions.
5 changes: 5 additions & 0 deletions .gitattributes
@@ -1,2 +1,7 @@
*.bat text eol=crlf
*.cmd text eol=crlf
*.java text eol=lf
*.scala text eol=lf
*.xml text eol=lf
*.py text eol=lf
*.R text eol=lf
8 changes: 0 additions & 8 deletions .github/workflows/master.yml
@@ -66,14 +66,6 @@ jobs:
export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
export MAVEN_CLI_OPTS="--no-transfer-progress"
mkdir -p ~/.m2
# `Maven Central` is too flaky in terms of downloading artifacts in `GitHub Action` environment.
# `Google Maven Central Mirror` is too slow in terms of syncing upstream. To get the best combination,
# 1) we set `Google Maven Central` as a mirror of `central` in `GitHub Action` environment only.
# 2) we duplicate `Maven Central` in pom.xml with ID `central_without_mirror`.
# In other words, in GitHub Action environment, `central` is mirrored by `Google Maven Central` first.
# If `Google Maven Central` doesn't provide the artifact due to its slowness, `central_without_mirror` will be used.
# Note that we aim to achieve the above while keeping the existing behavior of non-`GitHub Action` environment unchanged.
echo "<settings><mirrors><mirror><id>google-maven-central</id><name>GCS Maven Central mirror</name><url>https://maven-central.storage-download.googleapis.com/repos/central/data/</url><mirrorOf>central</mirrorOf></mirror></mirrors></settings>" > ~/.m2/settings.xml
./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -P${{ matrix.hive }} -Phive-thriftserver -P${{ matrix.hadoop }} -Phadoop-cloud -Djava.version=${{ matrix.java }} install
rm -rf ~/.m2/repository/org/apache/spark
1 change: 1 addition & 0 deletions R/pkg/NAMESPACE
@@ -335,6 +335,7 @@ exportMethods("%<=>%",
"ntile",
"otherwise",
"over",
"overlay",
"percent_rank",
"pmod",
"posexplode",
41 changes: 37 additions & 4 deletions R/pkg/R/functions.R
@@ -136,6 +136,14 @@ NULL
#' format to. See 'Details'.
#' }
#' @param y Column to compute on.
#' @param pos In \itemize{
#' \item \code{locate}: a start position of search.
#' \item \code{overlay}: a start position for replacement.
#' }
#' @param len In \itemize{
#' \item \code{lpad} the maximum length of each output result.
#' \item \code{overlay} a number of bytes to replace.
#' }
#' @param ... additional Columns.
#' @name column_string_functions
#' @rdname column_string_functions
@@ -1319,6 +1327,35 @@ setMethod("negate",
column(jc)
})

#' @details
#' \code{overlay}: Overlay the specified portion of \code{x} with \code{replace},
#' starting from byte position \code{pos} of \code{x} and proceeding for
#' \code{len} bytes.
#'
#' @param replace a Column with replacement.
#'
#' @rdname column_string_functions
#' @aliases overlay overlay,Column-method,numericOrColumn-method
#' @note overlay since 3.0.0
setMethod("overlay",
signature(x = "Column", replace = "Column", pos = "numericOrColumn"),
function(x, replace, pos, len = -1) {
if (is.numeric(pos)) {
pos <- lit(as.integer(pos))
}

if (is.numeric(len)) {
len <- lit(as.integer(len))
}

jc <- callJStatic(
"org.apache.spark.sql.functions", "overlay",
x@jc, replace@jc, pos@jc, len@jc
)

column(jc)
})

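For reference, a minimal SparkR usage sketch of the new overlay function (the data and column names are illustrative; an active SparkR session is assumed). It replaces a portion of one string column with another, starting at byte position pos and running for len bytes, or for the length of replace when len is omitted:

```r
library(SparkR)
sparkR.session()

# Hypothetical one-row example data
df <- createDataFrame(data.frame(src = "SPARK_SQL", repl = "CORE",
                                 stringsAsFactors = FALSE))

# Replace 4 bytes of `src` starting at position 7 with `repl` -> "SPARK_CORE"
head(select(df, overlay(df$src, df$repl, 7, 4)))

# With the default len = -1, the length of `repl` is used -> also "SPARK_CORE"
head(select(df, overlay(df$src, df$repl, 7)))
```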
#' @details
#' \code{quarter}: Extracts the quarter as an integer from a given date/timestamp/string.
#'
@@ -2459,7 +2496,6 @@ setMethod("schema_of_csv", signature(x = "characterOrColumn"),
#' @note from_utc_timestamp since 1.5.0
setMethod("from_utc_timestamp", signature(y = "Column", x = "character"),
function(y, x) {
.Deprecated(msg = "from_utc_timestamp is deprecated. See SPARK-25496.")
jc <- callJStatic("org.apache.spark.sql.functions", "from_utc_timestamp", y@jc, x)
column(jc)
})
@@ -2518,7 +2554,6 @@ setMethod("next_day", signature(y = "Column", x = "character"),
#' @note to_utc_timestamp since 1.5.0
setMethod("to_utc_timestamp", signature(y = "Column", x = "character"),
function(y, x) {
.Deprecated(msg = "to_utc_timestamp is deprecated. See SPARK-25496.")
jc <- callJStatic("org.apache.spark.sql.functions", "to_utc_timestamp", y@jc, x)
column(jc)
})
@@ -2819,7 +2854,6 @@ setMethod("window", signature(x = "Column"),
#'
#' @param substr a character string to be matched.
#' @param str a Column where matches are sought for each entry.
#' @param pos start position of search.
#' @rdname column_string_functions
#' @aliases locate locate,character,Column-method
#' @note locate since 1.5.0
@@ -2834,7 +2868,6 @@ setMethod("locate", signature(substr = "character", str = "Column"),
#' @details
#' \code{lpad}: Left-pads the string column with \code{pad} to a length of \code{len}.
#'
#' @param len maximum length of each output result.
#' @param pad a character string to be padded with.
#' @rdname column_string_functions
#' @aliases lpad lpad,Column,numeric,character-method
4 changes: 4 additions & 0 deletions R/pkg/R/generics.R
@@ -1149,6 +1149,10 @@ setGeneric("ntile", function(x) { standardGeneric("ntile") })
#' @name NULL
setGeneric("n_distinct", function(x, ...) { standardGeneric("n_distinct") })

#' @rdname column_string_functions
#' @name NULL
setGeneric("overlay", function(x, replace, pos, ...) { standardGeneric("overlay") })

#' @rdname column_window_functions
#' @name NULL
setGeneric("percent_rank", function(x = "missing") { standardGeneric("percent_rank") })
9 changes: 6 additions & 3 deletions R/pkg/R/mllib_tree.R
@@ -393,6 +393,7 @@ setMethod("write.ml", signature(object = "GBTClassificationModel", path = "chara
#' "error" (throw an error), "keep" (put invalid data in
#' a special additional bucket, at index numLabels). Default
#' is "error".
#' @param bootstrap Whether bootstrap samples are used when building trees.
#' @param ... additional arguments passed to the method.
#' @aliases spark.randomForest,SparkDataFrame,formula-method
#' @return \code{spark.randomForest} returns a fitted Random Forest model.
@@ -428,7 +429,8 @@ setMethod("spark.randomForest", signature(data = "SparkDataFrame", formula = "fo
featureSubsetStrategy = "auto", seed = NULL, subsamplingRate = 1.0,
minInstancesPerNode = 1, minInfoGain = 0.0, checkpointInterval = 10,
maxMemoryInMB = 256, cacheNodeIds = FALSE,
handleInvalid = c("error", "keep", "skip")) {
handleInvalid = c("error", "keep", "skip"),
bootstrap = TRUE) {
type <- match.arg(type)
formula <- paste(deparse(formula), collapse = "")
if (!is.null(seed)) {
@@ -445,7 +447,8 @@ setMethod("spark.randomForest", signature(data = "SparkDataFrame", formula = "fo
as.numeric(minInfoGain), as.integer(checkpointInterval),
as.character(featureSubsetStrategy), seed,
as.numeric(subsamplingRate),
as.integer(maxMemoryInMB), as.logical(cacheNodeIds))
as.integer(maxMemoryInMB), as.logical(cacheNodeIds),
as.logical(bootstrap))
new("RandomForestRegressionModel", jobj = jobj)
},
classification = {
Expand All @@ -460,7 +463,7 @@ setMethod("spark.randomForest", signature(data = "SparkDataFrame", formula = "fo
as.character(featureSubsetStrategy), seed,
as.numeric(subsamplingRate),
as.integer(maxMemoryInMB), as.logical(cacheNodeIds),
handleInvalid)
handleInvalid, as.logical(bootstrap))
new("RandomForestClassificationModel", jobj = jobj)
}
)
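For context, a brief hedged sketch of how the new bootstrap argument to spark.randomForest can be used from SparkR (the dataset and tuning values are illustrative; per the signature above, bootstrap defaults to TRUE):

```r
library(SparkR)
sparkR.session()

# Train a random forest regressor on the built-in longley data,
# disabling bootstrap sampling so every tree sees the full data set.
df <- suppressWarnings(createDataFrame(longley))
model <- spark.randomForest(df, Employed ~ ., "regression",
                            numTrees = 5, maxDepth = 5,
                            bootstrap = FALSE)
summary(model)
```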
4 changes: 4 additions & 0 deletions R/pkg/tests/fulltests/data/test_utils_utf.json
@@ -0,0 +1,4 @@
{"name": "안녕하세요"}
{"name": "您好", "age": 30}
{"name": "こんにちは", "age": 19}
{"name": "Xin chào"}
2 changes: 1 addition & 1 deletion R/pkg/tests/fulltests/test_mllib_tree.R
@@ -130,7 +130,7 @@ test_that("spark.randomForest", {
# regression
data <- suppressWarnings(createDataFrame(longley))
model <- spark.randomForest(data, Employed ~ ., "regression", maxDepth = 5, maxBins = 16,
numTrees = 1, seed = 1)
numTrees = 1, seed = 1, bootstrap = FALSE)

predictions <- collect(predict(model, data))
expect_equal(predictions$prediction, c(60.323, 61.122, 60.171, 61.187,
49 changes: 24 additions & 25 deletions R/pkg/tests/fulltests/test_sparkSQL.R
@@ -848,24 +848,31 @@ test_that("collect() and take() on a DataFrame return the same number of rows an
})

test_that("collect() support Unicode characters", {
lines <- c("{\"name\":\"안녕하세요\"}",
"{\"name\":\"您好\", \"age\":30}",
"{\"name\":\"こんにちは\", \"age\":19}",
"{\"name\":\"Xin chào\"}")
jsonPath <- file.path(
Sys.getenv("SPARK_HOME"),
"R", "pkg", "tests", "fulltests", "data",
"test_utils_utf.json"
)

lines <- readLines(jsonPath, encoding = "UTF-8")

jsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp")
writeLines(lines, jsonPath)
expected <- regmatches(lines, gregexpr('(?<="name": ").*?(?=")', lines, perl = TRUE))

df <- read.df(jsonPath, "json")
rdf <- collect(df)
expect_true(is.data.frame(rdf))
expect_equal(rdf$name[1], markUtf8("안녕하세요"))
expect_equal(rdf$name[2], markUtf8("您好"))
expect_equal(rdf$name[3], markUtf8("こんにちは"))
expect_equal(rdf$name[4], markUtf8("Xin chào"))
expect_equal(rdf$name[1], expected[[1]])
expect_equal(rdf$name[2], expected[[2]])
expect_equal(rdf$name[3], expected[[3]])
expect_equal(rdf$name[4], expected[[4]])

df1 <- createDataFrame(rdf)
expect_equal(collect(where(df1, df1$name == markUtf8("您好")))$name, markUtf8("您好"))
expect_equal(
collect(
where(df1, df1$name == expected[[2]])
)$name,
expected[[2]]
)
})

test_that("multiple pipeline transformations result in an RDD with the correct values", {
Expand Down Expand Up @@ -1405,6 +1412,8 @@ test_that("column functions", {
trunc(c, "month") + trunc(c, "mon") + trunc(c, "mm")
c24 <- date_trunc("hour", c) + date_trunc("minute", c) + date_trunc("week", c) +
date_trunc("quarter", c) + current_date() + current_timestamp()
c25 <- overlay(c1, c2, c3, c3) + overlay(c1, c2, c3) + overlay(c1, c2, 1) +
overlay(c1, c2, 3, 4)

# Test if base::is.nan() is exposed
expect_equal(is.nan(c("a", "b")), c(FALSE, FALSE))
@@ -1905,20 +1914,10 @@ test_that("date functions on a DataFrame", {
df2 <- createDataFrame(l2)
expect_equal(collect(select(df2, minute(df2$b)))[, 1], c(34, 24))
expect_equal(collect(select(df2, second(df2$b)))[, 1], c(0, 34))
conf <- callJMethod(sparkSession, "conf")
isUtcTimestampFuncEnabled <- callJMethod(conf, "get", "spark.sql.legacy.utcTimestampFunc.enabled")
callJMethod(conf, "set", "spark.sql.legacy.utcTimestampFunc.enabled", "true")
tryCatch({
# Both from_utc_timestamp and to_utc_timestamp are deprecated as of SPARK-25496
expect_equal(suppressWarnings(collect(select(df2, from_utc_timestamp(df2$b, "JST"))))[, 1],
c(as.POSIXct("2012-12-13 21:34:00 UTC"), as.POSIXct("2014-12-15 10:24:34 UTC")))
expect_equal(suppressWarnings(collect(select(df2, to_utc_timestamp(df2$b, "JST"))))[, 1],
c(as.POSIXct("2012-12-13 03:34:00 UTC"), as.POSIXct("2014-12-14 16:24:34 UTC")))
},
finally = {
# Reverting the conf back
callJMethod(conf, "set", "spark.sql.legacy.utcTimestampFunc.enabled", isUtcTimestampFuncEnabled)
})
expect_equal(collect(select(df2, from_utc_timestamp(df2$b, "JST")))[, 1],
c(as.POSIXct("2012-12-13 21:34:00 UTC"), as.POSIXct("2014-12-15 10:24:34 UTC")))
expect_equal(collect(select(df2, to_utc_timestamp(df2$b, "JST")))[, 1],
c(as.POSIXct("2012-12-13 03:34:00 UTC"), as.POSIXct("2014-12-14 16:24:34 UTC")))
expect_gt(collect(select(df2, unix_timestamp()))[1, 1], 0)
expect_gt(collect(select(df2, unix_timestamp(df2$b)))[1, 1], 0)
expect_gt(collect(select(df2, unix_timestamp(lit("2015-01-01"), "yyyy-MM-dd")))[1, 1], 0)
10 changes: 8 additions & 2 deletions R/pkg/tests/fulltests/test_utils.R
@@ -89,7 +89,10 @@ test_that("cleanClosure on R functions", {
lapply(x, g) + 1 # Test for capturing function call "g"'s closure as an argument of lapply.
l$field[1, 1] <- 3 # Test for access operators `$`.
res <- defUse + l$field[1, ] # Test for def-use chain of "defUse", and "" symbol.
f(res) # Test for recursive calls.
# Enable once SPARK-30629 is fixed
# nolint start
# f(res) # Test for recursive calls.
# nolint end
}
newF <- cleanClosure(f)
env <- environment(newF)
@@ -101,7 +104,10 @@
# nolint end
expect_true("g" %in% ls(env))
expect_true("l" %in% ls(env))
expect_true("f" %in% ls(env))
# Enable once SPARK-30629 is fixed
# nolint start
# expect_true("f" %in% ls(env))
# nolint end
expect_equal(get("l", envir = env, inherits = FALSE), l)
# "y" should be in the environment of g.
newG <- get("g", envir = env, inherits = FALSE)
2 changes: 1 addition & 1 deletion README.md
@@ -9,7 +9,7 @@ and Structured Streaming for stream processing.

<https://spark.apache.org/>

[![Jenkins Build](https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-sbt-hadoop-2.7/badge/icon)](https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-sbt-hadoop-2.7)
[![Jenkins Build](https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-sbt-hadoop-2.7-hive-2.3/badge/icon)](https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-sbt-hadoop-2.7-hive-2.3)
[![AppVeyor Build](https://img.shields.io/appveyor/ci/ApacheSoftwareFoundation/spark/master.svg?style=plastic&logo=appveyor)](https://ci.appveyor.com/project/ApacheSoftwareFoundation/spark)
[![PySpark Coverage](https://img.shields.io/badge/dynamic/xml.svg?label=pyspark%20coverage&url=https%3A%2F%2Fspark-test.github.io%2Fpyspark-coverage-site&query=%2Fhtml%2Fbody%2Fdiv%5B1%5D%2Fdiv%2Fh1%2Fspan&colorB=brightgreen&style=plastic)](https://spark-test.github.io/pyspark-coverage-site)

CalendarInterval.java
@@ -17,6 +17,8 @@

package org.apache.spark.unsafe.types;

import org.apache.spark.annotation.Unstable;

import java.io.Serializable;
import java.math.BigDecimal;
import java.time.Duration;
@@ -27,8 +29,21 @@
import static org.apache.spark.sql.catalyst.util.DateTimeConstants.*;

/**
* The internal representation of interval type.
* The class representing calendar intervals. The calendar interval is stored internally in
* three components:
* <ul>
* <li>an integer value representing the number of `months` in this interval,</li>
* <li>an integer value representing the number of `days` in this interval,</li>
* <li>a long value representing the number of `microseconds` in this interval.</li>
* </ul>
*
* The `months` and `days` are not units of time with a constant length (unlike hours, seconds), so
 * they are kept as two separate fields, distinct from microseconds. One month may be equal to 28, 29, 30 or 31 days
* and one day may be equal to 23, 24 or 25 hours (daylight saving).
*
* @since 3.0.0
*/
@Unstable
public final class CalendarInterval implements Serializable {
public final int months;
public final int days;
CalendarInterval test suite (Java)
@@ -46,6 +46,36 @@ public void equalsTest() {
assertEquals(i1, i6);
}

@Test
public void toStringTest() {
CalendarInterval i;

i = new CalendarInterval(0, 0, 0);
assertEquals("0 seconds", i.toString());

i = new CalendarInterval(34, 0, 0);
assertEquals("2 years 10 months", i.toString());

i = new CalendarInterval(-34, 0, 0);
assertEquals("-2 years -10 months", i.toString());

i = new CalendarInterval(0, 31, 0);
assertEquals("31 days", i.toString());

i = new CalendarInterval(0, -31, 0);
assertEquals("-31 days", i.toString());

i = new CalendarInterval(0, 0, 3 * MICROS_PER_HOUR + 13 * MICROS_PER_MINUTE + 123);
assertEquals("3 hours 13 minutes 0.000123 seconds", i.toString());

i = new CalendarInterval(0, 0, -3 * MICROS_PER_HOUR - 13 * MICROS_PER_MINUTE - 123);
assertEquals("-3 hours -13 minutes -0.000123 seconds", i.toString());

i = new CalendarInterval(34, 31, 3 * MICROS_PER_HOUR + 13 * MICROS_PER_MINUTE + 123);
assertEquals("2 years 10 months 31 days 3 hours 13 minutes 0.000123 seconds",
i.toString());
}

@Test
public void periodAndDurationTest() {
CalendarInterval interval = new CalendarInterval(120, -40, 123456);