Commit

Merge pull request apache-spark-on-k8s#176 from palantir/rk/merge-upstream
robert3005 authored May 5, 2017
2 parents 1e7eac5 + f53fdff commit 6f22d26
Showing 1,109 changed files with 41,438 additions and 12,430 deletions.
1 change: 1 addition & 0 deletions LICENSE
@@ -297,3 +297,4 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
 (MIT License) RowsGroup (http://datatables.net/license/mit)
 (MIT License) jsonFormatter (http://www.jqueryscript.net/other/jQuery-Plugin-For-Pretty-JSON-Formatting-jsonFormatter.html)
 (MIT License) modernizr (https://github.com/Modernizr/Modernizr/blob/master/LICENSE)
+(MIT License) machinist (https://github.com/typelevel/machinist)
20 changes: 10 additions & 10 deletions R/check-cran.sh
@@ -20,18 +20,18 @@
 set -o pipefail
 set -e
 
-FWDIR="$(cd `dirname "${BASH_SOURCE[0]}"`; pwd)"
-pushd $FWDIR > /dev/null
+FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)"
+pushd "$FWDIR" > /dev/null
 
-. $FWDIR/find-r.sh
+. "$FWDIR/find-r.sh"
 
 # Install the package (this is required for code in vignettes to run when building it later)
 # Build the latest docs, but not vignettes, which is built with the package next
-. $FWDIR/install-dev.sh
+. "$FWDIR/install-dev.sh"
 
 # Build source package with vignettes
 SPARK_HOME="$(cd "${FWDIR}"/..; pwd)"
-. "${SPARK_HOME}"/bin/load-spark-env.sh
+. "${SPARK_HOME}/bin/load-spark-env.sh"
 if [ -f "${SPARK_HOME}/RELEASE" ]; then
   SPARK_JARS_DIR="${SPARK_HOME}/jars"
 else
@@ -40,16 +40,16 @@ fi
 
 if [ -d "$SPARK_JARS_DIR" ]; then
   # Build a zip file containing the source package with vignettes
-  SPARK_HOME="${SPARK_HOME}" "$R_SCRIPT_PATH/"R CMD build $FWDIR/pkg
+  SPARK_HOME="${SPARK_HOME}" "$R_SCRIPT_PATH/R" CMD build "$FWDIR/pkg"
 
   find pkg/vignettes/. -not -name '.' -not -name '*.Rmd' -not -name '*.md' -not -name '*.pdf' -not -name '*.html' -delete
 else
-  echo "Error Spark JARs not found in $SPARK_HOME"
+  echo "Error Spark JARs not found in '$SPARK_HOME'"
   exit 1
 fi
 
 # Run check as-cran.
-VERSION=`grep Version $FWDIR/pkg/DESCRIPTION | awk '{print $NF}'`
+VERSION=`grep Version "$FWDIR/pkg/DESCRIPTION" | awk '{print $NF}'`
 
 CRAN_CHECK_OPTIONS="--as-cran"
 
@@ -67,10 +67,10 @@ echo "Running CRAN check with $CRAN_CHECK_OPTIONS options"
 
 if [ -n "$NO_TESTS" ] && [ -n "$NO_MANUAL" ]
then
-  "$R_SCRIPT_PATH/"R CMD check $CRAN_CHECK_OPTIONS SparkR_"$VERSION".tar.gz
+  "$R_SCRIPT_PATH/R" CMD check $CRAN_CHECK_OPTIONS "SparkR_$VERSION.tar.gz"
 else
   # This will run tests and/or build vignettes, and require SPARK_HOME
-  SPARK_HOME="${SPARK_HOME}" "$R_SCRIPT_PATH/"R CMD check $CRAN_CHECK_OPTIONS SparkR_"$VERSION".tar.gz
+  SPARK_HOME="${SPARK_HOME}" "$R_SCRIPT_PATH/R" CMD check $CRAN_CHECK_OPTIONS "SparkR_$VERSION.tar.gz"
 fi
 
 popd > /dev/null
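
Nearly every hunk in these R/*.sh scripts makes the same fix: variable expansions that hold filesystem paths get wrapped in double quotes, so the scripts keep working when the Spark checkout sits in a path containing spaces. A minimal sketch of the failure mode, using a hypothetical path that is not part of this commit:

    #!/usr/bin/env bash
    # Hypothetical path containing a space; created here so the sketch runs.
    FWDIR="/tmp/spark checkout/R"
    mkdir -p "$FWDIR"

    # Unquoted: word splitting hands pushd two arguments and it errors out.
    pushd $FWDIR > /dev/null || echo "unquoted pushd failed"

    # Quoted: the expansion stays a single argument.
    pushd "$FWDIR" > /dev/null && echo "quoted pushd ok"
    popd > /dev/null

Changing "$R_SCRIPT_PATH/"R to "$R_SCRIPT_PATH/R" is behavior-neutral, since adjacent quoted and unquoted parts of a word concatenate; quoting the whole word is simply the conventional, less error-prone spelling.
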
10 changes: 5 additions & 5 deletions R/create-docs.sh
@@ -33,23 +33,23 @@ export FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)"
 export SPARK_HOME="$(cd "`dirname "${BASH_SOURCE[0]}"`"/..; pwd)"
 
 # Required for setting SPARK_SCALA_VERSION
-. "${SPARK_HOME}"/bin/load-spark-env.sh
+. "${SPARK_HOME}/bin/load-spark-env.sh"
 
 echo "Using Scala $SPARK_SCALA_VERSION"
 
-pushd $FWDIR > /dev/null
-. $FWDIR/find-r.sh
+pushd "$FWDIR" > /dev/null
+. "$FWDIR/find-r.sh"
 
 # Install the package (this will also generate the Rd files)
-. $FWDIR/install-dev.sh
+. "$FWDIR/install-dev.sh"
 
 # Now create HTML files
 
 # knit_rd puts html in current working directory
 mkdir -p pkg/html
 pushd pkg/html
 
-"$R_SCRIPT_PATH/"Rscript -e 'libDir <- "../../lib"; library(SparkR, lib.loc=libDir); library(knitr); knit_rd("SparkR", links = tools::findHTMLlinks(paste(libDir, "SparkR", sep="/")))'
+"$R_SCRIPT_PATH/Rscript" -e 'libDir <- "../../lib"; library(SparkR, lib.loc=libDir); library(knitr); knit_rd("SparkR", links = tools::findHTMLlinks(paste(libDir, "SparkR", sep="/")))'
 
 popd
8 changes: 4 additions & 4 deletions R/create-rd.sh
@@ -29,9 +29,9 @@
 set -o pipefail
 set -e
 
-FWDIR="$(cd `dirname "${BASH_SOURCE[0]}"`; pwd)"
-pushd $FWDIR > /dev/null
-. $FWDIR/find-r.sh
+FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)"
+pushd "$FWDIR" > /dev/null
+. "$FWDIR/find-r.sh"
 
 # Generate Rd files if devtools is installed
-"$R_SCRIPT_PATH/"Rscript -e ' if("devtools" %in% rownames(installed.packages())) { library(devtools); devtools::document(pkg="./pkg", roclets=c("rd")) }'
+"$R_SCRIPT_PATH/Rscript" -e ' if("devtools" %in% rownames(installed.packages())) { library(devtools); devtools::document(pkg="./pkg", roclets=c("rd")) }'
14 changes: 7 additions & 7 deletions R/install-dev.sh
@@ -29,21 +29,21 @@
 set -o pipefail
 set -e
 
-FWDIR="$(cd `dirname "${BASH_SOURCE[0]}"`; pwd)"
+FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)"
 LIB_DIR="$FWDIR/lib"
 
-mkdir -p $LIB_DIR
+mkdir -p "$LIB_DIR"
 
-pushd $FWDIR > /dev/null
-. $FWDIR/find-r.sh
+pushd "$FWDIR" > /dev/null
+. "$FWDIR/find-r.sh"
 
-. $FWDIR/create-rd.sh
+. "$FWDIR/create-rd.sh"
 
 # Install SparkR to $LIB_DIR
-"$R_SCRIPT_PATH/"R CMD INSTALL --library=$LIB_DIR $FWDIR/pkg/
+"$R_SCRIPT_PATH/R" CMD INSTALL --library="$LIB_DIR" "$FWDIR/pkg/"
 
 # Zip the SparkR package so that it can be distributed to worker nodes on YARN
-cd $LIB_DIR
+cd "$LIB_DIR"
 jar cfM "$LIB_DIR/sparkr.zip" SparkR
 
 popd > /dev/null
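
The jar cfM invocation above builds sparkr.zip without a manifest (c = create, f = named file, M = skip the manifest), so the result is a plain zip archive with the SparkR directory at its top level, ready to ship to YARN workers. A quick, illustrative way to sanity-check the output after running install-dev.sh:

    # List the archive produced above; entries should sit under a single
    # top-level SparkR/ directory. Paths assume the R/ directory of a checkout.
    unzip -l lib/sparkr.zip | head
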
20 changes: 10 additions & 10 deletions R/install-source-package.sh
@@ -29,28 +29,28 @@
 set -o pipefail
 set -e
 
-FWDIR="$(cd `dirname "${BASH_SOURCE[0]}"`; pwd)"
-pushd $FWDIR > /dev/null
-. $FWDIR/find-r.sh
+FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)"
+pushd "$FWDIR" > /dev/null
+. "$FWDIR/find-r.sh"
 
 if [ -z "$VERSION" ]; then
-  VERSION=`grep Version $FWDIR/pkg/DESCRIPTION | awk '{print $NF}'`
+  VERSION=`grep Version "$FWDIR/pkg/DESCRIPTION" | awk '{print $NF}'`
 fi
 
-if [ ! -f "$FWDIR"/SparkR_"$VERSION".tar.gz ]; then
-  echo -e "R source package file $FWDIR/SparkR_$VERSION.tar.gz is not found."
+if [ ! -f "$FWDIR/SparkR_$VERSION.tar.gz" ]; then
+  echo -e "R source package file '$FWDIR/SparkR_$VERSION.tar.gz' is not found."
   echo -e "Please build R source package with check-cran.sh"
   exit -1;
 fi
 
 echo "Removing lib path and installing from source package"
 LIB_DIR="$FWDIR/lib"
-rm -rf $LIB_DIR
-mkdir -p $LIB_DIR
-"$R_SCRIPT_PATH/"R CMD INSTALL SparkR_"$VERSION".tar.gz --library=$LIB_DIR
+rm -rf "$LIB_DIR"
+mkdir -p "$LIB_DIR"
+"$R_SCRIPT_PATH/R" CMD INSTALL "SparkR_$VERSION.tar.gz" --library="$LIB_DIR"
 
 # Zip the SparkR package so that it can be distributed to worker nodes on YARN
-pushd $LIB_DIR > /dev/null
+pushd "$LIB_DIR" > /dev/null
 jar cfM "$LIB_DIR/sparkr.zip" SparkR
 popd > /dev/null
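
The VERSION fallback reads the Version field of the package DESCRIPTION; awk '{print $NF}' keeps only the last whitespace-separated field of the matching line. A self-contained sketch against a stand-in file (the real file is R/pkg/DESCRIPTION, and the version number here is purely illustrative):

    # Stand-in DESCRIPTION with a hypothetical version number.
    printf 'Package: SparkR\nVersion: 2.2.0\n' > /tmp/DESCRIPTION

    # grep keeps the "Version: 2.2.0" line; awk '{print $NF}' keeps its
    # last whitespace-separated field, i.e. the bare version string.
    VERSION=`grep Version /tmp/DESCRIPTION | awk '{print $NF}'`
    echo "$VERSION"
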
2 changes: 1 addition & 1 deletion R/pkg/.lintr
@@ -1,2 +1,2 @@
-linters: with_defaults(line_length_linter(100), camel_case_linter = NULL, open_curly_linter(allow_single_line = TRUE), closed_curly_linter(allow_single_line = TRUE))
+linters: with_defaults(line_length_linter(100), multiple_dots_linter = NULL, camel_case_linter = NULL, open_curly_linter(allow_single_line = TRUE), closed_curly_linter(allow_single_line = TRUE))
 exclusions: list("inst/profile/general.R" = 1, "inst/profile/shell.R")
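
Adding multiple_dots_linter = NULL stops lintr from flagging dotted identifiers, which SparkR uses pervasively (spark.glm, read.df, and the spark.fpGrowth family of exports added below). Assuming the lintr package is installed, the updated config can be exercised from the shell:

    # Lint the SparkR sources with the updated .lintr; assumes the working
    # directory is R/pkg of a Spark checkout.
    Rscript -e 'lintr::lint_package(".")'
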
3 changes: 3 additions & 0 deletions R/pkg/DESCRIPTION
@@ -35,6 +35,7 @@ Collate:
     'WindowSpec.R'
     'backend.R'
     'broadcast.R'
+    'catalog.R'
     'client.R'
     'context.R'
     'deserialize.R'
@@ -43,6 +44,7 @@ Collate:
     'jvm.R'
     'mllib_classification.R'
     'mllib_clustering.R'
+    'mllib_fpm.R'
     'mllib_recommendation.R'
     'mllib_regression.R'
     'mllib_stat.R'
@@ -51,6 +53,7 @@
     'serialize.R'
     'sparkR.R'
     'stats.R'
+    'streaming.R'
     'types.R'
     'utils.R'
     'window.R'
48 changes: 46 additions & 2 deletions R/pkg/NAMESPACE
@@ -66,7 +66,10 @@ exportMethods("glm",
               "spark.randomForest",
               "spark.gbt",
               "spark.bisectingKmeans",
-              "spark.svmLinear")
+              "spark.svmLinear",
+              "spark.fpGrowth",
+              "spark.freqItemsets",
+              "spark.associationRules")
 
 # Job group lifecycle management methods
 export("setJobGroup",
@@ -82,6 +85,7 @@ exportMethods("arrange",
               "as.data.frame",
               "attach",
               "cache",
+              "checkpoint",
               "coalesce",
               "collect",
               "colnames",
@@ -97,6 +101,7 @@ exportMethods("arrange",
               "createOrReplaceTempView",
               "crossJoin",
               "crosstab",
+              "cube",
               "dapply",
               "dapplyCollect",
               "describe",
@@ -118,9 +123,11 @@ exportMethods("arrange",
               "group_by",
               "groupBy",
               "head",
+              "hint",
               "insertInto",
               "intersect",
               "isLocal",
+              "isStreaming",
               "join",
               "limit",
               "merge",
@@ -138,6 +145,7 @@ exportMethods("arrange",
               "registerTempTable",
               "rename",
               "repartition",
+              "rollup",
               "sample",
               "sample_frac",
               "sampleBy",
@@ -169,12 +177,14 @@ exportMethods("arrange",
               "write.json",
               "write.orc",
               "write.parquet",
+              "write.stream",
               "write.text",
               "write.ml")
 
 exportClasses("Column")
 
-exportMethods("%in%",
+exportMethods("%<=>%",
+              "%in%",
               "abs",
               "acos",
               "add_months",
@@ -197,6 +207,8 @@ exportMethods("%in%",
               "cbrt",
               "ceil",
               "ceiling",
+              "collect_list",
+              "collect_set",
               "column",
               "concat",
               "concat_ws",
@@ -207,6 +219,8 @@ exportMethods("%in%",
               "count",
               "countDistinct",
               "crc32",
+              "create_array",
+              "create_map",
               "hash",
               "cume_dist",
               "date_add",
@@ -222,6 +236,7 @@ exportMethods("%in%",
               "endsWith",
               "exp",
               "explode",
+              "explode_outer",
               "expm1",
               "expr",
               "factorial",
@@ -235,12 +250,15 @@ exportMethods("%in%",
               "getField",
               "getItem",
               "greatest",
+              "grouping_bit",
+              "grouping_id",
               "hex",
               "histogram",
               "hour",
               "hypot",
               "ifelse",
               "initcap",
+              "input_file_name",
               "instr",
               "isNaN",
               "isNotNull",
@@ -278,18 +296,21 @@ exportMethods("%in%",
               "nanvl",
               "negate",
               "next_day",
+              "not",
               "ntile",
               "otherwise",
               "over",
               "percent_rank",
               "pmod",
               "posexplode",
+              "posexplode_outer",
               "quarter",
               "rand",
               "randn",
               "rank",
               "regexp_extract",
               "regexp_replace",
+              "repeat_string",
               "reverse",
               "rint",
               "rlike",
@@ -313,6 +334,7 @@ exportMethods("%in%",
               "sort_array",
               "soundex",
               "spark_partition_id",
+              "split_string",
               "stddev",
               "stddev_pop",
               "stddev_samp",
@@ -355,17 +377,29 @@ export("as.DataFrame",
        "clearCache",
        "createDataFrame",
        "createExternalTable",
+       "createTable",
+       "currentDatabase",
        "dropTempTable",
        "dropTempView",
        "jsonFile",
+       "listColumns",
+       "listDatabases",
+       "listFunctions",
+       "listTables",
        "loadDF",
        "parquetFile",
        "read.df",
        "read.jdbc",
        "read.json",
        "read.orc",
        "read.parquet",
+       "read.stream",
        "read.text",
+       "recoverPartitions",
+       "refreshByPath",
+       "refreshTable",
+       "setCheckpointDir",
+       "setCurrentDatabase",
        "spark.lapply",
        "spark.addFile",
        "spark.getSparkFilesRootDirectory",
@@ -402,6 +436,16 @@ export("partitionBy",
 export("windowPartitionBy",
        "windowOrderBy")
 
+exportClasses("StreamingQuery")
+
+export("awaitTermination",
+       "isActive",
+       "lastProgress",
+       "queryName",
+       "status",
+       "stopQuery")
+
+
 S3method(print, jobj)
 S3method(print, structField)
 S3method(print, structType)
(Diff truncated: the remaining changed files in this commit are not shown.)
