Merge branch 'master' into groupby
Conflicts:
	python/pyspark/rdd.py
	python/pyspark/shuffle.py
	python/pyspark/tests.py
davies committed Aug 27, 2014
2 parents 85138e6 + 73b3089 commit b48cda5
Showing 139 changed files with 3,729 additions and 908 deletions.
9 changes: 7 additions & 2 deletions README.md
@@ -69,7 +69,7 @@ Many of the example programs print usage help if no params are given.
Testing first requires [building Spark](#building-spark). Once Spark is built, tests
can be run using:

-./sbt/sbt test
+./dev/run-tests

## A Note About Hadoop Versions

@@ -118,7 +118,10 @@ If your project is built with Maven, add this to your POM file's `<dependencies>
## A Note About Thrift JDBC server and CLI for Spark SQL

Spark SQL supports Thrift JDBC server and CLI.
-See sql-programming-guide.md for more information about using the JDBC server.
+See sql-programming-guide.md for more information about using the JDBC server and CLI.
+You can use those features by setting `-Phive` when building Spark as follows.
+
+$ sbt/sbt -Phive assembly

## Configuration

@@ -136,3 +139,5 @@ submitting any copyrighted material via pull request, email, or other means
you agree to license the material under the project's open source license and
warrant that you have the legal authority to do so.

+Please see [Contributing to Spark wiki page](https://cwiki.apache.org/SPARK/Contributing+to+Spark)
+for more information.
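
Taken together, the README changes above describe a simple build-and-test flow. A brief sketch using only commands that appear in this diff (paths relative to the Spark source root):

    # Build Spark with the Thrift JDBC server and SQL CLI enabled
    sbt/sbt -Phive assembly

    # Run the test suite through the new entry point
    ./dev/run-tests

    # Once built with -Phive, the SQL CLI script changed later in this commit is usable
    ./bin/spark-sql --help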
6 changes: 3 additions & 3 deletions bin/load-spark-env.sh
@@ -27,12 +27,12 @@ if [ -z "$SPARK_ENV_LOADED" ]; then
# Returns the parent of the directory this script lives in.
parent_dir="$(cd `dirname $0`/..; pwd)"

-use_conf_dir=${SPARK_CONF_DIR:-"$parent_dir/conf"}
+user_conf_dir=${SPARK_CONF_DIR:-"$parent_dir/conf"}

-if [ -f "${use_conf_dir}/spark-env.sh" ]; then
+if [ -f "${user_conf_dir}/spark-env.sh" ]; then
# Promote all variable declarations to environment (exported) variables
set -a
. "${use_conf_dir}/spark-env.sh"
. "${user_conf_dir}/spark-env.sh"
set +a
fi
fi
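
The change above only renames the variable; the surrounding `set -a` / `set +a` idiom is what turns every variable defined in spark-env.sh into an exported environment variable. A minimal standalone sketch of that idiom (the file path and variable name are illustrative, not from the diff):

    # Hypothetical config file defining a plain shell variable, no `export` keyword
    echo 'GREETING="hello"' > /tmp/example-env.sh

    set -a                     # auto-export every variable assigned from here on
    . /tmp/example-env.sh      # GREETING is now an environment variable
    set +a                     # restore normal assignment semantics

    bash -c 'echo $GREETING'   # a child process sees it: prints "hello"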
7 changes: 6 additions & 1 deletion bin/spark-class
@@ -105,9 +105,14 @@ else
exit 1
fi
fi
+JAVA_VERSION=$($RUNNER -version 2>&1 | sed 's/java version "\(.*\)\.\(.*\)\..*"/\1\2/; 1q')

# Set JAVA_OPTS to be able to load native libraries and to set heap size
-JAVA_OPTS="-XX:MaxPermSize=128m $OUR_JAVA_OPTS"
+if [ "$JAVA_VERSION" -ge 18 ]; then
+JAVA_OPTS="$OUR_JAVA_OPTS"
+else
+JAVA_OPTS="-XX:MaxPermSize=128m $OUR_JAVA_OPTS"
+fi
JAVA_OPTS="$JAVA_OPTS -Xms$OUR_JAVA_MEM -Xmx$OUR_JAVA_MEM"

# Load extra JAVA_OPTS from conf/java-opts, if it exists
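
The sed expression added above collapses the first line of `java -version` output into a two-digit major+minor number, so the script can skip `-XX:MaxPermSize` on Java 8, where the permanent generation no longer exists. A standalone sketch of the same parsing, using an illustrative version string rather than a live JVM:

    # Simulate the first line of `java -version 2>&1`
    sample='java version "1.8.0_20"'

    # Same sed program as in spark-class: keep major+minor digits, quit after line 1
    version=$(echo "$sample" | sed 's/java version "\(.*\)\.\(.*\)\..*"/\1\2/; 1q')
    echo "$version"            # prints 18

    if [ "$version" -ge 18 ]; then
      echo "Java 8+: PermGen is gone, no -XX:MaxPermSize needed"
    else
      echo "Java 7 or earlier: keep -XX:MaxPermSize=128m"
    fi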
8 changes: 7 additions & 1 deletion bin/spark-class2.cmd
@@ -77,7 +77,13 @@ rem All drivers use SPARK_JAVA_OPTS + SPARK_DRIVER_MEMORY. The repl also uses SP
)

rem Set JAVA_OPTS to be able to load native libraries and to set heap size
-set JAVA_OPTS=-XX:MaxPermSize=128m %OUR_JAVA_OPTS% -Djava.library.path=%SPARK_LIBRARY_PATH% -Xms%OUR_JAVA_MEM% -Xmx%OUR_JAVA_MEM%
+for /f "tokens=3" %%i in ('java -version 2^>^&1 ^| find "version"') do set jversion=%%i
+for /f "tokens=1 delims=_" %%i in ("%jversion:~1,-1%") do set jversion=%%i
+if "%jversion%" geq "1.8.0" (
+set JAVA_OPTS=%OUR_JAVA_OPTS% -Djava.library.path=%SPARK_LIBRARY_PATH% -Xms%OUR_JAVA_MEM% -Xmx%OUR_JAVA_MEM%
+) else (
+set JAVA_OPTS=-XX:MaxPermSize=128m %OUR_JAVA_OPTS% -Djava.library.path=%SPARK_LIBRARY_PATH% -Xms%OUR_JAVA_MEM% -Xmx%OUR_JAVA_MEM%
+)
rem Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in CommandUtils.scala!

rem Test whether the user has built Spark
36 changes: 18 additions & 18 deletions bin/spark-shell
@@ -22,7 +22,7 @@

cygwin=false
case "`uname`" in
CYGWIN*) cygwin=true;;
CYGWIN*) cygwin=true;;
esac

# Enter posix mode for bash
@@ -32,9 +32,9 @@ set -o posix
FWDIR="$(cd `dirname $0`/..; pwd)"

function usage() {
echo "Usage: ./bin/spark-shell [options]"
$FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
exit 0
echo "Usage: ./bin/spark-shell [options]"
$FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
exit 0
}

if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
@@ -46,20 +46,20 @@ SUBMIT_USAGE_FUNCTION=usage
gatherSparkSubmitOpts "$@"

function main() {
if $cygwin; then
# Workaround for issue involving JLine and Cygwin
# (see http://sourceforge.net/p/jline/bugs/40/).
# If you're using the Mintty terminal emulator in Cygwin, may need to set the
# "Backspace sends ^H" setting in "Keys" section of the Mintty options
# (see https://github.com/sbt/sbt/issues/562).
stty -icanon min 1 -echo > /dev/null 2>&1
export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Djline.terminal=unix"
$FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
stty icanon echo > /dev/null 2>&1
else
export SPARK_SUBMIT_OPTS
$FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
fi
if $cygwin; then
# Workaround for issue involving JLine and Cygwin
# (see http://sourceforge.net/p/jline/bugs/40/).
# If you're using the Mintty terminal emulator in Cygwin, may need to set the
# "Backspace sends ^H" setting in "Keys" section of the Mintty options
# (see https://github.com/sbt/sbt/issues/562).
stty -icanon min 1 -echo > /dev/null 2>&1
export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Djline.terminal=unix"
$FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
stty icanon echo > /dev/null 2>&1
else
export SPARK_SUBMIT_OPTS
$FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
fi
}

# Copy restore-TTY-on-exit functions from Scala script so spark-shell exits properly even in
55 changes: 13 additions & 42 deletions bin/spark-sql
@@ -24,6 +24,7 @@
set -o posix

CLASS="org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver"
+CLASS_NOT_FOUND_EXIT_STATUS=1

# Figure out where Spark is installed
FWDIR="$(cd `dirname $0`/..; pwd)"
@@ -43,52 +44,22 @@ function usage {
$FWDIR/bin/spark-class $CLASS --help 2>&1 | grep -v "$pattern" 1>&2
}

-function ensure_arg_number {
-arg_number=$1
-at_least=$2
-
-if [[ $arg_number -lt $at_least ]]; then
-usage
-exit 1
-fi
-}
-
-if [[ "$@" = --help ]] || [[ "$@" = -h ]]; then
+if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
usage
exit 0
fi

-CLI_ARGS=()
-SUBMISSION_ARGS=()
-
-while (($#)); do
-case $1 in
--d | --define | --database | -f | -h | --hiveconf | --hivevar | -i | -p)
-ensure_arg_number $# 2
-CLI_ARGS+=("$1"); shift
-CLI_ARGS+=("$1"); shift
-;;
+source $FWDIR/bin/utils.sh
+SUBMIT_USAGE_FUNCTION=usage
+gatherSparkSubmitOpts "$@"

--e)
-ensure_arg_number $# 2
-CLI_ARGS+=("$1"); shift
-CLI_ARGS+=("$1"); shift
-;;
+"$FWDIR"/bin/spark-submit --class $CLASS "${SUBMISSION_OPTS[@]}" spark-internal "${APPLICATION_OPTS[@]}"
+exit_status=$?

--s | --silent)
-CLI_ARGS+=("$1"); shift
-;;
-
--v | --verbose)
-# Both SparkSubmit and SparkSQLCLIDriver recognizes -v | --verbose
-CLI_ARGS+=("$1")
-SUBMISSION_ARGS+=("$1"); shift
-;;
-
-*)
-SUBMISSION_ARGS+=("$1"); shift
-;;
-esac
-done
+if [[ exit_status -eq CLASS_NOT_FOUND_EXIT_STATUS ]]; then
+echo
+echo "Failed to load Spark SQL CLI main class $CLASS."
+echo "You need to build Spark with -Phive."
+fi

exec "$FWDIR"/bin/spark-submit --class $CLASS "${SUBMISSION_ARGS[@]}" spark-internal "${CLI_ARGS[@]}"
exit $exit_status
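
The rewritten bin/spark-sql delegates option splitting to `gatherSparkSubmitOpts` from bin/utils.sh and, rather than exec'ing spark-submit, captures its exit status so it can print a build hint when the Hive CLI class is missing. A minimal sketch of that capture-and-hint pattern in isolation (the `false` command stands in for the spark-submit invocation and is purely illustrative):

    CLASS_NOT_FOUND_EXIT_STATUS=1

    false                      # placeholder for: spark-submit --class $CLASS ...
    exit_status=$?

    if [[ $exit_status -eq $CLASS_NOT_FOUND_EXIT_STATUS ]]; then
      echo "Failed to load Spark SQL CLI main class."
      echo "You need to build Spark with -Phive."
    fi

    exit $exit_status          # propagate the original status to the caller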