Commit 2481a2a

Merged master and resolved conflict.

Michelangelo D'Agostino committed Oct 20, 2014
2 parents a6ffb96 + d1966f3
Showing 544 changed files with 13,348 additions and 4,712 deletions.
10 changes: 10 additions & 0 deletions assembly/pom.xml
@@ -349,5 +349,15 @@
       </plugins>
     </build>
   </profile>
+  <profile>
+    <id>kinesis-asl</id>
+    <dependencies>
+      <dependency>
+        <groupId>org.apache.httpcomponents</groupId>
+        <artifactId>httpclient</artifactId>
+        <version>${commons.httpclient.version}</version>
+      </dependency>
+    </dependencies>
+  </profile>
 </profiles>
</project>
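
The new kinesis-asl profile pins the httpclient version pulled into the assembly. As a usage sketch (a typical Maven invocation, assumed rather than shown in this diff), the profile is activated by its id:

    mvn -Pkinesis-asl -DskipTests clean package
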
2 changes: 1 addition & 1 deletion bagel/src/test/resources/log4j.properties
@@ -21,7 +21,7 @@ log4j.appender.file=org.apache.log4j.FileAppender
 log4j.appender.file.append=false
 log4j.appender.file.file=target/unit-tests.log
 log4j.appender.file.layout=org.apache.log4j.PatternLayout
-log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %p %c{1}: %m%n
+log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n
 
 # Ignore messages below warning level from Jetty, because it's a bit verbose
 log4j.logger.org.eclipse.jetty=WARN
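
The added %t conversion character inserts the logging thread's name between the timestamp and the level, which helps untangle interleaved output from multi-threaded tests. An illustrative line under the new pattern (the thread and logger names here are made up):

    14/10/20 14:23:01.123 dag-scheduler-event-loop INFO DAGScheduler: Job 0 finished
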
8 changes: 7 additions & 1 deletion bin/compute-classpath.cmd
@@ -36,7 +36,13 @@ rem Load environment variables from conf\spark-env.cmd, if it exists
 if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"
 
 rem Build up classpath
-set CLASSPATH=%SPARK_CLASSPATH%;%SPARK_SUBMIT_CLASSPATH%;%FWDIR%conf
+set CLASSPATH=%SPARK_CLASSPATH%;%SPARK_SUBMIT_CLASSPATH%
+
+if not "x%SPARK_CONF_DIR%"=="x" (
+  set CLASSPATH=%CLASSPATH%;%SPARK_CONF_DIR%
+) else (
+  set CLASSPATH=%CLASSPATH%;%FWDIR%conf
+)
 
 if exist "%FWDIR%RELEASE" (
   for %%d in ("%FWDIR%lib\spark-assembly*.jar") do (
8 changes: 7 additions & 1 deletion bin/compute-classpath.sh
@@ -27,8 +27,14 @@ FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
 
 . "$FWDIR"/bin/load-spark-env.sh
 
+CLASSPATH="$SPARK_CLASSPATH:$SPARK_SUBMIT_CLASSPATH"
+
 # Build up classpath
-CLASSPATH="$SPARK_CLASSPATH:$SPARK_SUBMIT_CLASSPATH:$FWDIR/conf"
+if [ -n "$SPARK_CONF_DIR" ]; then
+  CLASSPATH="$CLASSPATH:$SPARK_CONF_DIR"
+else
+  CLASSPATH="$CLASSPATH:$FWDIR/conf"
+fi
 
 ASSEMBLY_DIR="$FWDIR/assembly/target/scala-$SCALA_VERSION"
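
Both classpath scripts now honor SPARK_CONF_DIR and fall back to the in-tree conf directory when it is unset. A usage sketch (the directory path is hypothetical):

    SPARK_CONF_DIR=/etc/spark/conf ./bin/spark-shell
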
55 changes: 40 additions & 15 deletions bin/pyspark
@@ -50,9 +50,44 @@ fi
 
 . "$FWDIR"/bin/load-spark-env.sh
 
-# Figure out which Python executable to use
+# In Spark <= 1.1, setting IPYTHON=1 would cause the driver to be launched using the `ipython`
+# executable, while the worker would still be launched using PYSPARK_PYTHON.
+#
+# In Spark 1.2, we removed the documentation of the IPYTHON and IPYTHON_OPTS variables and added
+# PYSPARK_DRIVER_PYTHON and PYSPARK_DRIVER_PYTHON_OPTS to allow IPython to be used for the driver.
+# Now, users can simply set PYSPARK_DRIVER_PYTHON=ipython to use IPython and set
+# PYSPARK_DRIVER_PYTHON_OPTS to pass options when starting the Python driver
+# (e.g. PYSPARK_DRIVER_PYTHON_OPTS='notebook'). This supports full customization of the IPython
+# and executor Python executables.
+#
+# For backwards-compatibility, we retain the old IPYTHON and IPYTHON_OPTS variables.
+
+# Determine the Python executable to use if PYSPARK_PYTHON or PYSPARK_DRIVER_PYTHON isn't set:
+if hash python2.7 2>/dev/null; then
+  # Attempt to use Python 2.7, if installed:
+  DEFAULT_PYTHON="python2.7"
+else
+  DEFAULT_PYTHON="python"
+fi
+
+# Determine the Python executable to use for the driver:
+if [[ -n "$IPYTHON_OPTS" || "$IPYTHON" == "1" ]]; then
+  # If IPython options are specified, assume user wants to run IPython
+  # (for backwards-compatibility)
+  PYSPARK_DRIVER_PYTHON_OPTS="$PYSPARK_DRIVER_PYTHON_OPTS $IPYTHON_OPTS"
+  PYSPARK_DRIVER_PYTHON="ipython"
+elif [[ -z "$PYSPARK_DRIVER_PYTHON" ]]; then
+  PYSPARK_DRIVER_PYTHON="${PYSPARK_PYTHON:-"$DEFAULT_PYTHON"}"
+fi
+
+# Determine the Python executable to use for the executors:
 if [[ -z "$PYSPARK_PYTHON" ]]; then
-  PYSPARK_PYTHON="python"
+  if [[ $PYSPARK_DRIVER_PYTHON == *ipython* && $DEFAULT_PYTHON != "python2.7" ]]; then
+    echo "IPython requires Python 2.7+; please install python2.7 or set PYSPARK_PYTHON" 1>&2
+    exit 1
+  else
+    PYSPARK_PYTHON="$DEFAULT_PYTHON"
+  fi
 fi
 export PYSPARK_PYTHON
 
@@ -64,11 +99,6 @@ export PYTHONPATH="$SPARK_HOME/python/lib/py4j-0.8.2.1-src.zip:$PYTHONPATH"
 export OLD_PYTHONSTARTUP="$PYTHONSTARTUP"
 export PYTHONSTARTUP="$FWDIR/python/pyspark/shell.py"
 
-# If IPython options are specified, assume user wants to run IPython
-if [[ -n "$IPYTHON_OPTS" ]]; then
-  IPYTHON=1
-fi
-
 # Build up arguments list manually to preserve quotes and backslashes.
 # We export Spark submit arguments as an environment variable because shell.py must run as a
 # PYTHONSTARTUP script, which does not take in arguments. This is required for IPython notebooks.
@@ -88,9 +118,9 @@ if [[ -n "$SPARK_TESTING" ]]; then
   unset YARN_CONF_DIR
   unset HADOOP_CONF_DIR
   if [[ -n "$PYSPARK_DOC_TEST" ]]; then
-    exec "$PYSPARK_PYTHON" -m doctest $1
+    exec "$PYSPARK_DRIVER_PYTHON" -m doctest $1
   else
-    exec "$PYSPARK_PYTHON" $1
+    exec "$PYSPARK_DRIVER_PYTHON" $1
  fi
  exit
 fi
@@ -106,10 +136,5 @@ if [[ "$1" =~ \.py$ ]]; then
 else
   # PySpark shell requires special handling downstream
   export PYSPARK_SHELL=1
-  # Only use ipython if no command line arguments were provided [SPARK-1134]
-  if [[ "$IPYTHON" = "1" ]]; then
-    exec ${PYSPARK_PYTHON:-ipython} $IPYTHON_OPTS
-  else
-    exec "$PYSPARK_PYTHON"
-  fi
+  exec "$PYSPARK_DRIVER_PYTHON" $PYSPARK_DRIVER_PYTHON_OPTS
 fi
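
Per the new comments in this script, the driver and executor Python interpreters can now be chosen independently. A usage sketch drawn from those comments (launching the shell under the IPython notebook):

    PYSPARK_DRIVER_PYTHON=ipython PYSPARK_DRIVER_PYTHON_OPTS="notebook" ./bin/pyspark
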
2 changes: 1 addition & 1 deletion bin/pyspark2.cmd
@@ -33,7 +33,7 @@ for %%d in ("%FWDIR%assembly\target\scala-%SCALA_VERSION%\spark-assembly*hadoop*
 )
 if [%FOUND_JAR%] == [0] (
   echo Failed to find Spark assembly JAR.
-  echo You need to build Spark with sbt\sbt assembly before running this program.
+  echo You need to build Spark before running this program.
   goto exit
 )
 :skip_build_test
2 changes: 1 addition & 1 deletion bin/run-example2.cmd
@@ -52,7 +52,7 @@ if exist "%FWDIR%RELEASE" (
 )
 if "x%SPARK_EXAMPLES_JAR%"=="x" (
   echo Failed to find Spark examples assembly JAR.
-  echo You need to build Spark with sbt\sbt assembly before running this program.
+  echo You need to build Spark before running this program.
   goto exit
 )
 
4 changes: 2 additions & 2 deletions bin/spark-class
@@ -105,7 +105,7 @@ else
     exit 1
   fi
 fi
-JAVA_VERSION=$("$RUNNER" -version 2>&1 | sed 's/.* version "\(.*\)\.\(.*\)\..*"/\1\2/; 1q')
+JAVA_VERSION=$("$RUNNER" -version 2>&1 | grep 'version' | sed 's/.* version "\(.*\)\.\(.*\)\..*"/\1\2/; 1q')
 
 # Set JAVA_OPTS to be able to load native libraries and to set heap size
 if [ "$JAVA_VERSION" -ge 18 ]; then
@@ -146,7 +146,7 @@ fi
 if [[ "$1" =~ org.apache.spark.tools.* ]]; then
   if test -z "$SPARK_TOOLS_JAR"; then
     echo "Failed to find Spark Tools Jar in $FWDIR/tools/target/scala-$SCALA_VERSION/" 1>&2
-    echo "You need to build spark before running $1." 1>&2
+    echo "You need to build Spark before running $1." 1>&2
     exit 1
   fi
   CLASSPATH="$CLASSPATH:$SPARK_TOOLS_JAR"
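
The inserted grep 'version' narrows the JVM banner to the line that actually carries the version string before sed parses it. A hedged illustration of why that guard can matter — some JVMs prepend extra lines, for example when _JAVA_OPTIONS is set:

    $ java -version 2>&1
    Picked up _JAVA_OPTIONS: -Xmx512m
    java version "1.7.0_67"
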
2 changes: 1 addition & 1 deletion bin/spark-class2.cmd
@@ -104,7 +104,7 @@ for %%d in ("%FWDIR%assembly\target\scala-%SCALA_VERSION%\spark-assembly*hadoop*
 )
 if "%FOUND_JAR%"=="0" (
   echo Failed to find Spark assembly JAR.
-  echo You need to build Spark with sbt\sbt assembly before running this program.
+  echo You need to build Spark before running this program.
   goto exit
 )
 :skip_build_test
5 changes: 3 additions & 2 deletions bin/spark-shell.cmd
@@ -17,6 +17,7 @@ rem See the License for the specific language governing permissions and
 rem limitations under the License.
 rem
 
-set SPARK_HOME=%~dp0..
+rem This is the entry point for running Spark shell. To avoid polluting the
+rem environment, it just launches a new cmd to do the real work.
 
-cmd /V /E /C %SPARK_HOME%\bin\spark-submit.cmd --class org.apache.spark.repl.Main %* spark-shell
+cmd /V /E /C %~dp0spark-shell2.cmd %*
22 changes: 22 additions & 0 deletions bin/spark-shell2.cmd
@@ -0,0 +1,22 @@
+@echo off
+
+rem
+rem Licensed to the Apache Software Foundation (ASF) under one or more
+rem contributor license agreements. See the NOTICE file distributed with
+rem this work for additional information regarding copyright ownership.
+rem The ASF licenses this file to You under the Apache License, Version 2.0
+rem (the "License"); you may not use this file except in compliance with
+rem the License. You may obtain a copy of the License at
+rem
+rem    http://www.apache.org/licenses/LICENSE-2.0
+rem
+rem Unless required by applicable law or agreed to in writing, software
+rem distributed under the License is distributed on an "AS IS" BASIS,
+rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+rem See the License for the specific language governing permissions and
+rem limitations under the License.
+rem
+
+set SPARK_HOME=%~dp0..
+
+cmd /V /E /C %SPARK_HOME%\bin\spark-submit.cmd --class org.apache.spark.repl.Main %* spark-shell
12 changes: 1 addition & 11 deletions bin/spark-sql
@@ -24,7 +24,6 @@
 set -o posix
 
 CLASS="org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver"
-CLASS_NOT_FOUND_EXIT_STATUS=101
 
 # Figure out where Spark is installed
 FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
@@ -53,13 +52,4 @@ source "$FWDIR"/bin/utils.sh
 SUBMIT_USAGE_FUNCTION=usage
 gatherSparkSubmitOpts "$@"
 
-"$FWDIR"/bin/spark-submit --class $CLASS "${SUBMISSION_OPTS[@]}" spark-internal "${APPLICATION_OPTS[@]}"
-exit_status=$?
-
-if [[ exit_status -eq CLASS_NOT_FOUND_EXIT_STATUS ]]; then
-  echo
-  echo "Failed to load Spark SQL CLI main class $CLASS."
-  echo "You need to build Spark with -Phive."
-fi
-
-exit $exit_status
+exec "$FWDIR"/bin/spark-submit --class $CLASS "${SUBMISSION_OPTS[@]}" spark-internal "${APPLICATION_OPTS[@]}"
51 changes: 3 additions & 48 deletions bin/spark-submit.cmd
@@ -17,52 +17,7 @@ rem See the License for the specific language governing permissions and
 rem limitations under the License.
 rem
 
-rem NOTE: Any changes in this file must be reflected in SparkSubmitDriverBootstrapper.scala!
+rem This is the entry point for running Spark submit. To avoid polluting the
+rem environment, it just launches a new cmd to do the real work.
 
-set SPARK_HOME=%~dp0..
-set ORIG_ARGS=%*
-
-rem Reset the values of all variables used
-set SPARK_SUBMIT_DEPLOY_MODE=client
-set SPARK_SUBMIT_PROPERTIES_FILE=%SPARK_HOME%\conf\spark-defaults.conf
-set SPARK_SUBMIT_DRIVER_MEMORY=
-set SPARK_SUBMIT_LIBRARY_PATH=
-set SPARK_SUBMIT_CLASSPATH=
-set SPARK_SUBMIT_OPTS=
-set SPARK_SUBMIT_BOOTSTRAP_DRIVER=
-
-:loop
-if [%1] == [] goto continue
-if [%1] == [--deploy-mode] (
-  set SPARK_SUBMIT_DEPLOY_MODE=%2
-) else if [%1] == [--properties-file] (
-  set SPARK_SUBMIT_PROPERTIES_FILE=%2
-) else if [%1] == [--driver-memory] (
-  set SPARK_SUBMIT_DRIVER_MEMORY=%2
-) else if [%1] == [--driver-library-path] (
-  set SPARK_SUBMIT_LIBRARY_PATH=%2
-) else if [%1] == [--driver-class-path] (
-  set SPARK_SUBMIT_CLASSPATH=%2
-) else if [%1] == [--driver-java-options] (
-  set SPARK_SUBMIT_OPTS=%2
-)
-shift
-goto loop
-:continue
-
-rem For client mode, the driver will be launched in the same JVM that launches
-rem SparkSubmit, so we may need to read the properties file for any extra class
-rem paths, library paths, java options and memory early on. Otherwise, it will
-rem be too late by the time the driver JVM has started.
-
-if [%SPARK_SUBMIT_DEPLOY_MODE%] == [client] (
-  if exist %SPARK_SUBMIT_PROPERTIES_FILE% (
-    rem Parse the properties file only if the special configs exist
-    for /f %%i in ('findstr /r /c:"^[\t ]*spark.driver.memory" /c:"^[\t ]*spark.driver.extra" ^
-      %SPARK_SUBMIT_PROPERTIES_FILE%') do (
-      set SPARK_SUBMIT_BOOTSTRAP_DRIVER=1
-    )
-  )
-)
-
-cmd /V /E /C %SPARK_HOME%\bin\spark-class.cmd org.apache.spark.deploy.SparkSubmit %ORIG_ARGS%
+cmd /V /E /C %~dp0spark-submit2.cmd %*
68 changes: 68 additions & 0 deletions bin/spark-submit2.cmd
@@ -0,0 +1,68 @@
+@echo off
+
+rem
+rem Licensed to the Apache Software Foundation (ASF) under one or more
+rem contributor license agreements. See the NOTICE file distributed with
+rem this work for additional information regarding copyright ownership.
+rem The ASF licenses this file to You under the Apache License, Version 2.0
+rem (the "License"); you may not use this file except in compliance with
+rem the License. You may obtain a copy of the License at
+rem
+rem    http://www.apache.org/licenses/LICENSE-2.0
+rem
+rem Unless required by applicable law or agreed to in writing, software
+rem distributed under the License is distributed on an "AS IS" BASIS,
+rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+rem See the License for the specific language governing permissions and
+rem limitations under the License.
+rem
+
+rem NOTE: Any changes in this file must be reflected in SparkSubmitDriverBootstrapper.scala!
+
+set SPARK_HOME=%~dp0..
+set ORIG_ARGS=%*
+
+rem Reset the values of all variables used
+set SPARK_SUBMIT_DEPLOY_MODE=client
+set SPARK_SUBMIT_PROPERTIES_FILE=%SPARK_HOME%\conf\spark-defaults.conf
+set SPARK_SUBMIT_DRIVER_MEMORY=
+set SPARK_SUBMIT_LIBRARY_PATH=
+set SPARK_SUBMIT_CLASSPATH=
+set SPARK_SUBMIT_OPTS=
+set SPARK_SUBMIT_BOOTSTRAP_DRIVER=
+
+:loop
+if [%1] == [] goto continue
+if [%1] == [--deploy-mode] (
+  set SPARK_SUBMIT_DEPLOY_MODE=%2
+) else if [%1] == [--properties-file] (
+  set SPARK_SUBMIT_PROPERTIES_FILE=%2
+) else if [%1] == [--driver-memory] (
+  set SPARK_SUBMIT_DRIVER_MEMORY=%2
+) else if [%1] == [--driver-library-path] (
+  set SPARK_SUBMIT_LIBRARY_PATH=%2
+) else if [%1] == [--driver-class-path] (
+  set SPARK_SUBMIT_CLASSPATH=%2
+) else if [%1] == [--driver-java-options] (
+  set SPARK_SUBMIT_OPTS=%2
+)
+shift
+goto loop
+:continue
+
+rem For client mode, the driver will be launched in the same JVM that launches
+rem SparkSubmit, so we may need to read the properties file for any extra class
+rem paths, library paths, java options and memory early on. Otherwise, it will
+rem be too late by the time the driver JVM has started.
+
+if [%SPARK_SUBMIT_DEPLOY_MODE%] == [client] (
+  if exist %SPARK_SUBMIT_PROPERTIES_FILE% (
+    rem Parse the properties file only if the special configs exist
+    for /f %%i in ('findstr /r /c:"^[\t ]*spark.driver.memory" /c:"^[\t ]*spark.driver.extra" ^
+      %SPARK_SUBMIT_PROPERTIES_FILE%') do (
+      set SPARK_SUBMIT_BOOTSTRAP_DRIVER=1
+    )
+  )
+)
+
+cmd /V /E /C %SPARK_HOME%\bin\spark-class.cmd org.apache.spark.deploy.SparkSubmit %ORIG_ARGS%
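
The findstr probe sets SPARK_SUBMIT_BOOTSTRAP_DRIVER whenever the properties file declares driver memory or any spark.driver.extra* setting. A hedged example of lines in conf\spark-defaults.conf that would match (the values are illustrative):

    spark.driver.memory          2g
    spark.driver.extraClassPath  C:\libs\extra.jar
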
2 changes: 1 addition & 1 deletion bin/utils.sh
@@ -17,7 +17,7 @@
 # limitations under the License.
 #
 
-# Gather all all spark-submit options into SUBMISSION_OPTS
+# Gather all spark-submit options into SUBMISSION_OPTS
 function gatherSparkSubmitOpts() {
 
   if [ -z "$SUBMIT_USAGE_FUNCTION" ]; then
11 changes: 11 additions & 0 deletions core/pom.xml
@@ -322,6 +322,17 @@
         </tasks>
       </configuration>
     </plugin>
+    <plugin>
+      <artifactId>maven-clean-plugin</artifactId>
+      <configuration>
+        <filesets>
+          <fileset>
+            <directory>${basedir}/../python/build</directory>
+          </fileset>
+        </filesets>
+        <verbose>true</verbose>
+      </configuration>
+    </plugin>
     <plugin>
       <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-shade-plugin</artifactId>
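
With this maven-clean-plugin fileset in place, cleaning the core module also deletes the python/build staging directory one level up. A hedged sketch of the invocation (run from the repository root):

    mvn -pl core clean
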