From d2e6ef595ee1eb6026f8c28871160decbf50e51e Mon Sep 17 00:00:00 2001 From: David Hill Date: Wed, 7 Feb 2018 13:09:10 -0600 Subject: [PATCH] 1.0 Release (#2) * working copy of lcmap-spark. * wip * wip * wip * Update README.md * Update README.md * wip * wip * readme wip * wip. update readme for running notebooks * wip. update readme for running notebooks * wip. update readme for running notebooks * updated readme * updated readme * updated readme * updated readme * updated readme * updated readme * updated readme * updated readme * updated readme * updated readme * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * Update README.rst * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip updates * wip doc updates * wip doc updates * wip doc updates * Update RUNNING.rst * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * updates * wip updates * wip updates * wip updates * wip doc updates * wip updates * updated Dockerfile to reduce disk space and optimize caching. 
* update wip docs * update readme * updates * updates * updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * Update configuration.rst * wip updates * doc updates * doc updates * doc updates * doc updates * doc updates * wip doc update * wip doc update * wip doc update * wip doc update * wip doc update * wip doc update * wip doc update * wip doc update * wip doc update * wip doc update * wip doc update * wip doc update * wip doc update * wip doc update * wip doc update * wip doc update * wip doc update * wip doc update * wip doc update * wip doc update * wip doc update * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip doc updates * wip update docs * wip updates * adding application.rst * update * update * updates * update * update * doc updates * update docs * update * update configuration.rst * update configuration.rst * update configuration.rst * update configuration.rst * update configuration.rst * update configuration.rst * wip updates * wip updates * updates * remove tmp file * update running * examples work now * updates * update running * update running * added imgs * updated image * updated configuration * updated configuration * updated configuration * updated configuration * updated configuration * updated configuration * updated configuration * renamed examples to jobs to match docs * install Merlin 2 and increment version. * formatting on running.rst * update examples * travis-ci set up, build modified to push based on branch and commit id * update .travis.yml * update travis * update dockerfile for merlin 2rc2. updated travis.yml. * update travis * removing bin/build. Updated version.txt to 1.0-SNAPSHOT. Update tags in Makefile. * updated for merlin 2 rc2. 
removed .travis foo * updated docs * update docs * update docs * update docs * update readme * update readme * update makefile with conditional logic for branch tagging * update makefile. need to update docs and add travis.yml in. * added travis.yml, updated version * update travis * change notebook dir from copy to create * update travis * update travis * update travis * update docs * update docs * update docs * update docs * update docs * update docs * update docs * update readme * update readme * update readme * update docs * doc updates * update docs * update docs * update docs * update docs * update docs * update docs * update docs --- .travis.yml | 19 ++ Dockerfile | 82 ++++-- Makefile | 37 ++- README.md | 63 ----- README.rst | 84 ++++++ docs/applications.rst | 81 ++++++ docs/configuration.rst | 145 ++++++++++ docs/developing.rst | 23 ++ docs/imgs/spark-context-workers.png | Bin 0 -> 18686 bytes docs/imgs/start-spark-workers-on-mesos.png | Bin 0 -> 16084 bytes docs/overview.rst | 126 +++++++++ docs/running.rst | 250 ++++++++++++++++++ .../spark/dist/sbin/dispatcher-entry-point.sh | 23 -- jobs/job.py | 10 + jobs/jobs.zip | Bin 0 -> 310 bytes pom.xml | 20 ++ version.txt | 2 + 17 files changed, 851 insertions(+), 114 deletions(-) create mode 100644 .travis.yml delete mode 100644 README.md create mode 100644 README.rst create mode 100644 docs/applications.rst create mode 100644 docs/configuration.rst create mode 100644 docs/developing.rst create mode 100644 docs/imgs/spark-context-workers.png create mode 100644 docs/imgs/start-spark-workers-on-mesos.png create mode 100644 docs/overview.rst create mode 100644 docs/running.rst delete mode 100755 files/opt/spark/dist/sbin/dispatcher-entry-point.sh create mode 100644 jobs/job.py create mode 100644 jobs/jobs.zip create mode 100644 pom.xml create mode 100644 version.txt diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..4323a00 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,19 @@ +language: c + 
+sudo: required + +services: + - docker + +script: make debug && make build && make tag + +deploy: + - provider: script + script: make debug && make push + on: + all_branches: true + +notifications: + slack: + rooms: + - lcmap:UTqlh9PfPVomfpli10WKyZoh#cicd diff --git a/Dockerfile b/Dockerfile index ae759ac..4c344cd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,23 +1,73 @@ -FROM mesosphere/mesos:1.1.1 +FROM centos:7.3.1611 -MAINTAINER USGS LCMAP http://eros.usgs.gov +LABEL maintainer="USGS EROS LCMAP http://eros.usgs.gov http://github.com/usgs-eros/lcmap-spark" \ + description="CentOS based Spark image for LCMAP" \ + org.apache.mesos.version=1.4.0 \ + org.apache.spark.version=2.2.0 \ + net.java.openjdk.version=1.8.0 \ + org.python.version=3.6 \ + org.centos=7.3.1611 -RUN apt-get update +EXPOSE 8081 4040 8888 -WORKDIR /opt/spark/dist +ENV HOME=/home/lcmap \ + USER=lcmap \ + SPARK_HOME=/opt/spark \ + SPARK_NO_DAEMONIZE=true \ + PYSPARK_PYTHON=python3 \ + MESOS_NATIVE_JAVA_LIBRARY=/usr/lib/libmesos.so \ + TINI_SUBREAPER=true \ + LIBPROCESS_SSL_ENABLED=1 \ + LIBPROCESS_SSL_SUPPORT_DOWNGRADE=1 \ + LIBPROCESS_SSL_VERIFY_CERT=0 \ + LIBPROCESS_SSL_ENABLE_SSL_V3=0 \ + LIBPROCESS_SSL_ENABLE_TLS_V1_0=0 \ + LIBPROCESS_SSL_ENABLE_TLS_V1_1=0 \ + LIBPROCESS_SSL_ENABLE_TLS_V1_2=1 \ + LIBPROCESS_SSL_CIPHERS=ECDHE-RSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-SHA384:AES256-SHA256:AES128-SHA256 \ + LIBPROCESS_SSL_CERT_FILE=/certs/mesos.crt \ + LIBPROCESS_SSL_KEY_FILE=/certs/mesos.key \ + LIBPROCESS_SSL_CA_FILE=/certs/trustedroot.crt \ + LIBPROCESS_SSL_CA_DIR=/certs \ + LIBPROCESS_SSL_ECDH_CURVE=auto -COPY tmp/spark-2.1.0-bin-hadoop2.7/ . 
-COPY files/ / +ENV PATH=$SPARK_HOME/bin:${PATH} \ + PYTHONPATH=$PYTHONPATH:$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.4-src.zip:$SPARK_HOME/python/lib/pyspark.zip -# This lets docker manage the execution -ENV SPARK_NO_DAEMONIZE "true" -ENV MESOS_NATIVE_JAVA_LIBRARY /usr/lib/libmesos.so -ENV SPARK_HOME /opt/spark/dist -ENV PATH $SPARK_HOME/bin:$PATH -ENV PYTHONPATH $SPARK_HOME/python/:$PYTHONPATH -ENV PYTHONPATH $SPARK_HOME/python/lib/py4j-0.10.4-src.zip:$PYTHONPATH +# Add a user to run as inside the container to prevent accidental foo while mounting volumes. +# Use "docker run -u `id -u`" at runtime to assign proper UIDs for file permissions. +# Mesos username must match this username (and be assigned permissions by Mesos admin.) -EXPOSE 7077 -EXPOSE 8081 +RUN yum install -y sudo && \ + adduser -ms /bin/bash $USER && \ + echo "$USER ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/$USER && \ + echo "alias sudo='sudo env PATH=$PATH'" > /etc/profile.d/sudo.sh && \ + chmod 0440 /etc/sudoers.d/$USER + +COPY pom.xml /root +RUN mkdir -p $HOME/notebook + +RUN yum update -y +RUN yum install -y java-1.8.0-openjdk-devel.x86_64 \ + http://repos.mesosphere.io/el/7/noarch/RPMS/mesosphere-el-repo-7-3.noarch.rpm \ + mesos \ + bzip2 \ + gcc \ + maven +RUN yum -y downgrade mesos-1.4.0 +RUN curl https://d3kbcqa49mib13.cloudfront.net/spark-2.2.0-bin-hadoop2.7.tgz -o /opt/spark.tgz +RUN cd /opt && tar -zxf spark.tgz && rm -f spark.tgz && ln -s spark-* spark && cd - +RUN curl https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -o /root/mc.sh +RUN bash /root/mc.sh -u -b -p /usr/local +RUN conda install python=3.6 pip jupyter numpy --yes +RUN pip install lcmap-merlin==2.0rc2 +RUN mvn -f /root/pom.xml dependency:copy-dependencies -DoutputDirectory=$SPARK_HOME/jars +RUN yum erase -y maven gcc bzip2 +RUN yum clean all +RUN rm -rf /var/cache/yum /root/.cache /root/.m2 /root/pom.xml /root/mc.sh +RUN conda clean --all -y + +USER $USER +WORKDIR $HOME +RUN sudo chown -R 
$USER:$USER . -#ENTRYPOINT ["sbin/dispatcher-entry-point.sh"] diff --git a/Makefile b/Makefile index 8af1743..10e6c6e 100644 --- a/Makefile +++ b/Makefile @@ -1,19 +1,32 @@ .DEFAULT_GOAL := build +VERSION := `cat version.txt` +IMAGE := usgseros/lcmap-spark +BRANCH := $(or $(TRAVIS_BRANCH),`git rev-parse --abbrev-ref HEAD | tr / -`) +BUILD_TAG := $(IMAGE):build +TAG := $(shell if [ "$(BRANCH)" = "master" ];\ + then echo "$(IMAGE):$(VERSION)";\ + else echo "$(IMAGE):$(VERSION)-$(BRANCH)";\ + fi) -download-spark: - mkdir tmp; wget -O tmp/spark-2.1.0-bin-hadoop2.7.tgz http://d3kbcqa49mib13.cloudfront.net/spark-2.1.0-bin-hadoop2.7.tgz -unpack-spark: download-spark - cd tmp; gunzip *gz; tar -xvf *tar; +build: + @docker build -t $(BUILD_TAG) --rm=true --compress $(PWD) -init: download-spark unpack-spark +tag: + @docker tag $(BUILD_TAG) $(TAG) -build: - docker build -t usgseros/mesos-spark --rm=true --compress . - docker tag usgseros/mesos-spark usgseros/mesos-spark:latest - docker tag usgseros/mesos-spark usgseros/mesos-spark:1.1.1-2.1.0 +login: + @$(if $(and $(DOCKER_USER), $(DOCKER_PASS)), docker login -u $(DOCKER_USER) -p $(DOCKER_PASS), docker login) + +push: login + docker push $(TAG) + +debug: + @echo "VERSION: $(VERSION)" + @echo "IMAGE: $(IMAGE)" + @echo "BRANCH: $(BRANCH)" + @echo "BUILD_TAG: $(BUILD_TAG)" + @echo "TAG: $(TAG)" -push: - docker login; docker push usgseros/mesos-spark +all: debug build tag push -all: init build push diff --git a/README.md b/README.md deleted file mode 100644 index bb14e3f..0000000 --- a/README.md +++ /dev/null @@ -1,63 +0,0 @@ -# lcmap-spark -Creates and publishes a Docker image for running Spark standalone or Spark on Mesos. - - -## Building -``` -# Pull down and unpack spark -make init - -# Build the image -make - -# Push the image to Dockerhub -make push - -# Init, build and push all in one step. -make all -``` - -## Developing and Releasing -Releases for LCMAP Spark are handled via release branches. 
- -A release branch should be created for each new Mesos and Spark combination. Changes and updates should be committed directly to these release branches. Docker images should be rebuilt and pushed from them as well. - -Versioning example: for Mesos 1.1.1 and Spark 2.1.0, the Mesos-Spark -release branch should be named ```releases/1.1.1-2.1.0```, and the Docker image should be tagged ```1.1.1-2.1.0```. - -## MesosClusterDispatcher -The MesosClusterDispatcher is a Spark component that is used to run Spark on Mesos in cluster mode. - -It listens on port 7077 and 8081 and serves as the master when running ```spark-submit```. - -The Mesos-Spark image can run the MesosClusterDispatcher as all required dependencies are included in the image. - -Please see http://spark.apache.org/docs/latest/running-on-mesos.html for more information. - -### Configuring -Environment variables are used to configure the MesosClusterDispatcher. These are all required. - -| Variable | Example Value | -| ------------- | ------------- | -| MESOS_MASTER | mesos://mesos-master-host:5050 | -| ZOOKEEPER | zookeeper-host:2181 | -| FRAMEWORK_NAME | TestSparkCluster | - -### Running -An entrypoint script has been created and is available at ```/opt/spark/dist/sbin/dispatcher-entry-point.sh```. - -The work directory for this image is set to ```/opt/spark/dist``` so an entrypoint need only reference ```sbin/dispatcher-entry-point.sh```. - -All logging output is available at stdout. - -Example: -```ENTRYPOINT ["sbin/dispatcher-entry-point.sh"]``` - -``` -$ docker run -e MESOS_MASTER=mesos://mesos-master:5050 \ - -e ZOOKEEPER=zookeeper:2181 \ - -e FRAMEWORK_NAME=TestSparkCluster \ - --entrypoint sbin/dispatcher-entry-point.sh \ - --network lcmapservices_lcmap \ - -it usgseros/mesos-spark -``` diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..6b3f622 --- /dev/null +++ b/README.rst @@ -0,0 +1,84 @@ +.. 
image:: https://travis-ci.org/USGS-EROS/lcmap-spark.svg?branch=develop + :target: https://travis-ci.org/USGS-EROS/lcmap-spark + +============ +lcmap-spark +============ +LCMAP SEE Spark base image. + +On DockerHub +------------ + +https://hub.docker.com/r/usgseros/lcmap-spark/ + + +Features +-------- +* Run `Spark `_ locally or on `Mesos `_ +* Interactive development and analysis via `Jupyter Notebooks `_ +* Connect to `Apache Cassandra `_ with the `Spark-Cassandra Connector `_ and `DataFrames `_ +* Includes Spark 2.2, JDK 1.8, Python 3.6 and MKL-enabled Numpy + +Example +------- + +.. code-block:: bash + + docker run -it \ + --rm \ + --user=`id -u` \ + --net=host \ + --pid=host \ + usgseros/lcmap-spark:1.0 \ + pyspark + +Documentation +------------- + +* `Overview `_ +* `Running lcmap-spark `_ +* `Configuration `_ +* `Applications `_ +* `Developing lcmap-spark `_ + +Requirements +------------ + +* Docker +* Network access to Mesos Master (optional) +* Mesos username (optional) +* Mesos role (optional) +* Mesos password (optional) +* Mesos certificates (optional) +* Make (optional) + +Versioning +---------- +lcmap-spark follows semantic versioning: http://semver.org/ + +License +------- +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to http://unlicense.org. diff --git a/docs/applications.rst b/docs/applications.rst new file mode 100644 index 0000000..c032669 --- /dev/null +++ b/docs/applications.rst @@ -0,0 +1,81 @@ +Developing A SEE application +============================ +SEE applications are created by extending the ``lcmap-spark`` Docker image. Additional dependencies may be added to the derivative Docker images, and code may be developed using ``pyspark`` and the Jupyter Notebook server. + +Once the new application is ready to run on the SEE, the derivative Docker image must be published to https://hub.docker.com. A user account is required. + +``make`` is a good choice to build and push your Dockerfile. An example of doing so is: + +.. 
code-block:: make + + .DEFAULT_GOAL := build + VERSION := `cat version.txt` + IMAGE := + BRANCH := $(or $(TRAVIS_BRANCH),`git rev-parse --abbrev-ref HEAD | tr / -`) + BUILD_TAG := $(IMAGE):build + TAG := $(shell if [ "$(BRANCH)" = "master" ];\ + then echo "$(IMAGE):$(VERSION)";\ + else echo "$(IMAGE):$(VERSION)-$(BRANCH)";\ + fi) + + build: + @docker build -t $(BUILD_TAG) --rm=true --compress $(PWD) + + tag: + @docker tag $(BUILD_TAG) $(TAG) + + login: + @$(if $(and $(DOCKER_USER), $(DOCKER_PASS)), docker login -u $(DOCKER_USER) -p $(DOCKER_PASS), docker login) + + push: login + docker push $(TAG) + + debug: + @echo "VERSION: $(VERSION)" + @echo "IMAGE: $(IMAGE)" + @echo "BRANCH: $(BRANCH)" + @echo "BUILD_TAG: $(BUILD_TAG)" + @echo "TAG: $(TAG)" + + all: debug build tag push + + +Keep in mind that Dockerhub is a public resource and all images published there are public by default. + +Do not include any sensitive information in your image such as usernames, passwords, URLs, machine names, IP addresses or SSH keys: This is a security violation. + +If your application requires sensitive data it can be supplied at runtime through Docker environment variables using ``-e`` or ``--env``. An ``--env-file`` may also be used locally. + + +What's already installed? +------------------------- +* Python3 +* Pyspark +* Conda +* Jupyter +* numpy +* cytoolz +* lcmap-merlin + +For a full view of what's available in the lcmap-spark base image, see the `Dockerfile <../Dockerfile>`_. + +Installing Additional System Dependencies +----------------------------------------- +* ``sudo conda install X`` +* ``sudo yum install X`` + +Installing Additional Python Dependencies +----------------------------------------- +* ``sudo conda install X`` +* ``sudo pip install X`` + +Derivative Docker Image +----------------------- +All SEE application Dockerfiles should begin with: ``FROM usgseros/lcmap-spark:VERSION``, such as ``FROM usgseros/lcmap-spark:1.0``. 
+ +For a list of available lcmap-spark images, see https://hub.docker.com/r/usgseros/lcmap-spark/tags/. + +References +---------- +* `Running lcmap-spark `_ +* `Official Dockerfile reference `_ diff --git a/docs/configuration.rst b/docs/configuration.rst new file mode 100644 index 0000000..4408fad --- /dev/null +++ b/docs/configuration.rst @@ -0,0 +1,145 @@ +Configuration +============= + +Parameters +---------- +lcmap-spark configuration parameters are passed differently for ``pyspark``, ``spark-submit`` or ``jupyter notebook``. + +``pyspark`` and ``spark-submit`` are executables inside lcmap-spark that implicitly create a SparkContext. + +Parameters are passed to these executables in the same manner as non-Docker based installations. + +See https://spark.apache.org/docs/latest/submitting-applications.html. + +``jupyter notebook`` is a general server process that does not implicitly create a SparkContext. + +In this case, it is best to pass parameters through the environment with ``docker run -e key=value`` so they may be parsed in notebook code with ``os.environ``. + +Volumes +------- + +There are several cases where host files are needed on the Docker filesystem. + +* Mesos certificates in cluster mode +* Job files +* Jupyter Notebooks + +Volumes may be mounted using the ``-v /path/to/host/dir:/path/to/docker/dir`` flag. + +* Full paths must be specified +* Set ``-u`` to the host system user's UID so file permissions match + +.. code-block:: bash + + docker run -u `id -u` \ + -v /path/to/host/dir:/path/to/docker/dir \ + \ + usgseros/lcmap-spark:1.0 \ + + +Network +------- + +Docker has extensive networking capabilities. + +lcmap-spark's use of these capabilities is minimal. + +In order to properly overlay a dynamic Spark cluster on top of Mesos, only ``--net host`` should ever be used (host networking mode). + +This configuration also simplifies host system access to Jupyter Notebooks in local or cluster mode. 
+ +Notebooks +--------- + +lcmap-spark's Jupyter Notebook server expects its notebook directory to be at ``/home/lcmap/notebook`` by default. + +Mount a volume from the host filesystem to this location when running the notebook server. + +.. code-block:: bash + + docker run -u `id -u` \ + -v /your/notebooks:/home/lcmap/notebook \ + + usgseros/lcmap-spark:1.0 \ + jupyter --ip=$HOSTNAME notebook + + +Local Mode +---------- +The only requirement for running lcmap-spark in local mode is the ability to start a Docker container. + +The Docker image must be built on the host machine but does not need to be published to https://hub.docker.com. + +If you built lcmap-spark using ``make build`` then the resulting Docker image is: ``usgseros/lcmap-spark:build``. + +Cluster Mode +------------ + +https://spark.apache.org/docs/latest/cluster-overview.html + +Cluster mode uses Apache Mesos as a cluster manager, allowing Spark to run functions in parallel across many physical hosts. + +Cluster mode requirements are: + +* Ability to run lcmap-spark locally +* Docker image published to https://hub.docker.com +* Network access to Mesos Master(s), ideally over a 10 Gigabit/second or faster link +* Mesos username +* Mesos role +* Mesos password +* Mesos certificates + +When run in cluster mode, the lcmap-spark image is automatically downloaded onto Mesos nodes and used to create Docker containers, which dynamically create the Spark cluster and execute application code. + +.. figure:: imgs/start-spark-workers-on-mesos.png + :scale: 50 % + :alt: starting spark workers on mesos + +1. lcmap-spark is downloaded & started on the host from https://hub.docker.com +2. A SparkContext() is created, which requests resources from the Mesos Master +3. The Mesos Master schedules the Mesos job on one or more Mesos Executors +4. The Mesos Executors download lcmap-spark and execute a Spark Worker +5. The Spark Worker contacts the SparkContext +6. A Spark Cluster now exists. 
+ +After a Spark cluster is started using Mesos, it behaves as any Spark cluster would if started manually. + +The SparkContext maintains its reservation on resources with the Mesos Master, keeping the cluster alive. + +Once ``.stop()`` is called on the SparkContext, it releases its resource reservation and the Spark cluster is shut down. + +.. figure:: imgs/spark-context-workers.png + :scale: 25 % + :alt: SparkContext and Spark Workers + +Running Spark on Mesos in Docker provides a reliable way to dynamically create a consistent, immutable execution environment across a cluster of machines. + +Apache Mesos +------------ +https://spark.apache.org/docs/latest/running-on-mesos.html + +When running on Mesos, there are two modes that determine where the SparkContext runs: client and cluster. + +lcmap-spark uses client mode only: The driver program (SparkContext) will always run on the local client machine. + +**This shouldn't be confused with Spark's local and cluster modes, which determine where the Spark Workers execute.** + +SSL Based Authentication +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Three files must be present in the image ``/certs`` directory to enable authentication over SSL. + +They may be obtained from Mesos admins. + +* mesos.crt +* mesos.key +* trustedroot.crt + +These files should never be published in an image as this constitutes a security violation. + +Mount them as a volume at runtime instead: + +.. code-block:: bash + + docker run -v /home/user/certs:/certs usgseros/lcmap-spark + + diff --git a/docs/developing.rst b/docs/developing.rst new file mode 100644 index 0000000..e6424ad --- /dev/null +++ b/docs/developing.rst @@ -0,0 +1,23 @@ +Developing lcmap-spark +====================== + +Developing +---------- +* Perform development on a topic branch or the develop branch. +* Set the version in version.txt to x.x. Follow semantic versioning. + +Building +-------- +* Local builds may be performed with ``make build``. 
This will create: ``usgseros/lcmap-spark:build``. +* Travis-CI will build and push ``usgseros/lcmap-spark:VERSION-BRANCH`` to Dockerhub for branch commits. +* Travis-CI will build and push ``usgseros/lcmap-spark:VERSION`` to Dockerhub for commits to ``master``. +* See `Makefile <../Makefile>`_ and `.travis.yml <../.travis.yml>`_ for details. + +Releasing +--------- +* Merge develop to master. +* Tag master with version. +* Perform github release. + +See https://help.github.com/articles/creating-releases. + diff --git a/docs/imgs/spark-context-workers.png b/docs/imgs/spark-context-workers.png new file mode 100644 index 0000000000000000000000000000000000000000..9893bbe20eb7a3a599016760961a1c2d3e47c294 GIT binary patch literal 18686 zcmd741z1#VyEaS-0}L%NNQaaNBT9Ej2m)dN(y7uRF?5F@AuTbWpr|wtqezFOfPu6$ zj1m$<=YP-O^Srs=z5oCF_Hq2op)hM!-0NOzUFUV4*Lh8ho{l;N**P*iJUj|b4OM+S zJOU{Arwt(nOVTEet?}?QK4_{c-SQb-$>DUoZCaaGQ{&Ir8R4I(<-Cu+bBlu-LPQXv z_~-B6Z-4(vggy(^)`%n}xbeB`d`)iw_-!MtdHL0pF8F_>wF>RmUuSx6Ab4yF%n)Z z#-%^s7DM+2Ma#uS78c#Y!SxIt%@6$_z4^Pf71UvAX=zXUkqT57D^HBf&Ds6U-@JLl zMjfG~I)^50&5`kb%FfPo*XAUQzinr0p_iEoyY&Vpe0sVys`ukZ?b>WvgubO7>?|=2 zO+y*jCkYxB}ib9`)hFhy0y}ppyw95fOY_;ZJc9H&-zYpgOl8;Zup0b}Hl;{-(-S3jQ>iTQEW?{A?A>s8K5L zaCfSn#YDk>_fuMOk8Ska&#y0yQS7!=@NC-o3X}H_U(sjgtR3!Q7M4eIEly#{4w)k|N;;yr!XyYiL| zMM%C0L4|JKylG4zEBf`D$7FoEh&g4q5|JdE5iy&b?-d?s1#&=w#Iu{M!GBM5xG}M) zF363=zD4ia*`8QN5$T8!X*(r4F!eV55td$FsDK1K)>ewmAKVs`4FP`qMakC^*j7_7 z7>w?Sn7k`5+|j~|i%GvBtfVlgLSWxQEr%E^9AYi|nuQDpdqW|?#n?|9dga}zEdaG4 z<$9x(mtd`wM>X6(+}Qcr)kOJRXWTLuqV|d$*hM=8a-}5z7OrfW_`}@i@3fI7ld`0@!QG!EM@KB|78m0;pV&4&Zo)ilHS_&l zi|fj0b-vq)2~xf#G0bsAFk;2xWJbKl?G;a1!2ad~>ftVVM?UBtSp+}dg4&v4kN$Eb zoWlS?dB$_g>ob_i&Qekpr#?KbI)-Fw&h{?NBaaWN;|3N31sz}@{iN=fT!J^O`SZ14 zFhP!@s<}&tvS;BscAVkS?w4f~j)cSH>U|5iBCl8(^7JS{tIOHl%+G9##E&^cR65Vm z$tP@PoM7j&v-D6ThC6lLvy`~=Z9@E%&s>=wZ`(J5Su&M|>?HT@1A6MtTH!ZBjG=P{ ziJ6j5gzpvPBFwH*PV7~7kG2$a1t0LojW%Z248MZgG)w85JQ2hWJr|;4b9QEe6Di=1mk_5We9#<5V$3V>gfqr6Skp34X{G(vylVe2iEs>073~OVC2>kp 
zetN%ZTZVPh`vEa?vi+5@x2ANF{$0FdPkClRtOLQWR(brHl>Tz9()bsjf%&*$ z9j#T-3oXTNDX1fIOPvD0?RaJ367I1uCvPpY8n{jCr#V!hj=k#GKm`I9_#-k5Z-vB zefQBJ!@dV6V>7w7wKTMFu(PUCCUP2b!Ij{LlC?IY z`*LJ@$H%j`kclGZ4Oc*@>HYHM%O^jiLXh)|moKwiCL08x07dwrBw6fHuMit^FgsB2Km~9PYY#tj~O~^YFNNdR#pirS%bb`o>IZy`1KxnVFfLh-W^;GM*k( zMd62SUw{AGSBu}ieaqiLI0XdMQ-~J4H%f1;CAg9DO-!fno%HOFbqU+bKsJ}MGbY{4 z-pg`}xjoTlVhID>JC%EWmX`S?RB~JOML`pGr4E^5(W{Y-MsK&})+W|xJNzi3^cpiW znZS&Ji)ra6A9y&teT7uCpn~+_rk)3Nz3-NNE&Nx&=8w-_tOVDkfbv~( zx3d5CmqM+uDeamBHo0@Y3Au7lJdXCalthqHc#b>Y-WYt_=hj;uE7;<*4$hdHJpu4# zPpuB)58LOGwUMOGrJdv7enr*!A&@x@d!t(p-)m6}=9ZN6+ji_(A`<_yYy^WCG7+3d zJ{BUxFH6(sD@uE{2YFW=Y(Q)hY9s(R^5K7O;$Cj6K+R;KP-ar|#iYR>L2a@+**JN8 zeb4bF%aa<0s}Zqz?WUljb|p+R?pQWb zpZT)CIn1r{g+9^2llA3s%Gm9o_zPomyD8*jr?Db{6H25T>Z=p;)q$@QeZ})4;w=W#~nCR0x6)qS`bb(h05$B}+oi0CM?i z$#F_=;&r4S=O#Vzwghw`p5$^MN z)=i{OO)-vOaJh ziak#98^i;ei@FPi#R>)j9JG#?%@@1YpT#Fb2**^F+cR55yKMBak5QzGIFFJm*L~_w ziFkG^bQv@9gq+Y$rMMEh#EtnFnjb$#^chs}idCfiUu1_%I5+iXQngu**V2j3=~E&( zND3j;k_c9*M|+IRH>CvJB$n7c1SK2X939e&x9N!&^2%4_-v-zP(WI67DS!+K8*NnR zU^UxFqXVKOQN{K|qNPhq$n)e^(};h4P|S;S{~?erLGvuzJ+49AJDAM}F&X?0>S3~L zLK!#k>JB9nUZ|wOt(5vh}rT%H)1Sd|AEM4PRCGOuUOCR|-<3|lUE~ZN6wJ4rD z;y8Kpvla3z_zFDed3X)W!5+IGE?BbY{S|t#u^C)8Vr=}z<-mUh@zW*$^5Nfw?fB*MzYq?(YsZ3b>qSkE)%!FRR(jVYWV}{bh^e37%u~{C@sh}9bUY6 zF}l^4D>pV?@AuZo$cU*Jb9{7QXJ_}K#(n-8W@El*0gEWUb%8&>6mEMsI(T?^tTBnQ zq_q0h_209*cW*2&Bc{@5VPWAZK$f>lNcn*Gh}|_y41g!rGs%GaEVMsnc0$|`?r-$p ztF)~*e(PNA`7Y=r5KB#z>PH$A*cB*KdycVyQ_ePT))-j--HfzDbUW#29_A{vHx7U^ zOFe<@@!80?E|Zk(6#UbSjM3!=d)(PU!$}Qjv%?kj3KFJe%E1;7g}0cy#PK*mh$EEiLudc%X^5M22i9 zrHEN#y;mokNC)wmLW-C@BLM$YVhGc|mJU}H4~--4Kz1Z1>BBlhIizJ3f{y(!SmAD= z@|P`5Yf41i`b$iav_BrxeeBMojK&Y5v+*dWuu7_b(9m65)jv)0NBt82|-|P0pr$BZfH&5rF1JkJ*w{hSI46)k@_$z zb0aH54l231qovC{pABG%3E7;MY-$Z;S8-8sTNTD398-X14@tNs7qY4GUfI&bG8(Nt z(1R}{oViR(EId+t#=i6{jB$bVAw>-bhx+mRf(qz1(PDbag(L{UTx2TbJHq^Es48`=p8%1`M26oVCP#@PRdGG+9rTfqPW9ubsv6z$ z>00&nElr`f!VOjHd-jL}lzie3#ezcihzAl7^lm1d;RC{N<_yk4ZI!?7N0o;B9n9MnRet^{$!4G 
z^QwS!wJILXUQV`SxCSX7zZdF`C_XbT8x^wBz79JtG$0YrgzV}N5N?jM%=M|4RhzGS zvavh&Msz25{eca&{~9^}PonPs`L^!xy~!ZW z))2FJR+(+@)=+GGS4g;i`OjdF%`3pAIt~8V6Iy1*icpTu&f9^eSa4Ialyt2_U|#I) zZ@mG82+)%S5d6PM*9Dz4Wai{JefXWaOMW_()b35@(=9$BN|bx&vu=g?@%ee5r}Ox` z&*SeswNCd1SdhaJ5Q7Ho7}Y3b5p4PodOnssL|aH-Se>L#e7!Nvx=?WAe5dUsiT+lnL(+^ z1HuhJGV_GwZ|XqiJO}v<8-U1nQHG?H#g!CAbgRJDgiRrEoY*r@C#44!ykhpw)lx4g zp0FbTad5sd-^AuOS3riiS9LR8YiV0`GRxQN#MeP3n{(rzXKlgy_H!7_qQZ1@tP z7x6u4A;$xpC@m3Wu^N$q=y^r2U?;V0PQ_qDue(9<4@f&$Ij8+PXCk6G2n*rmzq}00 z%3S~ktp@lfiWB|L6JVsG)Ji#K!u5Tl(zZ=6NObgs_*f4MKmr5*n_3Bl=6Fgio@0e*zy`|{F{CtFv zy2f=NRGB|-=y#hFtdu={7F$v2Q#4mduMkNv_-0EreGAniT9Q={x#UF@ax0MCa@hHc zMZpp2aJ5^&qJ~`e`JG`PVp*xU@(?O3zK~TF5G6Ckuemeiw%CuVUF>(8-(52lo?nZ= zq4P3xr!YJ}VUJX*b10NLlZb#T0YP9Y+mHW1-%?RAk${)r%R$0)f?Ixewvyugck1{D zOjd2wc+IsRiB#2Q?hPQP6B{rTfPtxCmFd<}y330_9Q{19ycktQc`Vp9zEM$~Hqo>JA#VuIdCrGb= zJDI(>wLm=-9+FfEsr2U+ZXVZcdK^c*)y?&lF_JL8K=aW+D%y4w>09*}rq8)nsbIU7 zTX4+MfA>gAyp9cXA@N);TI)&&O{69ez{)ytJX#O-`imkJ9jPKj>FusWMYRKFx$exfS&X^7Nt{+<-LezYb z0`?)^FSkTi{hBINStO5H8aH-3=;;F@Dcft@=vw9;XMxkxgL!RTdvqjzN#{KRXogv= zOk(~jX%U=-qmN(xCLIye)YK>b_5&ranD+Uj9-S_YWK~*}{uR$p61KZDSUpX_YqY;w zauvHj)w|T8qvfhYOTR;VXUOk(_ot5Z?+D3%rVC&Pzcr};Uu+v`-jzV?w&D_WFcujb z+q1TzQgqUar4yh`ffe?^4+y@XWC8N{zhC&@eg3zN{(k#N%Y`7J9#EzEHm@);5EI?9 z7IyQcPRXA|8gb84Flsb|9j{CtJ0X01eZPMAVC$3l^yz-tDU&&z!WMei?2{&?dtbt% zhwp&o2K$MXaUquA{H8lTw1QZ8ZsC~GEcy*^eEBV>fSMcr59>!eebc3myw{&A1~PM2 zP{F|fWw3c?YkgK7)Ns~}$-|sd!Rwm{L!^xNuPX}jqJMNAqd^%ZY;~zOO9GUDrr$CU zj|qa;YjYteh)@q271J&ll={&6f|nrlKp*D#2K(Cn%o6FiHlz6ijA?mD3W~aoeUYfb zChGl$07|}LO_)hSXJuuD?J$-3Zp>+7fJA5)7}!t~c;s6<0a(b>C@OaGs;Vj#g>&*J zKnGK*k#HWr4-~c0;u0q(-Di$zScLriTs_R!A8?@#^G_g{8@r%%YXBW{L}Gj+_iskk z)YN!s$8Q-TkBkBB5==kB(hCc@ZY+(^tGN+1tiGLYy#`3q_B+#BuM#<(=Q=I^al6i{ zmXgISiDQC&a(y|aD0`qdI=wMWKs`d)-ZJ()*j_=I$XA&f)6ajXIk`M4$oAIZjeNlV z6-l;({F1vSCM^E)gQ=6MuwBk|m7>So8?BG%lGusLa^(C_AoOoqHXK@+EuyVV#nX!wh_?O~>u>gM(M?LV>a-0SZ5*X)I^bQS=_m?}Ly#^MW z8wN&SyCdI^zAi=i)S}4n1$L6R-?$im@Ys7C9>VSevYg|i?bwbxvtr9A*&C24kTI|v 
zPQiyjqJ!$MaL1AeivY9Ps8(M<6rIdlT^;zQL1r=9U-^A^dOh>0*iOibUOTA%9T1{z z^7yFv&Q;#Z4Hw$HQZ)3v)|;fv5{=)%!OmU0Ki`ucb~KAI<+GHpc*Xcw$%djk*%i=C z(yl?q0%L-sg)D2&OQb;>b#U&9sE+Y%;;DwV zhEHq*qk^G0Eugj;}EjKF6+>xqWZ^ z(N0CXrcR5-*1C1j+LV$pRT^Xp%+qoHdRV{QBkj%i4qC!ln?`kk(=8DwVVL_xYnlT+ zGm<_{R@=Evo~`FXqL(XzJ{LWfCgm=C`&|}uluXx%q9o{QYPyWYC!L92`{pINPB zch;o^eUfrweEhK}x%`eoys0xp!+S&aO9pssZp>!y@q8rjcv7JOZpY_vmT;8d0{e0Y zN^tI;$fzL7)_9AVOa-;Q!pi{m-tKcwp<6bMev3(>HJE9YsGF-g@&mf-5u)hX63<8L z;7jsMvybCM26?$x+BG%oMG~~qD3bW0QCDG*v;aPaUj-$PG{{NCtOS(rs zWw>UHy&h*611DjfKpsh-J{!7j{q&&;Jn)C?VEgcU9c5v3UT4#8nDxrL zH5HMa%!+sZnRnv0+*(o}v~PshKm7TnG$)+X?NVCz%=uKZR?D2$Pv7ob%@TcJOy4i% zeHqcRDd|K{#|s{;R1adS8BZii(2P%urcB){U)EXSc}aJ&UcE#sJ`HhT`MjiwA-fh^ zdbip&3oEFQPKxY^>v%Feltsba?75D~SoYVz>f}Vtj0`cYf(6Jx^-{#!Fgo4@7179! zgzRJ)fuQuSIlMj7!|gx)k0b&oo=%s z(RFhOO1)&%?HcH4&O37@=VX+_o0)6Gef=8~6ag_uXp?@Y+jR}5#;Hr$=o_lF1K+@O zvASFSc(35MLZYlUU-@*@x6W)LDt}c|L%P`OvFpbI+Zeaf8$;q4ZgfHB*~QOdKcm^b z8gwKlqd0}7y0`kiDBG8@ILuwHxcDV&AWLmOx zgw5X0YHK~NV6Q1%wq7>R{hqy=E57)ITW8Qez1rOowIrvj95Ky7)~Yim$maBhVNBew z5p{baVH&U3QW6sP6j7J{@H!7Ib|rMbWqC;@c&!X(=}OX&c_m|@2idCADk%z1EVsN^IZ&~I@7_RfFnQ zA|@y}-Op7BDus8ho`+%?ELQZqH^zOT)$ITXuBat>-_4>L|2_Au0%*~}=H^2&!hEnd zpyh}I=Io?ydn*zFU|$OOJH}q!fX%(7iOkH*l>1JlACtZ8GEv7=!=^&hk-+W(3XJcZ z>|O2cUnACMB5*?@Yt=;q>0rh6hnERBY7Gz-TFQsuIsw?4mB!?IPlqD!Ya*DDs(U4S z>*p&bK=JWSy-s_&S}Of3=g=SB-Pf3hpM7MRyJpEj;R#?FWgz=nef-Ri;BO~M6y8h!tv8F)H`6{+0>pEKzY%IgaAB(ZNW%QZY)$DVio;aw|Ixlqx$L4u7#AiA2tmtGttqOfe z_RK?3?mlzac&_`zm-gf3eYBH)<6S!rE6D2Sz)IncO!uDm6)Tk#%a4q0n6}4Ek|yuM zc5k?T+H(Wt+1E!ih0;xw559-@cSATRiw)vKKX>h| zV7rzb?~@DiB=gJf_#PbIPht%{Z@UoOb$nAqC(d8U{pc0990n^)bi@TLyepq{bC1j} zPrPfLt0*htcYMXlG0eJqav-L1gS1t%O}Y-b@-#nGQtE*$KT^7aisfL~?pTQ7TRZuQwZwgn`DKzzv`b5L#sE!!J?JGs>ObLl`{-FsnJk9IN$=f})?G^gq z!9Dr}??9xFWoJ;Q+Xkb;&#Sx)#t4kE_XQ1h0W^#x?Tyl@#qv+vQ@C9-PMY4Gx3o_vGW;|c9e@RNL+&{zvs`&VCsqJ?W&6>?>sx=;z&h% z{(=8of0`yy%G{`Xz>jF??ijr?-XH1}7b_{Eb!_PGQ&~kP$5W1cyi1&kcYJwK*Yy5N z_puN94lYv10iq#Pm0?r)Olmwc>lk(gg&AQ> 
ze%-;1C>0Uuw7V|Sq0R*pm?yfC@(QnoY51AbO1FmF-?cRDt4kki(j1JZCUr6;QzZc2wcsj zq^tLlHkVaHrTVI=6})*|`pq+Yzv|&PFuTZU!uITO=mz?43Lub$P;9=^r2MlD& z4v0-gFQQtdYQN9-jM;{G7%Lg8*`+(F-pSaO?!VW``nnQxN#}^BvUETB>eN<)lYzM< z4BbuAUC{alzKp+VxEI>p)T^iX>%-s|%}X@GPzi$DQqA!dwVDx}I_`@#$;rYxFS0A! ze(hCUb>Uew?`xjw$SC6lH5e=X8;>(+Oms%`oo(MIuDH*5d`!G@E@6W#e%H}S!JhWp zj%sYXSib?bByUcECRkCGdX*sbfFWG8mnvH&TlX_*45bR&6CT)C(xY;KK|^;$8<0-5nnrR^Guj$02uFn$J@K3}3462@C)Tt zksjXqC)qUxa#OI+zrH$GTpuNyQ6I7V32I67cabPNg@-njIq)`{a}5tEu#krrX}5Za4hHUK`1d~4M0da(KB7C7a-H0*+Qkubh-`RW|E2R6GaKm^ z``k);sTch6XbNR@CPot>Gi?U}k%-;Wx6EH~R~oH4 z#)#~E-)1vv$js|`Wpb_B6LwRoj()}r^}9ht z5wlOK;BE>q6qCo4{mAuE_rLWE4QH;jlQ z@BLWQmQK0_o-`W*4Vfo)#W_9&hrBg1Ik;4%@@pTob+Q`#Cj>>LC!5ko2H5yFZlIbr zFuHx`J|66TJ1N9IrCwy1hiXS?l@TNydUvUqgDlz1stg{@LhujS>tDGiE|XkNL`w8h-clK#JAIEh4Z zLqU>^t${@{jGO=;H&sJfnR%nHBABrou)tjSc#X%)z4fI0tJ?~|OUUML4l?HTU|a$2 zP}b7tXy5Y!g%Bt^-NfCjV4#0zoABa+0#~?`U1$XcYl!Z@49w?F@GKgiikPlZiqu+;51Jh(!zqgYL zka)p)y@pdC0{g>z+XljZaClk6sHqQ2U%!6s1@1^6E~c-f zE^#re%eS;#>O>-d5cCqRcZY>Bo-g?>)`J=Wd>PX~Qwzh|w&7P+r-2PbMJY#7Modf$ z^5d=Z_jA$s8~8S)8~d9Lz`-f!vjSMaZA)wg(gU;5i}i|xiRvQdzuSrNhA6Uxq~Z0W z*yQ~s`Lo`qw=}g}nlg0No)MF0tP42sT9e+oO5hVWG<+u=UY<~BlUP_pN|;Fzk8C}Q zs}nTdf^l)sa%ckghUk-!fCVu*yG=8xE5Wqc8RXd|v_9lcUg zLj22Ad~HiRtCN%Z!pJfTk`!C@J7IAIwtDIAfocNK3SE2;=(A+R!r!1^s>`u>l?@gJ$)q=f0`h)v{uY)EEzv{T~7gwR^@2)PCQ z@{>Y1^`?WT8l{!93XwZefp~ERbBimGTEz)zF9+)Neuk&;w!<*ldsCg}$v+Hd zx}!D(34F_K2!7m97m+Q6VlyN=1$-vb=mYxjJXeGr@Zk&=>rQ>FUvmGY$e!WYMb7}- z8Y!0-gyGsWEs2DFZ0{`;Abn6{)iiD@a#h}t4XKF%5%b*l%q`S(a(g;62c2}Hr?<9x z%creIp7>ksLqDC+U7m>Pt&DnwQtYU0BjM$3hao0jPJOL9yoa3XrWk%7xt^QLDoz25 zIWF5NBI*}=ph?3QE%9uG%|xn{a5D~1g2V6SIVf953BY;RE}?@5Bt)CJ`#p5i-khJo z(9B^H&m_q9D^Whk<72ng7a@3b75h%jEcL8z+kWEeC4O%#%%g&`y`)$f%>zn3zMK&~XJNDimZ-J`P*~l0) zoP|j3y6^aZG*zq_H@yomIqsZmG8YgYXg(PN=<=tP4XeK3O0mCfYyTsk+i!UKYumqB zA5Oh*|8XZ^^1%AIZU1f@fBGku87&RvfAmyXcN{w#V(-W~bcJIiR#f^EUU3+UeGiZ} zF`#v6s4OcrH8p<-jYemI*z>2Y90^2~1rR<5gOttceZOVPc$W%7V`F1`fnRl}k7=Go(K|Bs2fsvQG2m;5NA+W9y&@NS3<2v1v 
zruYz4S+K5?-hqKz2RlHO@&G<{aaUW2RUK%)*}@q@;DS8_p%=&ef${!8qsA5?HAU+v zkg)Y$wJ;XwXE-(OgcttgwYm-0T4iI6*}yFDi?38zm~KmuUoAZW<`FNGAYR^$5y@l6 zWKXS5n4U-_NlGsZDcjH>>Nq7sqSPW~T z(FE3qh!6LjA`s*qAAVaGG={z=gMMX@bYqM*WfJ{mIel-5S5lFJB| zQDB%H4xPNcH?q>eUJ1wH8(FIc>?I!u?EpmFhT)EZK~$)o0CcmM^if<7NoQ-d!_a6` zk6*<@(pIlz;-5zw8iEZB{IB>tFEFZd?Cg(7w1w;po9AMTc#qa)2}GN#3hoRwY~7QQ z<-)7pPyRk-Sn<8CO;1ArTD|`*JikA@LkK!s4AN>)1Jj7$jD%?n(G6=(rpwLTrRJ*F zfyV`F;VpP<-l=kp>xMfry8gUzIa?{xg0J2}e-nCLPoYv!gW{QrMf`hF?HlBs{?5Ri z(Y7nQ#dq_DvRooVd_YY~x@tT(R>B2A$uuHQ72cmr`;#=evR|oy7fNIjY8BUWd7%#@ z`+|z77m#H6jyA zDBjV)UpY7lH+cbdIMDUB%0TKefx-E?JPJh-Ckrf}w0hHNxu74pIr7@2Z8*C%gNSSh z9EGE{AT2T!?;sSOd8P9#v6=+jN@C_Y)Xh&Fl?NK@?%KTEy0;r*ow1aF=SGem=Itnx zJ)Q-^%8n(+o0))9OSo?6x3(ZM7_&n<47cpaG|4kjDtGVe9jyn|{8RvR# z1^7EZ8DqlKnA}@5AfETH;l_Wvbd)pLct?0_?w#j~@rtCEQahJ0>vw18C~x1h*bq4^ z)}Dh9X$*8G8=>JTAQz6c&3MHdeR>OqfxE%t+4)u_F;0sg5&1bs;@*J005^6pIbld) zf5y9Ik&Neh%7;3_PelvBU#v_21mz0+Kmm*|MDTAYuFY3 zH@E%INBE}!fMNa~GcF(7jSV{N2s&iH@(9-g%N}%CVR4fWi&X#MIR{Y#2VcJz{&$~a zlLZ{O{^?mi-ChMtp#G)d7DuguTFM8bHB#yLbc#o@{*F^J_$NZ*ER&~^>)(0O0Rm%> zh{LzPQ2rVGkj2vQ?p=HNpkrC!yto$-5U>_dQd;_NiB+`nKhr8;g8*stS7CsZ|IUx@ zB?QI#0|^-t#@2!F!>g1CP$4m_i31pl)zW(IiAzZMCCsYt)#8ttnwU8D%vb2jSqVe7 zepZ=H9+6|+l5B5r)MB!`g>bn8adIIc0O~5Ys}Ztvv8>!e71Tfawjh`Sa_=v4lz^m=3P_cCSz?-AjlP7_wY|5wfAE3!>U9W~+-v54rx{>hM1yDbl;38lNt3zU40s>GLv`_sV#?w(vza%59 zAA^|4(&CXO((wNjLt`6kKNv>Y+S}W&?Eua-y7luz2e5U5YRtvvEs%WNwv5ipB0P*B zXKY`rxd)I;iXRPXL@Vox%J$6?;hfva`wMw&2gEkSRwssKYgnD0*A4hWU*Cv{fDHrWD#8)g|| zn9%VQu8MchG}O9I$0d-K%cCXGu1KN`s0>u3ga>_>q7~UU+_CLt@0af_o7>!$6y~Jp z)J%uBxv2;adl&GcfSbJIQto6O!o$i9+qD96&4>^@#(Enf4i)%pRl!P6Uans~(Flv9 z3and-lt`N))NHfcp(SyZG35SS{v&Ovvx(^@UIVdtpv-iTgrV6;RK-Vw)6ZYd|crJCN;xC(z+c7_&b{Fb8LJ#71%Cx4#E$BRE`xlpZ3Ht=o> z-7@*M^SXGw?$jz5m(`ce*^Y-PEFnfJ6YeNA+;nRNx6+m$1f*FcRLf8uTbGu@HP_)@wuZrr>Kk(x0* zgl}u;(7Cg0Bc@W?bFkM05nQ-T-!wVe?nQs+YeL$W4FLVOu0*-i!SN|n8{S$Kh348xn zwmn6m-$&T{R(yD#g2Y$KYk7E*f*gk~Ot?O;zo8ZGloDH;fHOD?L;CJB`RArvtT^3jc!JIT 
zvA({VzV>#t3-mzrI;Ee1pL$FT4etX`F<~8BsNUMz67T#7T5z=t`csAwjc4ve7yr>Csrns*DUty#x2sz640|49-;lO^Ta(UNHWV{Hm6S{F*&~59 z|I}~`WITmIY-jBPi1RDlasw=?KTFI&O&)X*ITLdBV2$X7g@ud5yIskAS%7z;mQrn- z!8Zhq?f^0XH%vI~x7`6D+3EX+big|V#?Au-re=?A(4dsCGD&3G9hax|$*ysug2U6C zc8ia{YC>lb`L@fAd$JAqTwyzt)zqCBhUNkkrCR$ zSMhFu0=)5s`{w~xjcwFBrW+{>7a_sJ0NS3iS=3#*G;AS`oFt%ywD=>S$z%O$Y53Dc zg#$FPqGIg$>058J2&pUe$n7h7-MgC8j`GlKAqU;57kYt@j-*+vlBz;s9^MVU)Z!_i zS*!1ktBad~h&lE?btIUaQP{ETQ8>KZIqk_ge*=hyd15ZE=@xt!15v#b!RYd`BXd(F ze;h5bYQA=&hHIC+I=A(@gS zac|I^k603U=_5&~*r396JiBSL`vIJO!9H!D2#n|#oH(F}QWYCEQ5KT;4xX)`-L+|s z;hSD8Z%o8h!~=ds0HJTVzcfybV!K4V0^8qy14c|8+%qA9U&S({27|Fh?~y{(HsiU6 zKU*to45d=pT0GW_oFyKora&nzgieVA1ACVabvaE`(S^!H!V64vgI%6i0}acdMYDs* z*{erMVJ1PMGSPYyya(MFcV%mHON%VWmsI}9`SK^YTjW+gqxL6`>;2<|p`!SFlj1O` zDY3NOtf_P9d}?{t%9hXv)Z>QyWq&r_E=v9SPCC07E$gp*q(#Hwx*_e}Rx#yL@GLt; zQu9bE*Mz2dy=Uv5&Y6xltqZfm?&ICs*TFF<$;K80AFWsfjcj|-2>?0;zPBafM54(t zkbp(00GfD-r`fmQmytbwjAG{}qIv3Ufv8+}1RbZmG4ofEFn2@{UM|KLX<>Ll#Jrg% zop+E|pSCR%?{2N5o5Z5|F;x^`Q;(t7SJ=p+6(_avR73)PiFVO*%$nT5wj8mzTMn

)x_ z7(WL^HIzVw`F9I07YT@azp(;$$^Pw||8|9cvniG^{S8b0CvX1Wt&M{_nxsb1WUFwD x&b)kY5ai*J>Fh0QtJp8~`TzFwrT65FymsWZrQMq};CqemG_UHYmMU9>{y$h%YqtOZ literal 0 HcmV?d00001 diff --git a/docs/imgs/start-spark-workers-on-mesos.png b/docs/imgs/start-spark-workers-on-mesos.png new file mode 100644 index 0000000000000000000000000000000000000000..c62ba2690d87b4e9755e2d13af7b265aa90768f4 GIT binary patch literal 16084 zcmd_RcTiL9*ESjym7<^`pb{w-PA|NdUFmwVUh>aph6CxIR?t6S|*SglV)_!`^ z?AktoqXHlhXrIyb%eO(Go$(+Lk15}7;7l#gQC|>9KilZ?Maz&Mi$uqwdo!=))|()O zX4a@fDe8hN7o1MUoed>jQ9o(v#Fu(g{nUL6>;dR~5}(p5c&yls(=M?op6`y`xbJfB z2KOZKl2OLDOZ=xk|BMswN&e<^(nLK(=EZOb8l~J)UFbF{vOT)7u=*=ofsuHOeS(Rj z?Roz7Sa{bnQH7vZDyNd^+UE|4^*{Rq57S-$`KS@esLhS^tF~2SH*bEF+WffvE_N3P za#b?q=R-eD5W{ZA`-5Vj3lQE|L(=~L-QWE}JfK)Z&_5Ud=l?M?g>0@3nTz)loTJKT z!!PJ>Z?1N!VM14DF)*!MyGZ0z)Hdf`Yera`O8e$YhcgvlZf{>159b4Q@Wgj+%s-zY z22WY7qW&!7r2KT*3$+p|t$(nIRM|~8qT*B}lZYp0C_6&IWGbPOM#ao%ln&5TvC!@C zQ2L0C@Me7gm`dl)3KB#5)2B7%5c|mL%_=EgN*Ui5nE2tEROAEyh&Q3LS;q*b(O=Ov z6%Nf*f6`6JA?_nDNpLNfLizR3eoi{mla}2)8GWxgWO;g8n^5669;VpaIE8GfN9r+#EzFgGx3kbw z5v$pbU$<9n+uRe*ji32*vUMc0ZZqmUt#!2@A09cEpJ5G?>0?p)QRDt5Q*yZxHT9hF zMn69ZA@BCWZfSh=;i+(RF`itn;tH97wNSr=_h?e}e8q)UwNn0|=&MI6bP7}ldID|s z#c!O?W@S@x^R)x|En|4L*!;Y@wnSb z&tI&Py+4;U1EDXufZ}f8+hiyRG5TKT*F}n2TgpsJ$gm-G_z+%DgpD5kH5gf4?T>8#&AIQ+|k; ze}4(7RgvxcDq@n}5+TR-Kr$A_%3b{rr{3TPoJXNFxF97lZWYw58Z1YJ5avfNc) zR4CAgJ#{O_`51L`VyA<$`mjef`6VL!bT99P-oz{V1cBRQ-61m4GF93+z@Oa?&*wqZX<;{aCe6sCAEg&It*{!B(FhC#kk#*=Ke9azCPAiN-HZ<^Yw-3M*Q-PD;Q)hGZo(^uwz6A4TF^3Nse3`AMHEbo3h{f-KkvvMSJ0g zqC%@-2%euSGrjgWtz1*iKDu=i?*_|6;q(l>V5@+|{=EE!K@CYLhwEL?1N?%0t{u%x z_wlCW?xATmW86<=huxc67-6E0moOFXnD=|CaPp|L5}khvp}4}l^-zTt9N{-NnT@B6 zp{yxI?H?wOM&0E4J*nABdSI6#sN;K3C?-PRS)V;;9>tPk>d2T{L2y}G!a87O2{~IA zY3AOXdKjX&yL8bC5$-S}CMFt^ek{oiSK58H|3|Dk^>L^ERW+A9NoSArQOO_!z}@=F z<@GFA;tp06KvxFuT9tDOxC~BcPM+mW@39Oe%QZkt_%@H zOlp4@`*AD<*2qr_H4vSyO{!m7*RmFumEX&NXAWqOFGZOne1uMb^?^5R(>CidnJU{b zjDzLE4iH$$;I&GWrZXl>rTyzpwwk@C6{PSI(Iu|a>C6(FfOE8xzMg3s{Jpr^jcS5b 
ze>~_mSYk~)DdHY1B2pypFMVi z?V)#WtZ(@6@R%mq0cX9+Np4RYdse&6P-`VmnPUA63a*_2PtIAtR;LWmUTJS%KCl-A zKkJaCqUF(PwV8TYXsG7wmhymv+}3rQ*0%l$TP3Zz$wn^)duUxc!cK1ip4ky*OR5S! zM^pw|i}#KxcdsRos?hEQ`F=ThMO#j|>hj+1X35Kth4LRQ{PC{OsUapwcOZ%*_N1YW z03(0kZBaQ{Dp6|Am^UizhoeGmNpLUsemBw02aDsrkO*pd@FD!pio%1r8=hG;r`n+;I)Je(MG6*VUo5n&idd zz14F!pk5Joky>WIQhXV(Dh}=V!{Tkd7mjKg{Z*G(CAC2nB0_;VB0CDv?-&*DUDx|O z(wH_=wRLbpQ@_9F#8HdPe3+I0I^<&qjVzx=@s`# zC_YJ#F>0+~ifAcg|49fF{}z@I`)2a^h~b6u)C!bn2`f+ah?ZvdZ>P-dfrcX)bUa0yz^GZ#>z-UXhe%Bu)z`=Z~jhOS;Sg9hj|YUkJez;#EN;-kl`;{-O~j$ zdY70D^N~WOBkpP8GWd`d{p@+CP8Y0WdtX->@wH1sXSrhcDG{uR-Hh#;^k{~_H9fF< z6l>AEo3oUA4f`X+;1gz1@^c~9MSmMPo`SsiUKy-79(3u46V}BK^=4oB1c%l(_1H~U zEK+kkb{a{2iMc0%P)sy}D9-gHTYq|CH2jPY1P4jBiwJDbL})h-eE}?wU)ipE=@GwJ zXoMZ%du+TDfB$xdm6^!hFzM@8+WZ>hqb(y8e!FFdqHf8LdL|roHYhi=D9;#4v0@&X zSkjLliK+h*duC)s1JRPCOgo1Q*z2u5i`Wdme_g@0o#`}b6LYEDUZ(I(F22pg>-NPH zG&S4j(SWV#oJoR~<(*DMm0dFc;-whg-n$u*JZmi5iG-KvdgG7Xw@}iSgxRCt2=MhpJ z)p4hZCu)gSFqLF_`YvqoQp&ig^^n}5u{r%+2c=K{H3EUR{C`mJyiW$ zBe#NOx3<(pj#V4?4L5MlL>=93oJTHPZD^dIV#5-zaQdV^RW}Kfs(vA#91%w?fA-UYbU|!I2!M#G<5e)P*temoSykCd;;sTWMIR5ztij<Mt1IQOpsz5K^>l{_>V8nn0S*6w4aFY7kcP=P^i>ov-Z5(v_G0I#Hz@jpeC^r{GVX};>gpq;8X&=A zQdJ%u?u>KU2?C1}JhGY0F9~mj-Q4woYuzMO2nwe)beu4JPSUH3644_^ibrm&x)7b`fSBnh=SnX z)ZeI*1vD^!25(K?0;caqs>&Fe`@TGPOQnz}ntVnh$$j^rYv2`DFp?6Cj9i}Xl2R0V z?q-nDo1wDJI(+D1TRhGHb?n{pPf*7)9#@kS3;`u37}xfReLA772?jy*)dihkbvY8s z+tr;sa34;OPpzn$>%sctJ3uWd5(+I*VjVp%?M$f1Is#Y-bVrV|#Ea$aqzdinc$&(V znZ-P5HkdvVzY{cCjD_P#AV{@3a?`mlV_ZXg2T!!DW5QNggZA8uimRZ)YmfKebJ%e1 z-zx)(woqxzS2g14m`l59o0_Dz1P67@1$r{F=lZ0AARx0gYHgq7$!+)9hASYr{F=mx z?Ku-~8BqI6elPVUUGe}|R*;+xEIKD0TX)zg00Od|p7&Wz7jLF_T>*qKORN)eH*tXq zZhswjtv_S2pv3r*O{hQV6T zH3*d6jM_d4aJIitG$+6lv$+`2tma$+0fXQVW}K{=Usi`(@xYT=dI1C6b9K5%xkIy8 zyFGyMght~wR%~kf!cRB_LO?&a&QF>36S(K?y6amY{2-}C$Xhx+mhm>y_CDX)t)8GMc^PgYohnmk_f!%Vs1SC(-3|d9@MoB7p1*PLaGdqUPn4<8`=+z^joZ}UBJK%%A`FDa^?!DLzlFL$C164hGO;oao 
z&L-}QPE{_CZ?>r%Yvg&m@ga+Si3iSFV>6r(5D=uVpU5A}|2XP590b3?A&_PzuerM44tk$%t)*!H>KwzV0sk^g0pCdqAOJa;0 zc*(ui;GXxK8(ZbZrmwpkK@1~w(J%a=<|}c^@*sFCV0zM9Uhb#)K;g*}3kCJxeg=Uc zn8Erw*_UkB_?^+jko$~m&uSGwi}2&R>f%skO&?|?FfV%3$&@p_#W)9=yjG=ToRE`Nn4=L3q z;YS}m2PDn8k>&ie_Wbka-5|59uTX}Hdp3C=64>6L+CF?|-SL8R-&=Qr3Ondo>|;Ej zA>}?~U=&#aa3(zumdvD0R5wp>UCGs1nd=I7-8J>da8M3-PUp46)a>piu1@akY8Jmc zd+PHhFV`Ez>44y9&hpnQ?$NG=!jhFs(%U`-=lMWYT9F}N$cz3i2Y&LgXq4V<4d<=+ zjTJ48ojp9!oZv`C_S40!j}aU6h>dt5!H)0CpCdL@R^41Nn}m%Og8`F^`#|6qbz)mx z++DxbzOgbj+Z#ox0)Z3mtaDIzn*0lsw_38Yf9wS9p=_dbu8kPn7H?jt0!>WaSB7JdeU)d z0N}f5S|I$N@OEbS)R`*}Iq6?e2gPg&x?p7fNl_sl(td#M`G>*zKOAz|Fz$W(FL0@z zzsLXQQ@UmFb%5A`IkY1q$#aFd80vDimY=fZK+5(`tV{=xkCi;S*NmXz^$@9w&QXy% zHSY8GIg#c{g)_F&V0j~92)N)vRwc%NZZN+^a+^zuC3*C03%7tw?(d2<{Q2N9cYx5< zJ}vR2Hr9M;u-?{8hVz?e>e`69Elgp0DqKS*39V9;?HrP=KMkLB zo=3P+edC&)_ADSiS8rJ-+2~bOw)h;8IO7z)SbxQ_rZTlVF80$c+OwSYm(RsjCh4uH z7=0|M7@*{8@{-F>*|>JAxCs#Ievgp(pX!~S4Q>OOx!l@iDUcs=G5np=uB#Qh3p#pJ z5L9EQB)7R;N4Qfv0d^OPL09EOO@)vX!m%z}JmDaiyY)h?Fp-@b0<0nL5|g z=kLCLddrJADWCR}VE;k>E#7r*P-l0QVjE#thXfxWMY*%D_QCE{hG0wKbr6WW{g)Ra-f2Y%)VgY2##9<-t_peduldo+q7OE6S0fehU za6)aV+6xOrO0jgRSc|`+>kgvEfNpK!+oo)___nSZ?EHcmbAs4#ow2{M3O?#|^QMi`!w>T{s*HAX(kt0b-A3UZ{i0>X4?D3keDTrnQL6M@ zMdqEG8ZgEelSoJDYxg=^ZRvMcTHrd z(cc|q+sODA9=XNT05hbm=&k-SSwEH_1BT8eEuNDD9HIEOmW9z%XFp<)8G^g9@8F+h zVmr6GN|7@!w+9|yoXX;O%}<^`bGY=J|AP3WU`e&~?teV18$GpTfzU;#Lxv*(UL~NcBs%;K#zz{m2%N8U{{F-v6` zAv4ko9_;JuTD+Y~IJuv0r>dcK$@c?v{osPTZ^T`^IVul&NQC9pde?7c{$bHT-VYA} zM>%l8(uiNgR0dK8Gk-gj*oq`}-_ zOzaJj!WH|YLKnTh=Lp0{2909Y#uh~mv2DNjN8u`Jx$Cdo=^_xv`^mwG3XsfXyY8{_ z07MzyKG0!##++pNq$(e>K}_jn8Xj$evz2 zm6?}lO$!}MVZOTaD*AOUPC2hQcm&M}x;GM&$$X__GwC{ir#Lvl(e#7}dswZ)^`$lD z7UL8SBBM<$h*SHhi|APB9OX3Z`ShCa^gy3KO{Nva3|;$%&9g*W6SO8rLjX2g7ANO!Z-?35VkZdeuivSL zDThn}0AzXlZ|R6~kLaK{Km%%reO;@oP_j<_q7Uu43dXi9n=@LeGsKN>%K(bCZUo-Km$4@ zsj#Qza`eK3 zs1)9+tsmmb4ow~{0L;LY+L#GnPSRbuF5ho&K-uOu4`2SGP&rZG2dw}w7Xg{qcvI2< zfE?42bj0c^!Fh^PEw}fLcHr!r#aX=G@*5!1&LGaQ<_mH_k8=n?ECpJ@U;^b*Z 
zE~=%x@GkQ`dGo3}@hf9x)KFNjj!%bMRkps~y9#*0xYC7d!YFEg9cwb2=Icw&@^~{A zLO}(Kk;)oS(U8ZT0#C-`H!<=)X_A~ z(99cRLU;j`z9Vm4Xj>aJW0<-{+axGKk5ZM^Q~pOdGMFpO7@;dN>@G0 zGXi1htY$yC@)#>x6OzNaJ4cjeCucvPgLmHQC$|Bx$o#^5iQw-Vx4jYKDo(A1BbAKG z*_hv?D#8dWPk-cirr2N@(zy|86*vJDAp}aauZF_5PT*2P1^1N;Q89i0(~pVL$u zS(MpjD*Cl8nPyXa`!}7k76T#qYpOBSY*tv9HQz!wjq2Xc7)+4a{9!8e(YeNizFu<`ZCcyMhr(J?*k32LONY2X%glRMp?C-JU33 zO1*m@Wy}t(1_*nMzd_&uO_|DukroTU=*nJP3qPt?c3Anq>X#CXYmDAAxBF*GAH{`odhYUQX!a*uozWHc(%cC;`) z1dkMnSc$$Jn{pb(hdrB;`-T#j*5)yZ>ujaT=dDWa-3=g6?#%c} zRozEtCt6a8k2}{clq?n0`pW(u2TEzEI}zL3?UZUaEARz!t6VPIJ`0u9X51a5!GJ<{}Ul z%#9#rG4eKz-ItEX-8M~V9f#Wv?ow9@Iv4IGbzR?Toxa85cdw5k2Nk+&QEg^!NSUCl zv9Z|J!s609bjfZJtBP+*=O2$*$9T83|EOGEh0?b!a%|U_nX1p=MehYLfpVw7_^#OS6LIjgqQ;zflp@ffZJb-*F^{$eG-A|8@2W%N$L9`_4ecYT;H z*6x)B&X_!!Db`#JJLcKe8^(#PQv{nG>wLg0vu;D}n79263vMq&Dt0gD^2N)S{5&80 zs+XYu(INNS$%jSeU&JD9jr)nsnK{;&`Q;(|1)z=-F6g3cM%m21Glcu@=0FaCnT>wY zcQXj%xDlV%T3!3hE+QBciypr{2{$#pp8EX9SbXfS`J>}<^~E}-^wVF$Kc76u!5<6X z7jGr^)m1d5mNO}`m+Nug0jAiVQ0u>V4$LTZJd;y8{%bg`d=cv+dShJle(Cj(Ibs-x z;)PxJ{nP+&xZ1jWm@zgy7}s!Vwnj~W8rvD6sTaLOM;Z%wh+2{MlfDHnN%s=m#3Ag# zfmN~|)2pYse&B0cHazo>9U^{A^&_#YFHGl6c47Kj5Hz^e5gpF3zm~W3y(JB=bg~=$ zeRwdw_v8ihKF4oT5{46K61?nM!IcACK}n=4+4LhGf~CqS=W0h_oh}J>U}j6>v?v{8 z1Kw2n%ElH*-W(`hg&Syj3^tb$a)(;-D^83L1X^T5uJD)MFm1xj7}y0#Bq?Wsgn6nHv#mI$Qc`73 z`FrK*880Y{N*Jj?^DhGP6@Ad0lID-BH|Ko8TT0(xK0kD0?)WXo3oAMEDb*7E)+0AE zqtmLg#J=X(7F_brEk8@tzx|Cd?!P9nf?o|psP76*w9hSnV1RXTbX?SG?OQ(Lm--Me zWA5aOH!H*^-LL(WrHvX{11pfQH9Dx7$;&W2qqklD?Pgf{^!GC>jB^i&=w=>@Nt-{`7*n9Yw%3MiuF1uq< zSFYT*zVAV*3Dwb8Zd+>SEx<9CQP%A3%M=_>4IX`6Q85PFGW`kmyE={o%WlrlSGVi< z%!o~zA2o=!HD-y9Z-$a0+qGP2(`aIw0NZ~_tHoU5J`_m}rq<=j=U7?B8O9n;X#-Yv z8S?8;lD4i;Lw544s#13H?teuFHoGndyEr_G>AcM}!)sjst0K{UtPJ)YHx%>#3u*;C zfIV>Yif9DoKe#q-^^z$()ongVBL0*}*FC9*Y}NlBbjLp!`cS`o33nteZskJXn~>5X zu7bEwnhTUK{A;8Ff6+JqtN%-?{|5u03!ix{NkMx7%qx}@xwSzF)^uU84L181$Xh}l!9xTTN>)c2(*Xn=xIEgc(jGw|7hYVN(LpFWh0W!&0~GbQ*2(1; z!T=Z!XwGsQXg4J7=T0SFTqrkUrKigk0MUPnh2s&5%FdVw=ftiS8m5K*8ffS$2F$J` 
zDjKi_KEQ8`bn?fqiVSr(2U0XK08QF8;L)FBck~H{!x&Eh_)I_Yh>bH5pvAPrh&MBO z7?YtCR2RCkjq#%!==G~x<}io4LL|9Fr#O|%rb=+%KDg*k^idf9%Hp>OE$8q7Ld;h$ zCSF0KW)I(!nhc|sCjeb{v^m>*;h`#Ax(e@Ia7rs_MEz9m#l>s-*%4PQxQ#nqnkzal zt$V;!U6uA#l*qrCjA%aq!B*gzOV2l$apF$4n`{6&o9uqiO&Sj5gn2{j>k=-?y*qKy z+krXV_1vD67yn*Yzk>bwM3Q3n&WakClY)##C8;X0Yjwb&oj%#^x(qZ#`sxCKoVj2a z*L(~6Qyw=Bki4jg*&2NB!nN%(uO5%Hq7|#Cue4Pi*NibBHgEhdMz>!mskl8?Mf=?* zbz^09W91u~qk)+x#vD3{77rgYd zJ#b!%o8}3`zlH1XK(dB%w+NwXD>M91jCUG)JBTvn$1N6Y7fK=5U2Fj65Uj8cv?3Y? z?|OavVih_!-Rt2EW^diSE0;|4+G)mEbJMyNfb~|C44+A}X}~W!T83XQ^$!m1w*(nC zl`P5@EuI3)I^e6Xg8&E%)b}hr4=B!kv7_dXR4>fhzOw2D&)S;qTIIzGL}@ozkg8(5 zoV2JEj>j^}o_d~1B}Pz&4Y*nAAs}ar;LYiY-V1cUi_$I65}{$JMy1*a0uM zuTwS&rVg6jFcf?=>B^;_!_}wxZH|y_$UKEm5}-OD=m(m3p%x6cO@I#8nvvH)@1#hW z@QJTEe`pg?wv_XQTRi_{>44YyBfY8WyycGB{O$y}z?+t%pSMMt!laLStU5HTNvB$u z`sX-8>jvr7!NFx6>IkWi#Z_q_sG}S^q*FJFn5Bn<+v1K_rdf}uf!#Ti2@eXDUymPq z?uXfMFNZgMS^u=02@5WZq*!nDN493ra#(w~`8ZbQbWU97Be+YuY@QLw=z34P=dF3t z$(0s;e)1501QkoJ?df8*ni#IF=NUckzF?B<~|f#ilip+R{7Ut zBe0|(-0_ln4cZx^VX6IW5Sa<|&;SiQ=UmJXQYe;g_ZSdV+1n_KZ4f{z-29v7hup{S zi|@r66(6}I(%r56)b+H9%~0|9iC;m2uHwTUZ$jNmPn8D?_l>qg8!wqL0=@30KcJQQ z2Sv6{0m!1o^*c^1(UnzzCDj0jmWdA=+GlI{gIJff`M+4WJeWTZw8%~U-{bfd`y2 zu||{r+pfoP9Su_?)k?d{w2J&tDZ4PO0ZAc^W#hIaRN|GM%ug&wrxQS@Pg-O#tVml{ zbX4SpNNcTG;;nkKNi>3s6aE(}<=On!)+YXgE1vn)XJ?R-M^x`eIj1kvtKdLcFo6l>JE{;okq>JWElL zS&99biPq573Y^G=`QQ(+^@VT3y!?BgD|+V7H>RDQLuOT^9x={rk6Xl-FUnfF5o23< zzgJ%=zb7$f-H=7E%?&L_cqsMpYPV>M@qhF*>KpBBMY`|m3(F#&rPAR|{nbzE_R5|u zl<<3#D)x!Dr&`dDp;w-~5m}zWIZX4n{iwqx*oQ{`cMR3|2PDh=ODxIfT!dKO+66@H z{prTE$HhJ6gPnJY_Ioz|+9da0ZZri}4)^Yc*8H+21ZRYP=Yg$c&$IzeohCl#g4`T_p^`!zoJNk(xgiCX6DR$gyIn*C?Rach)dt(kJV zd(XnhW~S%VtR?CsJJ}cS8*Kd&U#pK%-ljP!c^eck3~E))+pO^eI-ycy*8Rk>ZGFR4 zOUUNWN5W#g!;DEJd$CJ19YO!;*HTgiw?8#u)~I(7!Y>Rm&_n zLOrS7D7R)3SBjeF^^{QoKi9gO_v^_ ziM;kbb-f^)Yj%?Uwhpcw{5Id_ra2HxxSn}qy>E+9{h1Z& zGqJ5hqqNZj5mVbv;zp`DUVWvUo;Y%f7*PtIcbwt*H^aJ>x97 zn(t(9;xa(VDq#W`htL*o(Rr6S`tZFr2cvL+ElUr|{~TDG;5Pp3L241c{E+UMq8g;i zeSq0wkhmP9m3U$bUCh7Ev6 
zV`w?>kmQ4!0kC<>@Me2bRHb!kwbRQ$=Xt*Ip62R3eUd^r&{@Ps9SWzc#bWbBq7Fh^UC>8|g7;c%+>_m)cY+3nU&gag|9{u%5e= zeChbE?;&*ghYACxF3D9E<3~G(YK5Bl%276}y*-RL$Ek_N&IYtfW$Qw-)A zl%yha7aW|f;t}Jh!h5hF?D`F7Ms4p)f2z1ERdkrk>~Uff%OH|_EN?7#R>O)hx>cldbV;OB-K zb2_ph5hqW)A#yhueM9H*Y+3ulJpj`GwUVY>uas2YPdPN!db8WdlcaLO#gz%(N}p2Y zNf;rZ$pZ2fd9n<@YZ?)zcIA@GQdVY1n$GokNT_+SpC#29x?E#U_Ws2TZ6)BW_r;HG zgNI_3i?>kCYV0Ky3Ph&|!6s;!m#g*el(OoO8T=8*-{C@%B$ENFM{YqY>Xju5S~F|8R6K zCL7*VCV0&;6rALl=sI~VkzG@vQ5xGx6k=8QR!H>68UEwuZCu~f3lpCLzeldy0V2R;NMBh7k7>K4&(OP;n|fX~@ureuLT&sH5d zfKe=VAtT!z!z*^;|X9|joOqM7Q| zW(F_IM1-qDmq8Z|>SAM=2T3Rzix z1oCxVK5aQlgqCP)m}-x<3RNlloJJCbdG0~MEC?x-l8h+!soP-8iXd7ME}S@=77}Ol++gTHgfm(t#rHX!@+i z%z16WIL1lN6*cv}igz4v!LCMjz1{N^L{y9<|YD#f4c^WTN3&c1-GC{FQM3=%*H=K{ST z*CEhGgD`YFBhnZoDMvD@y79AwnRx+k_yZt(%VRhQ+(0vHl|TzD8?0}vIE{Z%%unW& z+}Za;3deK#k}spJK^xJf{4m&OpOdU(1pZiv%D?*zV2ogM#jgZO79_^pnUs3RadOcp40DC9iJ6;D_^}t8bysT4gX=QxHVYPpF`Wr`Qg)?)&!nC z68kl-`rkk=g453A1K`9JnX;in8@by&)w`L)i4usHZ~rHg=hln`urmLYZLvrGes=md znYc;X`?sPz&#fdE!FO}}^8Tq4bLl0Z1pZHzm<#54|5GF8GAlp{{2y5bJb()DfAMGl zN(cS_%9~oWrYXvF9_BGe7FRxf)#g+c^N#=lOEL+(qU7a&Uu>Uu)hwy27;XihI`cpKnfCu?G`It2yFnqpO;rCKn0wjiirM9gOLt=aFUPgy A;s5{u literal 0 HcmV?d00001 diff --git a/docs/overview.rst b/docs/overview.rst new file mode 100644 index 0000000..6392d87 --- /dev/null +++ b/docs/overview.rst @@ -0,0 +1,126 @@ +lcmap-spark +=========== + +A simple, portable environment for executing science models and performing exploratory analysis at scale. + +What is Spark? +-------------- +`From the source `_, Apache Spark is a fast and general engine for large scale data processing. It can run on a laptop or on thousands of machines, processes data too big to fit in memory, and moves functions to data rather than data to functions. + +Spark has connectors to many data sources, offers interactive development and is open source. + +Read more about Spark: https://spark.apache.org. + +What is LCMAP-Spark? 
+-------------------- +lcmap-spark is a ready to go Docker base image for the LCMAP Science Execution Environment (SEE). + +It contains Apache Spark, the Spark-Cassandra Connector, and a Jupyter Notebook server to quickly allow science developers to get up and running on the LCMAP SEE. + +A base set of data access and manipulation libraries (lcmap-merlin & numpy with MKL) are already installed, so time series creation works out of the box. Conda and pip3 are configured and available for installing additional packages. + +lcmap-spark provides a consistent and portable runtime environment: Applications developed on a laptop can be published and run at scale through simple configuration values with zero code changes. No more worrying about scaling your applications. + +Just write your application to use the Apache Spark API, test it, package it, publish it, then turn it loose on the SEE. + +lcmap-spark uses Apache Mesos as its cluster manager for distributed computing. + +Anatomy of A Spark Job +---------------------- +1. Create SparkContext +2. Load and partition input data +3. Construct execution graph +4. Save calculation results +5. Shut down SparkContext + +.. code-block:: python + + # Assumes read_timeseries_data, calculate_change_detection and save_to_cassandra + # exist elsewhere in your codebase... they are not part of Spark. + + import pyspark + + # create Spark context + sc = pyspark.SparkContext() + + # load and partition input data (10 partitions) + rdd1 = sc.parallelize(read_timeseries_data(), 10) + + # construct execution graph + rdd2 = rdd1.map(calculate_change_detection) + + # save calculation results + save_to_cassandra(rdd2) + + # stop Spark context + sc.stop() + +Apache Spark builds a directed acyclic graph of functions to be applied against the input data and only begins executing these functions when an action, such as saving data to Cassandra, is performed. 
+ +The fundamental data structure used is a Resilient Distributed Dataset, which is a `"collection of elements partitioned across the nodes of the cluster that can be operated on in parallel." `_. + +The `laziness `_ of RDDs is key because it allows Spark to avoid realizing the full dataset at once. This means datasets much larger than available physical memory may be operated on. + +Ways to Run +----------- +Spark jobs may be executed from a Jupyter Notebook, a Spark shell, or from the command line. + +* ``spark-submit`` runs Spark jobs from a command line +* ``pyspark`` is a Python shell +* ``jupyter notebook`` is a Jupyter Notebook server + +See https://spark.apache.org/docs/latest/quick-start.html and https://jupyter.org for more information. + +Full examples with working configurations are in `running.rst `_. + +.. code-block:: bash + + # Run any job from the command line + docker run -it \ + --rm \ + --user=`id -u` \ + --net=host \ + --pid=host \ + usgseros/lcmap-spark:1.0 \ + spark-submit your_spark_job.py + + # Run Python jobs interactively from the PySpark shell + docker run -it \ + --rm \ + --user=`id -u` \ + --net=host \ + --pid=host \ + usgseros/lcmap-spark:1.0 \ + pyspark + + # Run any job interactively from the Jupyter Notebook server + docker run -it \ + --rm \ + --user=`id -u` \ + --net=host \ + --pid=host \ + --volume=/path/to/your/notebooks/:/home/lcmap/notebook/yours \ + usgseros/lcmap-spark:1.0 \ + jupyter --ip=$HOSTNAME notebook + + +Shippable Artifacts +------------------- +The shippable artifact for lcmap-spark is a Docker image published to https://hub.docker.com/r/usgseros/lcmap-spark/. + +* Contains all code and libraries necessary to connect to LCMAP SEE +* Provides a consistent, immutable execution environment +* Is a base image, suitable for exploratory analysis or as starting points for derivative images + +LCMAP SEE applications are independent software projects, publishing their own Docker images derived from lcmap-spark. 
+ + +Modes +----- +There are two modes for lcmap-spark: ``cluster`` and ``local``. + +* ``cluster`` mode executes Spark applications in parallel across many physical hosts +* ``local`` mode executes Spark applications on the local host system only +* Switching modes is achieved by setting parameters during SparkContext creation + + diff --git a/docs/running.rst b/docs/running.rst new file mode 100644 index 0000000..c54e4ba --- /dev/null +++ b/docs/running.rst @@ -0,0 +1,250 @@ +Examples +======== + +For a full explanation of ``docker run`` options, refer to: https://docs.docker.com/engine/reference/commandline/run/. + +pyspark - local mode +-------------------- + +.. code-block:: bash + + export IMAGE="usgseros/lcmap-spark:1.0" + export MASTER="local[*]" + + docker run -it --rm --net host -u `id -u` \ + $IMAGE \ + pyspark --master $MASTER \ + --total-executor-cores 4 \ + --driver-memory 1024m \ + --executor-memory 1024m \ + --conf spark.app.name=$USER \ + --conf spark.driver.host=$HOSTNAME + + +pyspark - cluster mode +---------------------- + +..
code-block:: bash + + export IMAGE="usgseros/lcmap-spark:1.0" + export MASTER="mesos://zk://host1:2181,host2:2181,host3:2181/mesos" + export MESOS_PRINCIPAL= + export MESOS_SECRET= + export MESOS_ROLE= + + docker run -it --rm --net host -u `id -u` \ + -v /home/user/mesos-keys:/certs \ + $IMAGE \ + pyspark --master $MASTER \ + --total-executor-cores 4 \ + --driver-memory 1024m \ + --executor-memory 1024m \ + --conf spark.app.name=$USER:pyspark \ + --conf spark.driver.host=$HOSTNAME \ + --conf spark.mesos.principal=$MESOS_PRINCIPAL \ + --conf spark.mesos.secret=$MESOS_SECRET \ + --conf spark.mesos.role=$MESOS_ROLE \ + --conf spark.mesos.executor.docker.image=$IMAGE \ + --conf spark.mesos.executor.docker.forcePullImage='false' \ + --conf spark.mesos.task.labels=$USER:demo \ + --conf spark.serializer='org.apache.spark.serializer.KryoSerializer' \ + --conf spark.python.worker.memory='1g' + + +spark-submit - local mode +------------------------- + +.. code-block:: python + + # save to /home/user/jobs/job.py on host filesystem + + import pyspark + + def run(): + sc = pyspark.SparkContext() + rdd = sc.parallelize(range(3)) + print("Sum of range(3) is:{}".format(rdd.sum())) + sc.stop() + + if __name__ == '__main__': + run() + +.. code-block:: bash + + export IMAGE="usgseros/lcmap-spark:1.0" + export MASTER="local[*]" + + docker run -it --rm --net host -u `id -u` \ + -v /home/user/jobs:/home/lcmap/jobs \ + $IMAGE \ + spark-submit --master $MASTER \ + --total-executor-cores 4 \ + --driver-memory 1024m \ + --executor-memory 1024m \ + --conf spark.app.name=$USER \ + --conf spark.driver.host=$HOSTNAME \ + jobs/job.py + + +spark-submit - cluster mode +--------------------------- + +.. code-block:: python + + # must be built into image at /home/lcmap/jobs/job.py + + import pyspark + + def run(): + sc = pyspark.SparkContext() + rdd = sc.parallelize(range(3)) + print("Sum of range(3) is:{}".format(rdd.sum())) + sc.stop() + + if __name__ == '__main__': + run() + +..
code-block:: bash + + export IMAGE="usgseros/lcmap-spark:1.0" + export MASTER="mesos://zk://host1:2181,host2:2181,host3:2181/mesos" + export MESOS_PRINCIPAL= + export MESOS_SECRET= + export MESOS_ROLE= + + docker run -it --rm --net host -u `id -u` \ + -v /home/user/mesos-keys:/certs \ + $IMAGE \ + spark-submit --master $MASTER \ + --total-executor-cores 4 \ + --driver-memory 1024m \ + --executor-memory 1024m \ + --conf spark.app.name=$USER \ + --conf spark.driver.host=$HOSTNAME \ + --conf spark.mesos.principal=$MESOS_PRINCIPAL \ + --conf spark.mesos.secret=$MESOS_SECRET \ + --conf spark.mesos.role=$MESOS_ROLE \ + --conf spark.mesos.executor.docker.image=$IMAGE \ + --conf spark.mesos.executor.docker.forcePullImage='false' \ + --conf spark.mesos.task.labels=lcmap-spark:$USER \ + --conf spark.serializer='org.apache.spark.serializer.KryoSerializer' \ + --conf spark.python.worker.memory='1g' \ + jobs/job.py + + +notebook - local mode +--------------------- + +.. code-block:: bash + + export IMAGE="usgseros/lcmap-spark:1.0" + export MASTER="local[*]" + + docker run -it --rm --net host -u `id -u` \ + -v /home/user/notebook:/home/lcmap/notebook \ + -e MASTER=$MASTER \ + $IMAGE \ + jupyter --ip=$HOSTNAME notebook + +..
code-block:: python + + import os + import pyspark + + + def conf(): + return {'spark.driver.host': os.environ['HOSTNAME'], + 'spark.mesos.principal': os.environ.get('MESOS_PRINCIPAL', ''), + 'spark.mesos.secret': os.environ.get('MESOS_SECRET', ''), + 'spark.mesos.role': os.environ.get('MESOS_ROLE', ''), + 'spark.mesos.executor.docker.image': os.environ.get('IMAGE', ''), + 'spark.mesos.executor.docker.forcePullImage': 'false', + 'spark.mesos.task.labels': 'lcmap-spark:{}'.format(os.environ['USER']), + 'spark.serializer': 'org.apache.spark.serializer.KryoSerializer', + 'spark.python.worker.memory': '1g', + 'spark.executor.cores': '1', + 'spark.cores.max': '1000', + 'spark.executor.memory': '4g'} + + + def context(conf): + return pyspark.SparkContext(master=os.environ['MASTER'], + appName='lcmap-spark:{}'.format(os.environ['USER']), + conf=pyspark.SparkConf().setAll(conf.items())) + + + def application(): + sc = None + try: + sc = context(conf()) + rdd = sc.parallelize(range(1000000)) + return {'min': rdd.min(), 'max': rdd.max()} + finally: + sc.stop() + + # run it + application() + + +notebook - cluster mode +----------------------- + +.. code-block:: bash + + export IMAGE="usgseros/lcmap-spark:1.0" + export MASTER="mesos://zk://host1:2181,host2:2181,host3:2181/mesos" + export MESOS_PRINCIPAL="" + export MESOS_SECRET="" + export MESOS_ROLE="" + + docker run -it --rm --net host -u `id -u` \ + -v /home/user/notebook:/home/lcmap/notebook \ + -v /home/user/mesos-keys:/certs \ + -e IMAGE=$IMAGE \ + -e MASTER=$MASTER \ + -e MESOS_PRINCIPAL=$MESOS_PRINCIPAL \ + -e MESOS_SECRET=$MESOS_SECRET \ + -e MESOS_ROLE=$MESOS_ROLE \ + $IMAGE \ + jupyter --ip=$HOSTNAME notebook + +.. 
code-block:: python + + """Example Notebook connecting to Spark""" + + import os + import pyspark + + + def conf(): + return {'spark.driver.host': os.environ['HOSTNAME'], + 'spark.mesos.principal': os.environ.get('MESOS_PRINCIPAL', ''), + 'spark.mesos.secret': os.environ.get('MESOS_SECRET', ''), + 'spark.mesos.role': os.environ.get('MESOS_ROLE', ''), + 'spark.mesos.executor.docker.image': os.environ.get('IMAGE', ''), + 'spark.mesos.executor.docker.forcePullImage': 'false', + 'spark.mesos.task.labels': 'lcmap-spark:{}'.format(os.environ['USER']), + 'spark.serializer': 'org.apache.spark.serializer.KryoSerializer', + 'spark.python.worker.memory': '1g', + 'spark.executor.cores': '1', + 'spark.cores.max': '1000', + 'spark.executor.memory': '4g'} + + + def context(conf): + return pyspark.SparkContext(master=os.environ['MASTER'], + appName='lcmap-spark:{}'.format(os.environ['USER']), + conf=pyspark.SparkConf().setAll(conf.items())) + + + def application(): + sc = None + try: + sc = context(conf()) + rdd = sc.parallelize(range(1000000)) + return {'min': rdd.min(), 'max': rdd.max()} + finally: + sc.stop() + + # run it + application() diff --git a/files/opt/spark/dist/sbin/dispatcher-entry-point.sh b/files/opt/spark/dist/sbin/dispatcher-entry-point.sh deleted file mode 100755 index a117e54..0000000 --- a/files/opt/spark/dist/sbin/dispatcher-entry-point.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env bash - -echo "Starting Mesos Dispatcher"; - -echo "All logging redirected to stdout... ignore messages for log locations" - -if [ "$MESOS_MASTER" = "" ]; then - echo "MESOS_MASTER environment variable not set... exiting" - exit 1; -fi - -if [ "$ZOOKEEPER" = "" ]; then - echo "ZOOKEEPER environment variable not set... exiting" - exit 1; -fi - -if [ "$FRAMEWORK_NAME" = "" ]; then - echo "FRAMEWORK_NAME environment variable not set... 
exiting" - exit 1; -fi - -exec /opt/spark/dist/sbin/start-mesos-dispatcher.sh --master $MESOS_MASTER --zk $ZOOKEEPER --name $FRAMEWORK_NAME 2>&1 - diff --git a/jobs/job.py b/jobs/job.py new file mode 100644 index 0000000..6c96f58 --- /dev/null +++ b/jobs/job.py @@ -0,0 +1,10 @@ +import pyspark + +def run(): + sc = pyspark.SparkContext() + rdd = sc.parallelize(range(3)) + print("Sum of range(3) is:{}".format(rdd.sum())) + sc.close() + +if __name__ == '__main__': + run() diff --git a/jobs/jobs.zip b/jobs/jobs.zip new file mode 100644 index 0000000000000000000000000000000000000000..95d18d8ecb496c98bce4bcecc6b236420f270e43 GIT binary patch literal 310 zcmWIWW@Zs#U|`^2(9d`D*}FM7Z3>Wg0*KidWEisYlk^HILqj+jm^BNFqd>T{f}4Sn ziEj}@Aq8}wiTAwk6&+{ z_GX!`gV6Vr2d+N+`|`{zuMNCMlV6Di%y&>s literal 0 HcmV?d00001 diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..224fad2 --- /dev/null +++ b/pom.xml @@ -0,0 +1,20 @@ + + 4.0.0 + gov.usgs.eros.lcmap + lcmap-spark + jar + 1.0 + LCMAP Spark Dependencies + http://maven.apache.org + + + com.datastax.spark + spark-cassandra-connector_2.11 + 2.0.2 + test + + + diff --git a/version.txt b/version.txt new file mode 100644 index 0000000..b9b0237 --- /dev/null +++ b/version.txt @@ -0,0 +1,2 @@ +1.0 +