forked from valiantljk/h5spark
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspark-scala-yarn.sh
executable file
·52 lines (45 loc) · 1.56 KB
/
spark-scala-yarn.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/bin/bash
# Copyright (C) 2016 The HDF Group
# All rights reserved
#
# This example shows how to submit spark job using Yarn in cluster mode.
# This is modifeid shell script based on spark-scala.sh.
# This script is tested on Mac OS X Mavericks machine with the following:
#
# Hadoop 2.7.3
# Spark 2.0.0 with Hadoop 2.7.
#
export JAVA_LIBRARY_PATH=$JAVA_LIBRARY_PATH:/Users/hyoklee/Library/Java/Extensions
# This is critical for Yarn to load HDF5 JNI library properly.
export SPARK_YARN_USER_ENV="JAVA_LIBRARY_PATH=$JAVA_LIBRARY_PATH,LD_LIBRARY_PATH=$LD_LIBRARY_PATH"
export HADOOP_MAPRED_HOME=/Users/hyoklee/src/hadoop-2.7.3/
# This is useful to check java.library.path.
java -XshowSettings:properties -version
repartition="1"
# A NASA HDF-EOS5 file
inputfile="/tmp/GSSTF_NCEP.3.1987.07.01.he5"
app_name="H5Sspark-udf"
dataset="/HDFEOS/GRIDS/NCEP/Data Fields/SST"
# For standalone single node
# SPARKURL="local[1]"
# For standalone cluster
# SPARKURL="spark://localhost:7077"
# For Yarn
SPARKURL="yarn"
SCRATCH="/tmp"
spark-submit --verbose\
--master $SPARKURL\
--deploy-mode cluster \
--name $app_name \
--executor-cores 1 \
--driver-cores 1 \
--num-executors=1 \
--class org.nersc.io.readtest\
--conf spark.eventLog.enabled=true\
--conf spark.eventLog.dir=$SCRATCH/spark\
--driver-library-path=.\
target/scala-2.11/h5spark-assembly-1.0.jar \
$repartition "$inputfile" "$dataset"
rm /tmp/spark/*
# stop-all.sh
# stop-collectl.sh