Update run script templates for v 1.2.0
jtnystrom committed Jan 22, 2021
1 parent ae7361d commit cb0c483
Showing 3 changed files with 4 additions and 4 deletions.
2 changes: 1 addition & 1 deletion spark-submit.sh.template
@@ -23,4 +23,4 @@ exec $SPARK/bin/spark-submit \
   --packages org.rogach:scallop_2.11:latest.integration \
   --jars lib/fastdoop-1.0.0.jar \
   --master $MASTER \
-  --class discount.spark.Discount target/scala-2.11/discount_2.11-1.1.0.jar $*
+  --class discount.spark.Discount target/scala-2.11/discount_2.11-1.2.0.jar $*
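The updated jar path is the artifact that sbt would normally produce for project version 1.2.0 on Scala 2.11. As a rough sketch (not part of this commit) of how the template might be prepared and invoked, assuming SPARK and MASTER are the variables defined earlier in the same template and that any trailing arguments are forwarded to discount.spark.Discount via $*:

sbt package                                   # presumably writes target/scala-2.11/discount_2.11-1.2.0.jar
cp spark-submit.sh.template spark-submit.sh   # then edit SPARK (Spark install dir) and MASTER (e.g. local[*])
chmod +x spark-submit.sh
./spark-submit.sh --help                      # --help should print usage, since the CLI is Scallop-based (see --packages above)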
4 changes: 2 additions & 2 deletions submit-aws.sh.template
@@ -16,7 +16,7 @@ BUCKET=s3://my-bucket/discount
 #Copy jars and data files the first time only, after which the following lines can safely be commented out
 aws s3 cp lib/fastdoop-1.0.0.jar $BUCKET/
 aws s3 cp PASHA/pasha_all_*.txt $BUCKET/PASHA/
-aws s3 cp target/scala-2.11/discount_2.11-1.1.0.jar $BUCKET/
+aws s3 cp target/scala-2.11/discount_2.11-1.2.0.jar $BUCKET/

 #Max size of input splits in bytes. A smaller number reduces memory usage but increases the number of
 #partitions for the first stage. If this variable is unset, Spark's default of 128 MB will be used.
@@ -30,7 +30,7 @@ PARTITIONS=##spark.sql.shuffle.partitions=4000
 COMMAND=(--packages org.rogach:scallop_2.11:latest.integration \
   --conf $SPLIT \
   --conf $PARTITIONS \
-  --jars $BUCKET/fastdoop-1.0.0.jar --class discount.spark.Discount $BUCKET/discount_2.11-1.1.0.jar $*)
+  --jars $BUCKET/fastdoop-1.0.0.jar --class discount.spark.Discount $BUCKET/discount_2.11-1.2.0.jar $*)

 RUNNER_ARGS="spark-submit"
 for PARAM in ${COMMAND[@]}
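The split-size comment in this template explains the trade-off behind the SPLIT setting. A back-of-the-envelope sketch, using a hypothetical 10 GiB input (not part of this diff) and the 64 MiB value that appears in the gcloud template below:

INPUT_BYTES=$((10 * 1024 * 1024 * 1024))   # hypothetical 10 GiB of input data
SPLIT_BYTES=$((64 * 1024 * 1024))          # 64 MiB max split size
# ceil(INPUT_BYTES / SPLIT_BYTES): about 160 first-stage partitions at 64 MiB,
# versus about 80 at Spark's 128 MB default; smaller tasks, less memory per task.
echo $(( (INPUT_BYTES + SPLIT_BYTES - 1) / SPLIT_BYTES ))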
2 changes: 1 addition & 1 deletion submit-gcloud.sh.template
@@ -43,6 +43,6 @@ SPLIT=##spark.hadoop.mapreduce.input.fileinputformat.split.maxsize=$((64 * 1024
 PROPERTIES="^##^spark.jars.packages=org.rogach:scallop_2.11:latest.integration$PARTITIONS$MAXRES$OVERHEAD$EXECMEM$SPLIT"

 exec gcloud --verbosity=info dataproc jobs submit spark --region $REGION --cluster $CLUSTER \
-  --class discount.spark.Discount --jars target/scala-2.11/discount_2.11-1.1.0.jar,lib/fastdoop-1.0.0.jar \
+  --class discount.spark.Discount --jars target/scala-2.11/discount_2.11-1.2.0.jar,lib/fastdoop-1.0.0.jar \
   --properties $PROPERTIES -- "$@"
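The ^##^ prefix in PROPERTIES appears to be gcloud's alternative-delimiter escaping (documented under `gcloud topic escaping`): it makes "##" the list separator instead of ",", which is why each optional setting ($PARTITIONS, $MAXRES, $OVERHEAD, $EXECMEM, $SPLIT) begins with "##". A sketch of how the string might expand; the shuffle-partition value mirrors the AWS template above, and only the split size is taken from this file:

PROPERTIES="^##^spark.jars.packages=org.rogach:scallop_2.11:latest.integration"
PROPERTIES+="##spark.sql.shuffle.partitions=4000"                                                  # illustrative value
PROPERTIES+="##spark.hadoop.mapreduce.input.fileinputformat.split.maxsize=$((64 * 1024 * 1024))"
# gcloud then parses this as three key=value Spark properties separated by "##".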
