An integer cycling demo for satellite processing. #1028

Merged
111 changes: 111 additions & 0 deletions examples/satellite/suite.rc
@@ -0,0 +1,111 @@
#!Jinja2

title = Demonstrates real time satellite data processing
description = """
Each successive integer cycle retrieves and processes the next
arbitrarily timed and arbitrarily labelled dataset, in parallel
with previous cycles if the data comes in quickly."""

# you can monitor output processing with:
# $ watch -n 1 \
# "find ~/cylc-run/<SUITE>/share; find ~/cylc-run/SUITE/work"

{% set N_DATASETS = 5 %}

# define shared directories (could use runtime namespaces for this)
{% set DATA_IN_DIR = "$CYLC_SUITE_SHARE_DIR/incoming" %}
{% set PRODUCT_DIR = "$CYLC_SUITE_SHARE_DIR/products" %}
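# (under a default installation, $CYLC_SUITE_SHARE_DIR resolves to
# ~/cylc-run/<SUITE>/share; the exact location can vary with site config)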

[scheduling]
cycling mode = integer
initial cycle point = 1
final cycle point = {{N_DATASETS}}
[[dependencies]]
[[[R1/c0/P1]]] # first cycle
graph = prep => satsim & get_data
[[[R/c0/P1]]]
graph = """
# Processing chain for each dataset
get_data => proc1 => proc2 => products
# As one dataset is retrieved, start waiting on another.
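# ([-1] denotes the previous integer cycle point)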
get_data[-1] => get_data"""
[[[R1/P1/{{N_DATASETS}}]]] # last cycle
graph = products => collate

[runtime]
[[prep]]
title = clean the suite output directories
command scripting = \
rm -rf $CYLC_SUITE_SHARE_DIR $CYLC_SUITE_WORK_DIR

[[satsim]]
title = simulate a satellite data feed
description = """Generates {{N_DATASETS}} arbitrarily labelled
datasets after random time intervals."""
pre-command scripting = mkdir -p {{DATA_IN_DIR}}
command scripting = """
COUNT=0
while true; do
(( COUNT == {{N_DATASETS}} )) && break
sleep $(( 1 + RANDOM % 10 ))
touch {{DATA_IN_DIR}}/dataset-$(date +%s).raw
(( COUNT += 1 ))
done"""

[[WORKDIR]]
# Define a common cycle-point-specific work-directory for all
# processing tasks so that they all work on the same dataset.
work sub-directory = proc-$CYLC_TASK_CYCLE_POINT
pre-command scripting = sleep 10
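# (so, for example, all cycle point 3 tasks share .../work/proc-3)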

[[get_data]]
inherit = WORKDIR
title = grab one new dataset, waiting if necessary
command scripting = """
while true; do
DATASET=$( ls {{DATA_IN_DIR}}/dataset-*.raw 2>/dev/null | head -n 1 )
if [[ -z $DATASET ]]; then
sleep 1
continue
fi
break
done
mv $DATASET $PWD"""

[[proc1]]
inherit = WORKDIR
title = convert .raw dataset to .proc1 form
command scripting = """
DATASET=$(ls dataset-*.raw)
mv $DATASET ${DATASET%raw}proc1"""

[[proc2]]
inherit = WORKDIR
title = convert .proc1 dataset to .proc2 form
command scripting = """
DATASET=$(ls dataset-*.proc1)
mv $DATASET ${DATASET%proc1}proc2"""

[[products]]
inherit = WORKDIR
title = generate products from .proc2 processed dataset
pre-command scripting = mkdir -p {{PRODUCT_DIR}}
command scripting = """
DATASET=$( ls dataset-*.proc2 )
mv $DATASET {{PRODUCT_DIR}}/${DATASET%proc2}prod"""

[[collate]]
title = collate all products from the suite run
# Note you might want to use "cylc suite-state" to check that
# _all_ product tasks have finished before collating results.
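# A hedged sketch of such a check (not wired in here; $POINT stands for
# one product cycle point, and exact options may differ between versions):
#   cylc suite-state $CYLC_SUITE_NAME --task=products --point=$POINT \
#       --status=succeeded --max-polls=60 --interval=10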
command scripting = ls {{PRODUCT_DIR}}

[visualization]
default node attributes = "style=filled", "shape=box"
[[node attributes]]
satsim = "fillcolor=yellow"
WORKDIR = "fillcolor=limegreen"
get_data = "fillcolor=skyblue"
products = "fillcolor=orange"
collate = "fillcolor=red"