From 36fadf42875f0e3cfac7466631f268c3059cee4e Mon Sep 17 00:00:00 2001 From: zfengms Date: Tue, 10 Oct 2017 17:27:18 -0700 Subject: [PATCH] Feature/longrunjobdoc (#139) * add documentation and sample for long running job * update sample file name * update long running job doc and test --- docs/31-long-running-job.md | 89 +++++++++++++++++++++ docs/README.md | 4 + samples/long_running_job/long_running_job.R | 68 ++++++++++++++++ tests/testthat/test-long-running-job.R | 12 +-- 4 files changed, 167 insertions(+), 6 deletions(-) create mode 100644 docs/31-long-running-job.md create mode 100644 samples/long_running_job/long_running_job.R diff --git a/docs/31-long-running-job.md b/docs/31-long-running-job.md new file mode 100644 index 00000000..58bd383d --- /dev/null +++ b/docs/31-long-running-job.md @@ -0,0 +1,89 @@ +# Long Running Job Management + +The doAzureParallel package allows you to manage long running jobs easily. There are 2 ways to run a job: +- Synchronous +- Asynchronous + +Long running jobs should run in asynchronous mode. + +## How to configure a job to run asynchronously +You can configure a job to run asynchronously by specifying wait = FALSE in the job options: + +```R + options <- list(wait = FALSE) + jobId <- foreach(i = 1:number_of_iterations, .options.azure = options) %dopar% { ... } +``` +The returned value is the job Id associated with the foreach loop. You can use this returned value to get the job status and job result. + +You can optionally specify the job Id in options as shown below: +```R + options <- list(wait = FALSE, job = 'myjob') + foreach(i = 1:number_of_iterations, .options.azure = options) %dopar% { ... 
} +``` + +## Get job status + +getJob returns job metadata, such as chunk size, whether cloud combine is enabled, and the packages specified for the job. It also returns the number of tasks in each state. + +```R + getJob(jobId) + getJob(jobId, verbose = TRUE) + + sample output: + -------------- + job metadata: + chunkSize: 1 + enableCloudCombine: TRUE + packages: httr + + tasks: + active: 1 + running: 0 + completed: 5 + succeeded: 0 + failed: 5 + total: 6 +``` + +## Get job list +You can use getJobList() to get a summary of all jobs. + +```R + getJobList() + + sample output: + -------------- + Id State Status FailedTasks TotalTasks +1 job11 active No tasks in the job 0 0 +2 job20170714215517 active 0 % 0 6 +3 job20170714220129 active 0 % 0 6 +4 job20170714221557 active 84 % 4 6 +5 job20170803210552 active 0 % 0 6 +6 job20170803212205 active 0 % 0 6 +7 job20170803212558 active 0 % 0 6 +8 job20170714211502 completed 100 % 5 6 +9 job20170714223236 completed 100 % 0 6 +``` + +You can also filter the job list by job state, such as active or completed: +```R + filter <- list() + filter$state <- c("active", "completed") + getJobList(filter) +``` + +## Retrieve long running job result +Once the job has completed successfully, you can call getJobResult to retrieve the job result: + +```R + jobResult <- getJobResult(jobId) +``` + +### Clean up + +Once you get the job result, you can delete the job. +```R + rAzureBatch::deleteJob(jobId) +``` + +A [working sample](../samples/long_running_job/long_running_job.R) can be found in the samples directory. diff --git a/docs/README.md b/docs/README.md index fec31aeb..853caa93 100644 --- a/docs/README.md +++ b/docs/README.md @@ -37,6 +37,10 @@ This section will provide information about how Azure works, how best to take ad Setting up your cluster to user's specific needs +9. 
**Long Running Job** [(link)](./31-long-running-job.md) + + Best practices for managing long running jobs + ## Additional Documentation Take a look at our [**Troubleshooting Guide**](./40-troubleshooting.md) for information on how to diagnose common issues. diff --git a/samples/long_running_job/long_running_job.R b/samples/long_running_job/long_running_job.R new file mode 100644 index 00000000..e2002be8 --- /dev/null +++ b/samples/long_running_job/long_running_job.R @@ -0,0 +1,68 @@ +# ============= +# === Setup === +# ============= + +# install packages +library(devtools) +install_github("azure/razurebatch") +install_github("azure/doazureparallel") + +# import the doAzureParallel library and its dependencies +library(doAzureParallel) + +credentialsFileName <- "credentials.json" +clusterFileName <- "cluster.json" + +# generate a credentials json file +generateCredentialsConfig(credentialsFileName) + +# set your credentials +setCredentials(credentialsFileName) + +# generate a cluster config file +generateClusterConfig(clusterFileName) + +# create your cluster if it does not exist +cluster <- makeCluster(clusterFileName) + +# register your parallel backend +registerDoAzureParallel(cluster) + +# check that your workers are up +getDoParWorkers() + +# ======================================================= +# === Create long running job and get progress/result === +# ======================================================= + +options <- list(wait = FALSE) # wait = FALSE runs the job asynchronously; foreach then returns the job id +'%dopar%' <- foreach::'%dopar%' +jobId <- + foreach::foreach( + i = 1:4, + .packages = c('httr'), + .options.azure = options + ) %dopar% { + mean(1:3) + } + +job <- getJob(jobId) # job metadata and task counts + +# get the list of active and completed jobs +filter <- list() +filter$state <- c("active", "completed") +getJobList(filter) + +# get job list for all jobs +getJobList() + +# wait 2 minutes for the long running job to finish +Sys.sleep(120) + +# get job result +jobResult <- getJobResult(jobId) + +doAzureParallel::stopCluster(cluster) + +# delete the job 
+rAzureBatch::deleteJob(jobId) diff --git a/tests/testthat/test-long-running-job.R b/tests/testthat/test-long-running-job.R index da2d9d2a..97218f55 100644 --- a/tests/testthat/test-long-running-job.R +++ b/tests/testthat/test-long-running-job.R @@ -14,18 +14,18 @@ test_that("Long Running Job Test", { cluster <- doAzureParallel::makeCluster(clusterFileName) doAzureParallel::registerDoAzureParallel(cluster) - opt <- list(wait = FALSE) + options <- list(wait = FALSE, job = 'myjob') '%dopar%' <- foreach::'%dopar%' - res <- + jobId <- foreach::foreach( i = 1:4, .packages = c('httr'), - .options.azure = opt + .options.azure = options ) %dopar% { mean(1:3) } - job <- getJob(res) + job <- getJob(jobId) # get active/running job list filter <- filter <- list() @@ -39,7 +39,7 @@ test_that("Long Running Job Test", { Sys.sleep(120) # get job result - jobResult <- getJobResult(res) + jobResult <- getJobResult(jobId) doAzureParallel::stopCluster(cluster) @@ -51,5 +51,5 @@ test_that("Long Running Job Test", { list(2, 2, 2, 2)) # delete the job - rAzureBatch::deleteJob(res) + rAzureBatch::deleteJob(jobId) })