From 25f7c0af46ab3efea8d581c126cc5d8eca6ccd56 Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 10 May 2018 15:54:27 -0700 Subject: [PATCH 01/18] Added custom package script --- inst/startup/install_custom.R | 10 ++++++++++ samples/azure_files/azure_files_cluster.json | 4 +++- 2 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 inst/startup/install_custom.R diff --git a/inst/startup/install_custom.R b/inst/startup/install_custom.R new file mode 100644 index 00000000..9335720d --- /dev/null +++ b/inst/startup/install_custom.R @@ -0,0 +1,10 @@ +args <- commandArgs(trailingOnly = TRUE) + +packages <- list.files(args[1]) +for (i in 1:length(packages)) { + print(packages[i]) + install.packages(packages[i], + lib = paste0(Sys.getenv("AZ_BATCH_NODE_SHARED_DIR"), + "R/packages"), + type = "source") +} diff --git a/samples/azure_files/azure_files_cluster.json b/samples/azure_files/azure_files_cluster.json index 1cfd7e89..bd68eaed 100644 --- a/samples/azure_files/azure_files_cluster.json +++ b/samples/azure_files/azure_files_cluster.json @@ -20,5 +20,7 @@ }, "commandLine": [ "mkdir /mnt/batch/tasks/shared/data", - "mount -t cifs //.file.core.windows.net/ /mnt/batch/tasks/shared/data -o vers=3.0,username=,password===,dir_mode=0777,file_mode=0777,sec=ntlmssp"] + "mount -t cifs //.file.core.windows.net/ /mnt/batch/tasks/shared/data -o vers=3.0,username=,password===,dir_mode=0777,file_mode=0777,sec=ntlmssp", + "docker run --rm -v $AZ_BATCH_NODE_ROOT_DIR:$AZ_BATCH_NODE_ROOT_DIR -e AZ_BATCH_NODE_ROOT_DIR=$AZ_BATCH_NODE_ROOT_DIR -e AZ_BATCH_NODE_STARTUP_DIR=$AZ_BATCH_NODE_STARTUP_DIR --rm -v $AZ_BATCH_NODE_ROOT_DIR:$AZ_BATCH_NODE_ROOT_DIR -e AZ_BATCH_NODE_SHARED_DIR=$AZ_BATCH_NODE_SHARED_DIR -e AZ_BATCH_NODE_ROOT_DIR=$AZ_BATCH_NODE_ROOT_DIR -e AZ_BATCH_NODE_STARTUP_DIR=$AZ_BATCH_NODE_STARTUP_DIR rocker/tidyverse:latest Rscript --no-save --no-environ --no-restore --no-site-file --verbose $AZ_BATCH_NODE_STARTUP_DIR/wd/install_custom.R /mnt/batch/tasks/shared/data" + ] } 
From 0a3fd72ba2b541e76a2bb9791646b23b0355792d Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 10 May 2018 15:56:10 -0700 Subject: [PATCH 02/18] Added feature custom download --- samples/azure_files/azure_files_cluster.json | 1 + 1 file changed, 1 insertion(+) diff --git a/samples/azure_files/azure_files_cluster.json b/samples/azure_files/azure_files_cluster.json index bd68eaed..1a6fab36 100644 --- a/samples/azure_files/azure_files_cluster.json +++ b/samples/azure_files/azure_files_cluster.json @@ -21,6 +21,7 @@ "commandLine": [ "mkdir /mnt/batch/tasks/shared/data", "mount -t cifs //.file.core.windows.net/ /mnt/batch/tasks/shared/data -o vers=3.0,username=,password===,dir_mode=0777,file_mode=0777,sec=ntlmssp", + "wget https://raw.githubusercontent.com/Azure/doAzureParallel/feature/custom-package/inst/startup/install_custom.R", "docker run --rm -v $AZ_BATCH_NODE_ROOT_DIR:$AZ_BATCH_NODE_ROOT_DIR -e AZ_BATCH_NODE_ROOT_DIR=$AZ_BATCH_NODE_ROOT_DIR -e AZ_BATCH_NODE_STARTUP_DIR=$AZ_BATCH_NODE_STARTUP_DIR --rm -v $AZ_BATCH_NODE_ROOT_DIR:$AZ_BATCH_NODE_ROOT_DIR -e AZ_BATCH_NODE_SHARED_DIR=$AZ_BATCH_NODE_SHARED_DIR -e AZ_BATCH_NODE_ROOT_DIR=$AZ_BATCH_NODE_ROOT_DIR -e AZ_BATCH_NODE_STARTUP_DIR=$AZ_BATCH_NODE_STARTUP_DIR rocker/tidyverse:latest Rscript --no-save --no-environ --no-restore --no-site-file --verbose $AZ_BATCH_NODE_STARTUP_DIR/wd/install_custom.R /mnt/batch/tasks/shared/data" ] } From f7cefcf399f458312c7df516f16e2b154d091d28 Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 10 May 2018 15:56:42 -0700 Subject: [PATCH 03/18] Fixed typo --- samples/azure_files/azure_files_cluster.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/azure_files/azure_files_cluster.json b/samples/azure_files/azure_files_cluster.json index 1a6fab36..d83d0e9d 100644 --- a/samples/azure_files/azure_files_cluster.json +++ b/samples/azure_files/azure_files_cluster.json @@ -20,7 +20,7 @@ }, "commandLine": [ "mkdir /mnt/batch/tasks/shared/data", - "mount -t cifs 
//.file.core.windows.net/ /mnt/batch/tasks/shared/data -o vers=3.0,username=,password===,dir_mode=0777,file_mode=0777,sec=ntlmssp", + "mount -t cifs //.file.core.windows.net/ /mnt/batch/tasks/shared/data -o vers=3.0,username=,password=,dir_mode=0777,file_mode=0777,sec=ntlmssp", "wget https://raw.githubusercontent.com/Azure/doAzureParallel/feature/custom-package/inst/startup/install_custom.R", "docker run --rm -v $AZ_BATCH_NODE_ROOT_DIR:$AZ_BATCH_NODE_ROOT_DIR -e AZ_BATCH_NODE_ROOT_DIR=$AZ_BATCH_NODE_ROOT_DIR -e AZ_BATCH_NODE_STARTUP_DIR=$AZ_BATCH_NODE_STARTUP_DIR --rm -v $AZ_BATCH_NODE_ROOT_DIR:$AZ_BATCH_NODE_ROOT_DIR -e AZ_BATCH_NODE_SHARED_DIR=$AZ_BATCH_NODE_SHARED_DIR -e AZ_BATCH_NODE_ROOT_DIR=$AZ_BATCH_NODE_ROOT_DIR -e AZ_BATCH_NODE_STARTUP_DIR=$AZ_BATCH_NODE_STARTUP_DIR rocker/tidyverse:latest Rscript --no-save --no-environ --no-restore --no-site-file --verbose $AZ_BATCH_NODE_STARTUP_DIR/wd/install_custom.R /mnt/batch/tasks/shared/data" ] From 8c0232897e6bd989dcde71d673a4fa750a1cd29e Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 10 May 2018 16:07:17 -0700 Subject: [PATCH 04/18] Fixed directory for installation --- inst/startup/install_custom.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/startup/install_custom.R b/inst/startup/install_custom.R index 9335720d..842fbdad 100644 --- a/inst/startup/install_custom.R +++ b/inst/startup/install_custom.R @@ -5,6 +5,6 @@ for (i in 1:length(packages)) { print(packages[i]) install.packages(packages[i], lib = paste0(Sys.getenv("AZ_BATCH_NODE_SHARED_DIR"), - "R/packages"), + "/R/packages"), type = "source") } From 75081f59bdc05fad502a5a3ae2d1d77e65356e4a Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 10 May 2018 16:22:08 -0700 Subject: [PATCH 05/18] Fixed full folder directory --- inst/startup/install_custom.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/startup/install_custom.R b/inst/startup/install_custom.R index 842fbdad..7c91d69d 100644 --- 
a/inst/startup/install_custom.R +++ b/inst/startup/install_custom.R @@ -1,6 +1,6 @@ args <- commandArgs(trailingOnly = TRUE) -packages <- list.files(args[1]) +packages <- list.files(args[1], full.names = TRUE) for (i in 1:length(packages)) { print(packages[i]) install.packages(packages[i], From ac9d4a64a46db7782d117affb6bd8e7b2065f24f Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 10 May 2018 16:41:34 -0700 Subject: [PATCH 06/18] Add dependencies and fix pattern --- inst/startup/install_custom.R | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/inst/startup/install_custom.R b/inst/startup/install_custom.R index 7c91d69d..08f6aef2 100644 --- a/inst/startup/install_custom.R +++ b/inst/startup/install_custom.R @@ -1,10 +1,20 @@ args <- commandArgs(trailingOnly = TRUE) -packages <- list.files(args[1], full.names = TRUE) +if (length(args) > 2) { + if (is.null(args[2])) { + pattern = NULL + } + else { + pattern = args[2] + } +} + +packages <- list.files(args[1], full.names = TRUE, pattern = pattern) for (i in 1:length(packages)) { print(packages[i]) install.packages(packages[i], lib = paste0(Sys.getenv("AZ_BATCH_NODE_SHARED_DIR"), "/R/packages"), + dependencies = TRUE, type = "source") } From e255111791d0eba79e1ee2c8cfd5d32277e4e52e Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 10 May 2018 16:51:59 -0700 Subject: [PATCH 07/18] Fix pattern not found --- inst/startup/install_custom.R | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/inst/startup/install_custom.R b/inst/startup/install_custom.R index 08f6aef2..07deae7b 100644 --- a/inst/startup/install_custom.R +++ b/inst/startup/install_custom.R @@ -1,11 +1,9 @@ args <- commandArgs(trailingOnly = TRUE) -if (length(args) > 2) { - if (is.null(args[2])) { - pattern = NULL - } - else { - pattern = args[2] +pattern <- NULL +if (length(args) > 1) { + if (!is.null(args[2])) { + pattern <- args[2] } } From f19680c5fbca0ebcc0cb9e66a8e0deb21a6b156d Mon Sep 17 00:00:00 2001 
From: Brian Date: Thu, 10 May 2018 17:00:34 -0700 Subject: [PATCH 08/18] Added repo --- inst/startup/install_custom.R | 1 + 1 file changed, 1 insertion(+) diff --git a/inst/startup/install_custom.R b/inst/startup/install_custom.R index 07deae7b..00f7d2fb 100644 --- a/inst/startup/install_custom.R +++ b/inst/startup/install_custom.R @@ -14,5 +14,6 @@ for (i in 1:length(packages)) { lib = paste0(Sys.getenv("AZ_BATCH_NODE_SHARED_DIR"), "/R/packages"), dependencies = TRUE, + repos = "https://cloud.r-project.org", type = "source") } From f7b202691f68e7d12bf84d3bf525f1f7e74e0297 Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 10 May 2018 17:17:19 -0700 Subject: [PATCH 09/18] Switching to devtools --- inst/startup/install_custom.R | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/inst/startup/install_custom.R b/inst/startup/install_custom.R index 00f7d2fb..11454dd8 100644 --- a/inst/startup/install_custom.R +++ b/inst/startup/install_custom.R @@ -7,13 +7,21 @@ if (length(args) > 1) { } } +devtoolsPackage <- "devtools" +if (!require(devtoolsPackage, character.only = TRUE)) { + install.packages(devtoolsPackage) + require(devtoolsPackage, character.only = TRUE) +} + packages <- list.files(args[1], full.names = TRUE, pattern = pattern) for (i in 1:length(packages)) { - print(packages[i]) - install.packages(packages[i], - lib = paste0(Sys.getenv("AZ_BATCH_NODE_SHARED_DIR"), - "/R/packages"), - dependencies = TRUE, - repos = "https://cloud.r-project.org", - type = "source") + devtools::install(packages[i], + lib = paste0(Sys.getenv("AZ_BATCH_NODE_SHARED_DIR"), + "/R/packages")) + # install.packages(packages[i], + # lib = paste0(Sys.getenv("AZ_BATCH_NODE_SHARED_DIR"), + # "/R/packages"), + # dependencies = TRUE, + # repos = "https://cloud.r-project.org", + # type = "source") } From 9409f62398e6af1b36efd3b7fa1c8a65994794a8 Mon Sep 17 00:00:00 2001 From: Brian Date: Fri, 11 May 2018 10:06:21 -0700 Subject: [PATCH 10/18] Fixing devtools 
install with directory --- inst/startup/install_custom.R | 44 ++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/inst/startup/install_custom.R b/inst/startup/install_custom.R index 11454dd8..7d39d60d 100644 --- a/inst/startup/install_custom.R +++ b/inst/startup/install_custom.R @@ -1,5 +1,16 @@ args <- commandArgs(trailingOnly = TRUE) +sharedPackageDirectory <- file.path( + Sys.getenv("AZ_BATCH_NODE_SHARED_DIR"), + "R", + "packages") + +tempDir <- file.path( + Sys.getenv("AZ_BATCH_NODE_STARTUP_DIR"), + "tmp") + +.libPaths(c(sharedPackageDirectory, .libPaths())) + pattern <- NULL if (length(args) > 1) { if (!is.null(args[2])) { @@ -13,15 +24,26 @@ if (!require(devtoolsPackage, character.only = TRUE)) { require(devtoolsPackage, character.only = TRUE) } -packages <- list.files(args[1], full.names = TRUE, pattern = pattern) -for (i in 1:length(packages)) { - devtools::install(packages[i], - lib = paste0(Sys.getenv("AZ_BATCH_NODE_SHARED_DIR"), - "/R/packages")) - # install.packages(packages[i], - # lib = paste0(Sys.getenv("AZ_BATCH_NODE_SHARED_DIR"), - # "/R/packages"), - # dependencies = TRUE, - # repos = "https://cloud.r-project.org", - # type = "source") +packageDirs <- list.files( + path = tempDir, + full.names = TRUE, + recursive = FALSE) + +for (i in seq_along(packageDirs)) { + print("Package Directories") + print(packageDirs[i]) + + devtools::install(packageDirs[i], + args = c( + paste0( + "--library=", + "'", + sharedPackageDirectory, + "'"))) + + print("Package Directories Completed") } + +unlink( + tempDir, + recursive = TRUE) From 48d8d4d182baaa26807912e6408f214023d953a9 Mon Sep 17 00:00:00 2001 From: Brian Date: Fri, 11 May 2018 13:51:06 -0700 Subject: [PATCH 11/18] Fix in for merger.R --- inst/startup/merger.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/inst/startup/merger.R b/inst/startup/merger.R index df535c35..73b52387 100644 --- a/inst/startup/merger.R +++ b/inst/startup/merger.R @@ -26,6 +26,7 @@ 
libPaths <- c( clusterPackageDirectory, .libPaths() ) +print(libPaths) .libPaths(libPaths) @@ -34,9 +35,11 @@ azbatchenv <- setwd(batchTaskWorkingDirectory) +print("Package referenced") for (package in azbatchenv$packages) { library(package, character.only = TRUE) } +print("Package reference completed") parent.env(azbatchenv$exportenv) <- globalenv() From dce215be600007bba438abc3540e84752215ee51 Mon Sep 17 00:00:00 2001 From: Brian Date: Fri, 11 May 2018 14:53:21 -0700 Subject: [PATCH 12/18] Working cluster custom packages --- R/commandLineUtilities.R | 1 + inst/startup/merger.R | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/R/commandLineUtilities.R b/R/commandLineUtilities.R index b093caa5..87b0622e 100644 --- a/R/commandLineUtilities.R +++ b/R/commandLineUtilities.R @@ -123,6 +123,7 @@ dockerRunCommand <- dockerOptions <- paste( dockerOptions, + "-e AZ_BATCH_NODE_SHARED_DIR=$AZ_BATCH_NODE_SHARED_DIR", "-e AZ_BATCH_TASK_ID=$AZ_BATCH_TASK_ID", "-e AZ_BATCH_JOB_ID=$AZ_BATCH_JOB_ID", "-e AZ_BATCH_TASK_WORKING_DIR=$AZ_BATCH_TASK_WORKING_DIR", diff --git a/inst/startup/merger.R b/inst/startup/merger.R index 73b52387..a2033b2a 100644 --- a/inst/startup/merger.R +++ b/inst/startup/merger.R @@ -18,7 +18,9 @@ batchJobPreparationDirectory <- Sys.getenv("AZ_BATCH_JOB_PREP_WORKING_DIR") batchTaskWorkingDirectory <- Sys.getenv("AZ_BATCH_TASK_WORKING_DIR") taskPackageDirectory <- paste0(batchTaskWorkingDirectory) -clusterPackageDirectory <- paste0(Sys.getenv("AZ_BATCH_NODE_SHARED_DIR", "/R/packages")) +clusterPackageDirectory <- file.path(Sys.getenv("AZ_BATCH_NODE_SHARED_DIR"), + "R", + "packages") libPaths <- c( taskPackageDirectory, @@ -35,11 +37,9 @@ azbatchenv <- setwd(batchTaskWorkingDirectory) -print("Package referenced") for (package in azbatchenv$packages) { library(package, character.only = TRUE) } -print("Package reference completed") parent.env(azbatchenv$exportenv) <- globalenv() From 6a0a176bec025577c93fad34746d1c7e2a04d8ff Mon Sep 17 
00:00:00 2001 From: Brian Date: Fri, 11 May 2018 14:59:56 -0700 Subject: [PATCH 13/18] Removed printed statements --- inst/startup/merger.R | 1 - 1 file changed, 1 deletion(-) diff --git a/inst/startup/merger.R b/inst/startup/merger.R index a2033b2a..d21ecd76 100644 --- a/inst/startup/merger.R +++ b/inst/startup/merger.R @@ -28,7 +28,6 @@ libPaths <- c( clusterPackageDirectory, .libPaths() ) -print(libPaths) .libPaths(libPaths) From e14ec0e10507ecaf22ad3f313ff995e2b856f10f Mon Sep 17 00:00:00 2001 From: Brian Date: Fri, 11 May 2018 17:55:44 -0700 Subject: [PATCH 14/18] Working on custom docs --- R/cluster.R | 4 ++ docs/20-package-management.md | 68 ++++++++++++------- .../custom_packages_cluster.json | 27 ++++++++ 3 files changed, 76 insertions(+), 23 deletions(-) create mode 100644 samples/package_management/custom_packages_cluster.json diff --git a/R/cluster.R b/R/cluster.R index e77c9eaf..6ca05304 100644 --- a/R/cluster.R +++ b/R/cluster.R @@ -151,6 +151,10 @@ makeCluster <- "wget https://raw.githubusercontent.com/Azure/doAzureParallel/", "master/inst/startup/install_bioconductor.R" ), + paste0( + "wget https://raw.githubusercontent.com/Azure/doAzureParallel/", + "feature/custom-package/inst/startup/install_custom.R" + ), "chmod u+x install_bioconductor.R", installAndStartContainerCommand ) diff --git a/docs/20-package-management.md b/docs/20-package-management.md index 43a54dff..90ed0e7f 100644 --- a/docs/20-package-management.md +++ b/docs/20-package-management.md @@ -38,29 +38,37 @@ You can install packages by specifying the package(s) in your JSON pool configur } ``` +## Installing Packages per-*foreach* Loop + +You can also install cran packages by using the **.packages** option in the *foreach* loop. You can also install github/bioconductor packages by using the **github** and **bioconductor** options in the *foreach* loop. 
Instead of installing packages during pool creation, packages (and its dependencies) can be installed before each iteration in the loop is run on your Azure cluster. + +### Installing a Github Package + +doAzureParallel supports github package with the **github** option. + +Please do not use "https://github.com/" as prefix for the github package name above. + ## Installing packages from a private GitHub repository -Clusters can be configured to install packages from a private GitHub repository by setting the __githubAuthenticationToken__ property. If this property is blank only public repositories can be used. If a token is added then public and the private github repo can be used together. +Clusters can be configured to install packages from a private GitHub repository by setting the __githubAuthenticationToken__ property in the credentials file. If this property is blank only public repositories can be used. If a token is added then public and the private github repo can be used together. When the cluster is created the token is passed in as an environment variable called GITHUB\_PAT on start-up which lasts the life of the cluster and is looked up whenever devtools::install_github is called. +Credentials File for github authentication token +``` json +{ + ... + "githubAuthenticationToken": "", + ... +} + +``` + +Cluster File ```json { { - "name": , - "vmSize": , - "maxTasksPerNode": , - "poolSize": { - "dedicatedNodes": { - "min": 2, - "max": 2 - }, - "lowPriorityNodes": { - "min": 1, - "max": 10 - }, - "autoscaleFormula": "QUEUE" - }, + ... 
"rPackages": { "cran": [], "github": [""], @@ -71,10 +79,18 @@ When the cluster is created the token is passed in as an environment variable ca } ``` -_More information regarding github authentication tokens can be found [here](https://help.github.com/articles/creating-a-personal-access-token-for-the-command-line/)_ +_More information regarding github authentication tokens can be found [here](https://help.github.com/articles/creating-a-personal-access-token-for-the-command-line/) -## Installing Packages per-*foreach* Loop -You can also install cran packages by using the **.packages** option in the *foreach* loop. You can also install github/bioconductor packages by using the **github** and **bioconductor" option in the *foreach* loop. Instead of installing packages during pool creation, packages (and its dependencies) can be installed before each iteration in the loop is run on your Azure cluster. +### Installing Multiple Packages +By using character vectors of the packages, + +```R +number_of_iterations <- 10 +results <- foreach(i = 1:number_of_iterations, + .packages=c('package_1', 'package_2'), + github = c('Azure/rAzureBatch', 'Azure/doAzureParallel'), + bioconductor = c('IRanges', 'Biobase')) %dopar% { ... } +``` To install a single cran package: ```R @@ -94,7 +110,6 @@ number_of_iterations <- 10 results <- foreach(i = 1:number_of_iterations, github='azure/rAzureBatch') %dopar% { ... } ``` -Please do not use "https://github.com/" as prefix for the github package name above. To install multiple github packages: ```R @@ -114,7 +129,7 @@ number_of_iterations <- 10 results <- foreach(i = 1:number_of_iterations, bioconductor=c('package_1', 'package_2')) %dopar% { ... } ``` -## Installing Packages from BioConductor +## Installing a BioConductor Package The default deployment of R used in the cluster (see [Customizing the cluster](./30-customize-cluster.md) for more information) includes the Bioconductor installer by default. 
Simply add packages to the cluster by adding packages in the array. ```json @@ -134,17 +149,24 @@ The default deployment of R used in the cluster (see [Customizing the cluster](. }, "autoscaleFormula": "QUEUE" }, + "containerImage": "rocker/tidyverse:latest", "rPackages": { "cran": [], "github": [], "bioconductor": ["IRanges"] }, - "commandLine": [] + "commandLine": [], + "subnetId": "" } } ``` -Note: Container references that are not provided by tidyverse do not support Bioconductor installs. If you choose another container, you must make sure that Biocondunctor is installed. +Note: Container references that are not provided by tidyverse do not support Bioconductor installs. If you choose another container, you must make sure that Bioconductor is installed. + +## Installing Custom Packages +Our recommendation for installing custom packages is uploading your R packages sources to an Azure file share. + +Custom packages installation on the per-*foreach* loop level is not supported. -## Uninstalling packages +## Uninstalling a Package Uninstalling packages from your pool is not supported. However, you may consider rebuilding your pool. 
diff --git a/samples/package_management/custom_packages_cluster.json b/samples/package_management/custom_packages_cluster.json new file mode 100644 index 00000000..f490021a --- /dev/null +++ b/samples/package_management/custom_packages_cluster.json @@ -0,0 +1,27 @@ +{ + "name": "custom-package-pool", + "vmSize": "Standard_D2_v2", + "maxTasksPerNode": 1, + "poolSize": { + "dedicatedNodes": { + "min": 2, + "max": 2 + }, + "lowPriorityNodes": { + "min": 0, + "max": 0 + }, + "autoscaleFormula": "QUEUE" + }, + "rPackages": { + "cran": [], + "github": [], + "bioconductor": [] + }, + "commandLine": [ + "mkdir /mnt/batch/tasks/shared/data", + "mount -t cifs //.file.core.windows.net/ /mnt/batch/tasks/shared/data -o vers=3.0,username=,password=,dir_mode=0777,file_mode=0777,sec=ntlmssp", + "mkdir $AZ_BATCH_NODE_STARTUP_DIR/tmp | for i in `ls $AZ_BATCH_NODE_SHARED_DIR/data/*.tar.gz | awk '{print $NF}'`; do tar -xvf $i -C $AZ_BATCH_NODE_STARTUP_DIR/tmp; done", + "docker run --rm -v $AZ_BATCH_NODE_ROOT_DIR:$AZ_BATCH_NODE_ROOT_DIR -e AZ_BATCH_NODE_SHARED_DIR=$AZ_BATCH_NODE_SHARED_DIR -e AZ_BATCH_NODE_ROOT_DIR=$AZ_BATCH_NODE_ROOT_DIR -e AZ_BATCH_NODE_STARTUP_DIR=$AZ_BATCH_NODE_STARTUP_DIR rocker/tidyverse:latest Rscript --no-save --no-environ --no-restore --no-site-file --verbose $AZ_BATCH_NODE_STARTUP_DIR/wd/install_custom.R /mnt/batch/tasks/shared/data" + ] +} From 25c42f0a58c477c8ad271fc9e0d450591c754348 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 14 May 2018 08:13:11 -0700 Subject: [PATCH 15/18] Custom packages sample docs --- docs/20-package-management.md | 7 ++-- .../{ => bioconductor}/bioconductor.r | 0 .../package_management_cluster.json | 0 samples/package_management/custom/README.md | 32 +++++++++++++++++++ samples/package_management/custom/custom.R | 24 ++++++++++++++ .../{ => custom}/custom_packages_cluster.json | 0 6 files changed, 61 insertions(+), 2 deletions(-) rename samples/package_management/{ => bioconductor}/bioconductor.r (100%) mode change 100755 => 
100644 rename samples/package_management/{ => bioconductor}/package_management_cluster.json (100%) create mode 100644 samples/package_management/custom/README.md create mode 100644 samples/package_management/custom/custom.R rename samples/package_management/{ => custom}/custom_packages_cluster.json (100%) diff --git a/docs/20-package-management.md b/docs/20-package-management.md index 90ed0e7f..11d80f8a 100644 --- a/docs/20-package-management.md +++ b/docs/20-package-management.md @@ -164,9 +164,12 @@ The default deployment of R used in the cluster (see [Customizing the cluster](. Note: Container references that are not provided by tidyverse do not support Bioconductor installs. If you choose another container, you must make sure that Bioconductor is installed. ## Installing Custom Packages -Our recommendation for installing custom packages is uploading your R packages sources to an Azure file share. +doAzureParallel supports custom package installation in the cluster. Custom packages installation on the per-*foreach* loop level is not supported. -Custom packages installation on the per-*foreach* loop level is not supported. +For steps on installing on custom packages, it can be found [here](../samples/package_management/custom/README.md). + +Note: If the package requires a compilation such as apt-get installations, users will be require +to build their own containers. ## Uninstalling a Package Uninstalling packages from your pool is not supported. However, you may consider rebuilding your pool. 
diff --git a/samples/package_management/bioconductor.r b/samples/package_management/bioconductor/bioconductor.r old mode 100755 new mode 100644 similarity index 100% rename from samples/package_management/bioconductor.r rename to samples/package_management/bioconductor/bioconductor.r diff --git a/samples/package_management/package_management_cluster.json b/samples/package_management/bioconductor/package_management_cluster.json similarity index 100% rename from samples/package_management/package_management_cluster.json rename to samples/package_management/bioconductor/package_management_cluster.json diff --git a/samples/package_management/custom/README.md b/samples/package_management/custom/README.md new file mode 100644 index 00000000..725652f5 --- /dev/null +++ b/samples/package_management/custom/README.md @@ -0,0 +1,32 @@ +## Installing Custom Packages +doAzureParallel supports custom package installation in the cluster. Custom packages are R packages that cannot be hosted on Github or be built on a docker image. The recommended approach for custom packages is building them from source and uploading them to an Azure File Share. + +Note: If the package requires a compilation such as apt-get installations, users will be require +to build their own containers. + +### Building Package from Source in RStudio +1. Open *RStudio* +2. Go to *Build* on the navigation bar +3. Go to *Build From Source* + +### Uploading Custom Package to Azure Files +For detailed steps on uploading files to Azure Files in the Portal can be found +[here](https://docs.microsoft.com/en-us/azure/storage/files/storage-how-to-use-files-portal) + +### Tips +1) In order to build the custom packages' dependencies, we need to untar the R packages and build them within their directories. By default, we will build custom packages in the *$AZ_BATCH_NODE_SHARED_DIR/tmp* directory. 
+2) By default, the custom package cluster configuration file will install any packages that are a *.tar.gz file in the file share. If users want to specify R packages, they must use change this line in the cluster configuration file. + +Finds files that end with *.tar.gz in the current Azure File Share directory +``` json +{ + ... + "commandLine": [ + ... + "mkdir $AZ_BATCH_NODE_STARTUP_DIR/tmp | for i in `ls $AZ_BATCH_NODE_SHARED_DIR/data/*.tar.gz | awk '{print $NF}'`; do tar -xvf $i -C $AZ_BATCH_NODE_STARTUP_DIR/tmp; done", + ... + ] +} +``` + + diff --git a/samples/package_management/custom/custom.R b/samples/package_management/custom/custom.R new file mode 100644 index 00000000..f9fae6c8 --- /dev/null +++ b/samples/package_management/custom/custom.R @@ -0,0 +1,24 @@ +#Please see documentation at docs/20-package-management.md for more details on packagement management. + +# import the doAzureParallel library and its dependencies +library(doAzureParallel) + +# set your credentials +doAzureParallel::setCredentials("credentials.json") + +# Create your cluster if not exist +cluster <- doAzureParallel::makeCluster("custom_packages_cluster.json") + +# register your parallel backend +doAzureParallel::registerDoAzureParallel(cluster) + +# check that your workers are up +doAzureParallel::getDoParWorkers() + +summary <- foreach(i = 1:1, .packages = c("customR")) %dopar% { + sessionInfo() + # Method from customR + hello() +} + +summary diff --git a/samples/package_management/custom_packages_cluster.json b/samples/package_management/custom/custom_packages_cluster.json similarity index 100% rename from samples/package_management/custom_packages_cluster.json rename to samples/package_management/custom/custom_packages_cluster.json From 687ec7d10100bad8962f18c0a032891ba44f7145 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 14 May 2018 08:29:09 -0700 Subject: [PATCH 16/18] Fixed typo in azure files typo --- samples/azure_files/readme.md | 2 +- 
samples/package_management/custom/README.md | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/samples/azure_files/readme.md b/samples/azure_files/readme.md index 110b737d..59e7f9e4 100644 --- a/samples/azure_files/readme.md +++ b/samples/azure_files/readme.md @@ -12,4 +12,4 @@ This samples shows how to update the cluster configuration to create a new mount For large data sets or large traffic applications be sure to review the Azure Files [scalability and performance targets](https://docs.microsoft.com/en-us/azure/storage/common/storage-scalability-targets#scalability-targets-for-blobs-queues-tables-and-files). -For very large data sets we recommend using Azure Blobs. You can learn more in the [persistent storage](../../docs/23-persistent-storage.md) and [distrubuted data](../../docs/21-distributing-data.md) docs. +For very large data sets we recommend using Azure Blobs. You can learn more in the [persistent storage](../../docs/23-persistent-storage.md) and [distributing data](../../docs/21-distributing-data.md) docs. diff --git a/samples/package_management/custom/README.md b/samples/package_management/custom/README.md index 725652f5..b18e3026 100644 --- a/samples/package_management/custom/README.md +++ b/samples/package_management/custom/README.md @@ -13,7 +13,7 @@ to build their own containers. For detailed steps on uploading files to Azure Files in the Portal can be found [here](https://docs.microsoft.com/en-us/azure/storage/files/storage-how-to-use-files-portal) -### Tips +### Notes 1) In order to build the custom packages' dependencies, we need to untar the R packages and build them within their directories. By default, we will build custom packages in the *$AZ_BATCH_NODE_SHARED_DIR/tmp* directory. 2) By default, the custom package cluster configuration file will install any packages that are a *.tar.gz file in the file share. If users want to specify R packages, they must use change this line in the cluster configuration file. 
@@ -28,5 +28,5 @@ Finds files that end with *.tar.gz in the current Azure File Share directory ] } ``` - - +3) For more information on using Azure Files on Batch, follow our other [sample](../../azure_files/readme.md) of using Azure Files +4) Replace your Storage Account name, endpoint and key in the cluster configuration file From e519c9d29f605ce3f18768a7e62b6058ba4b68d9 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 14 May 2018 13:17:07 -0700 Subject: [PATCH 17/18] Fixed typos based on PR --- docs/20-package-management.md | 4 ++-- samples/package_management/bioconductor/bioconductor.r | 2 +- samples/package_management/custom/README.md | 4 ++-- samples/package_management/custom/custom.R | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/20-package-management.md b/docs/20-package-management.md index 11d80f8a..efb307b9 100644 --- a/docs/20-package-management.md +++ b/docs/20-package-management.md @@ -166,9 +166,9 @@ Note: Container references that are not provided by tidyverse do not support Bio ## Installing Custom Packages doAzureParallel supports custom package installation in the cluster. Custom packages installation on the per-*foreach* loop level is not supported. -For steps on installing on custom packages, it can be found [here](../samples/package_management/custom/README.md). +Steps for installing custom packages can be found [here](../samples/package_management/custom/README.md). -Note: If the package requires a compilation such as apt-get installations, users will be require +Note: If the package requires a compilation such as apt-get installations, users will be required to build their own containers. 
## Uninstalling a Package diff --git a/samples/package_management/bioconductor/bioconductor.r b/samples/package_management/bioconductor/bioconductor.r index a5074fdf..7f80c64d 100644 --- a/samples/package_management/bioconductor/bioconductor.r +++ b/samples/package_management/bioconductor/bioconductor.r @@ -1,4 +1,4 @@ -#Please see documentation at docs/20-package-management.md for more details on packagement management. +#Please see documentation at docs/20-package-management.md for more details on package management. # import the doAzureParallel library and its dependencies library(doAzureParallel) diff --git a/samples/package_management/custom/README.md b/samples/package_management/custom/README.md index b18e3026..e44159d4 100644 --- a/samples/package_management/custom/README.md +++ b/samples/package_management/custom/README.md @@ -1,7 +1,7 @@ ## Installing Custom Packages doAzureParallel supports custom package installation in the cluster. Custom packages are R packages that cannot be hosted on Github or be built on a docker image. The recommended approach for custom packages is building them from source and uploading them to an Azure File Share. -Note: If the package requires a compilation such as apt-get installations, users will be require +Note: If the package requires a compilation such as apt-get installations, users will be required to build their own containers. ### Building Package from Source in RStudio @@ -15,7 +15,7 @@ For detailed steps on uploading files to Azure Files in the Portal can be found ### Notes 1) In order to build the custom packages' dependencies, we need to untar the R packages and build them within their directories. By default, we will build custom packages in the *$AZ_BATCH_NODE_SHARED_DIR/tmp* directory. -2) By default, the custom package cluster configuration file will install any packages that are a *.tar.gz file in the file share. 
If users want to specify R packages, they must use change this line in the cluster configuration file. +2) By default, the custom package cluster configuration file will install any packages that are a *.tar.gz file in the file share. If users want to specify R packages, they must change this line in the cluster configuration file. Finds files that end with *.tar.gz in the current Azure File Share directory ``` json diff --git a/samples/package_management/custom/custom.R b/samples/package_management/custom/custom.R index f9fae6c8..3fb947fb 100644 --- a/samples/package_management/custom/custom.R +++ b/samples/package_management/custom/custom.R @@ -1,4 +1,4 @@ -#Please see documentation at docs/20-package-management.md for more details on packagement management. +#Please see documentation at docs/20-package-management.md for more details on package management. # import the doAzureParallel library and its dependencies library(doAzureParallel) From 14c50ed8b90ab1741a7d9b9c2a0f188617dfd6b3 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 14 May 2018 13:21:27 -0700 Subject: [PATCH 18/18] Fixed download install custom path --- R/cluster.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/cluster.R b/R/cluster.R index 6ca05304..e6c18bab 100644 --- a/R/cluster.R +++ b/R/cluster.R @@ -153,7 +153,7 @@ makeCluster <- ), paste0( "wget https://raw.githubusercontent.com/Azure/doAzureParallel/", - "feature/custom-package/inst/startup/install_custom.R" + "master/inst/startup/install_custom.R" ), "chmod u+x install_bioconductor.R", installAndStartContainerCommand