From 2af7f62a8a6466137f69a8daf022708d43d2a11e Mon Sep 17 00:00:00 2001 From: geoffjentry Date: Mon, 4 May 2015 19:59:01 -0400 Subject: [PATCH 001/326] Add license file --- LICENSE.txt | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 LICENSE.txt diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 00000000000..2efdb96ebc1 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,27 @@ +Copyright (c) 2015, Broad Institute, Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name Broad Institute, Inc. nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE From 3677b05ba8f938429b89520b8dd149a65fb479cf Mon Sep 17 00:00:00 2001 From: Scott Frazer Date: Mon, 1 Jun 2015 13:50:55 -0400 Subject: [PATCH 002/326] Docker files --- .dockerignore | 1 + Dockerfile | 24 ++++++++++++++++++++++++ docker/install.sh | 10 ++++++++++ docker/run.sh | 5 +++++ 4 files changed, 40 insertions(+) create mode 120000 .dockerignore create mode 100644 Dockerfile create mode 100755 docker/install.sh create mode 100755 docker/run.sh diff --git a/.dockerignore b/.dockerignore new file mode 120000 index 00000000000..3e4e48b0b5f --- /dev/null +++ b/.dockerignore @@ -0,0 +1 @@ +.gitignore \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000000..b11a094e280 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,24 @@ +# http://github.com/broadinstitute/scala-baseimage +FROM broadinstitute/scala-baseimage + +# Cromwell's HTTP Port +EXPOSE 8000 + +# Install Cromwell +ADD . /cromwell +RUN ["/bin/bash", "-c", "/cromwell/docker/install.sh /cromwell"] + +# Add Cromwell as a service (it will start when the container starts) +RUN mkdir /etc/service/cromwell +ADD docker/run.sh /etc/service/cromwell/run + +# These next 4 commands are for enabling SSH to the container. +# id_rsa.pub is referenced below, but this should be any public key +# that you want to be added to authorized_keys for the root user. 
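+# (Editor's note, not part of the original commit: such a key pair can be
+# generated locally with, for example, `ssh-keygen -t rsa`, which produces the
+# id_rsa.pub file referenced here.)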
+# Copy the public key into this directory because ADD cannot reference +# Files outside of this directory + +#EXPOSE 22 +#RUN rm -f /etc/service/sshd/down +#ADD id_rsa.pub /tmp/id_rsa.pub +#RUN cat /tmp/id_rsa.pub >> /root/.ssh/authorized_keys diff --git a/docker/install.sh b/docker/install.sh new file mode 100755 index 00000000000..3ea2888203e --- /dev/null +++ b/docker/install.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +set -e + +CROMWELL_DIR=$1 +cd $CROMWELL_DIR +sbt assembly +CROMWELL_JAR=$(find target | grep 'cromwell.*\.jar') +mv $CROMWELL_JAR . +sbt clean diff --git a/docker/run.sh b/docker/run.sh new file mode 100755 index 00000000000..c379ba35fe9 --- /dev/null +++ b/docker/run.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +set -e + +java -jar $(find /cromwell | grep 'cromwell.*\.jar') server From c4e4b45e7e1606ecfc56da2c8bea0bbdd87561b0 Mon Sep 17 00:00:00 2001 From: Thibault Jeandet Date: Tue, 26 Jan 2016 15:20:36 -0500 Subject: [PATCH 003/326] adjust yaml file --- src/main/resources/swagger/cromwell.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/resources/swagger/cromwell.yaml b/src/main/resources/swagger/cromwell.yaml index c91796f0eee..83f40a404aa 100644 --- a/src/main/resources/swagger/cromwell.yaml +++ b/src/main/resources/swagger/cromwell.yaml @@ -597,12 +597,12 @@ definitions: required: - valid properties: - valid: - type: boolean - description: The validation of the workflow - error: + status: + type: string + description: The validation status of the workflow + message: type: string - description: The validation error of the workflow + description: Details about the validation WorkflowSubmitResponse: description: '' required: From 8a48eee787184ee960ec8c580b3b542f8dfae04e Mon Sep 17 00:00:00 2001 From: dgtester Date: Thu, 21 Jan 2016 16:15:27 -0500 Subject: [PATCH 004/326] Aborts jobs on shutdown. Added configuration option for abortJobsOnTerminate. Added support for server mode. SingleWorkflowRunnerActor now uses the internal state to determine whether the workflow is done (and not a separately maintained flag). Added configuration option for abortJobsOnTerminate. Added support for server mode. SingleWorkflowRunnerActor now uses the internal state to determine whether the workflow is done (and not a separately maintained flag). --- .../scala/cromwell/engine/CromwellActor.scala | 16 +++++++++++++ .../workflow/SingleWorkflowRunnerActor.scala | 12 ++++++++++ .../workflow/WorkflowManagerActor.scala | 24 +++++++++++++++++-- 3 files changed, 50 insertions(+), 2 deletions(-) diff --git a/src/main/scala/cromwell/engine/CromwellActor.scala b/src/main/scala/cromwell/engine/CromwellActor.scala index 11d19aacc3f..6017d71a75b 100644 --- a/src/main/scala/cromwell/engine/CromwellActor.scala +++ b/src/main/scala/cromwell/engine/CromwellActor.scala @@ -1,10 +1,26 @@ package cromwell.engine import akka.util.Timeout +import com.typesafe.config.{ConfigException, ConfigFactory} import scala.concurrent.duration._ import scala.language.postfixOps trait CromwellActor { protected implicit val timeout = Timeout(5 seconds) + + /** + * Retrieves the configuration option that determines whether this actor should abort all jobs if it receives + * a shutdown hook. + * @return - The value of the configuration option, or 'false' if the option isn't specified. 
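+   * (Editor's note, not part of the original patch: given the lookup below, this
+   * option is read from the "backend" stanza of the application config, e.g.
+   * `backend { abortJobsOnTerminate = true }`.)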
+ */ + def getAbortJobsOnTerminate: Boolean = { + val config=ConfigFactory.load.getConfig("backend") + try { + config.getBoolean("abortJobsOnTerminate") + } catch { + case _:ConfigException => false + } + } + } diff --git a/src/main/scala/cromwell/engine/workflow/SingleWorkflowRunnerActor.scala b/src/main/scala/cromwell/engine/workflow/SingleWorkflowRunnerActor.scala index 0e5457c6d14..8e69829c9fa 100644 --- a/src/main/scala/cromwell/engine/workflow/SingleWorkflowRunnerActor.scala +++ b/src/main/scala/cromwell/engine/workflow/SingleWorkflowRunnerActor.scala @@ -79,6 +79,18 @@ case class SingleWorkflowRunnerActor(source: WorkflowSourceFiles, case Event(id: WorkflowId, data) => log.info(s"$tag: workflow ID UUID($id)") workflowManager ! SubscribeToWorkflow(id) + + if (getAbortJobsOnTerminate) { + Runtime.getRuntime.addShutdownHook(new Thread() { + override def run(): Unit = { + workflowManager ! WorkflowAbort(id) + log.info(s"$tag: Waiting for workflow $id to abort...") + while(stateName != Done) + Thread.sleep(1000) + log.info(s"$tag: Workflow $id aborted.") + } + }) + } stay using data.copy(id = Option(id)) case Event(Transition(_, _, WorkflowSucceeded), data) => workflowManager ! WorkflowOutputs(data.id.get) diff --git a/src/main/scala/cromwell/engine/workflow/WorkflowManagerActor.scala b/src/main/scala/cromwell/engine/workflow/WorkflowManagerActor.scala index 6b5e6dd0380..6a01b80572c 100644 --- a/src/main/scala/cromwell/engine/workflow/WorkflowManagerActor.scala +++ b/src/main/scala/cromwell/engine/workflow/WorkflowManagerActor.scala @@ -14,14 +14,15 @@ import cromwell.engine.db.DataAccess._ import cromwell.engine.db.ExecutionDatabaseKey import cromwell.engine.db.slick._ import cromwell.engine.workflow.WorkflowActor.{Restart, Start} +import cromwell.server.CromwellServer import cromwell.util.WriteOnceStore import cromwell.webservice._ import org.joda.time.DateTime import spray.json._ import wdl4s._ - +import wdl4s.values.WdlFile import scala.concurrent.ExecutionContext.Implicits.global -import scala.concurrent.Future +import scala.concurrent.{Await, Future} import scala.concurrent.duration._ import scala.io.Source import scala.language.postfixOps @@ -68,6 +69,25 @@ class WorkflowManagerActor(backend: Backend) extends Actor with CromwellActor { private val workflowStore = new WriteOnceStore[WorkflowId, WorkflowActorRef] + if (getAbortJobsOnTerminate) { + Runtime.getRuntime.addShutdownHook(new Thread() { + override def run(): Unit = { + log.info(s"$tag: Received shutdown signal. Aborting all running workflows...") + workflowStore.toMap.foreach{case (id, actor)=> + CromwellServer.workflowManagerActor ! 
WorkflowManagerActor.WorkflowAbort(id) + } + var numRemaining = -1 + while(numRemaining != 0) { + Thread.sleep(1000) + val result = globalDataAccess.getWorkflowsByState(Seq(WorkflowRunning, WorkflowAborting)) + numRemaining = Await.result(result,Duration.Inf).size + log.info(s"$tag: Waiting for all workflows to abort ($numRemaining remaining).") + } + log.info(s"$tag: All workflows aborted.") + } + }) + } + override def preStart() { restartIncompleteWorkflows() } From d2b053d7977560498363a0066424767d52a4e135 Mon Sep 17 00:00:00 2001 From: Thibault Jeandet Date: Thu, 22 Sep 2016 16:48:32 -0400 Subject: [PATCH 005/326] update version to 0.21 --- project/Version.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/Version.scala b/project/Version.scala index 99e00f9f622..58c8590e286 100644 --- a/project/Version.scala +++ b/project/Version.scala @@ -4,7 +4,7 @@ import sbt._ object Version { // Upcoming release, or current if we're on the master branch - val cromwellVersion = "0.20" + val cromwellVersion = "0.21" // Adapted from SbtGit.versionWithGit def cromwellVersionWithGit: Seq[Setting[_]] = From fac45fc581f38c0364934f24cf3ffbca342d0cd0 Mon Sep 17 00:00:00 2001 From: Jeff Gentry Date: Fri, 23 Sep 2016 12:46:31 -0400 Subject: [PATCH 006/326] Fix incorrect URL --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 3c5efc0099a..fd1477d503b 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,7 @@ A [Workflow Management System](https://en.wikipedia.org/wiki/Workflow_management * [GET /api/workflows/:version/:id/metadata](#get-apiworkflowsversionidmetadata) * [POST /api/workflows/:version/:id/abort](#post-apiworkflowsversionidabort) * [GET /api/workflows/:version/backends](#get-apiworkflowsversionbackends) - * [GET /api/workflows/:version/stats](#get-apiworkflowsversionstats) + * [GET /api/engine/:version/stats](#get-apiengineversionstats) * [Error handling](#error-handling) * [Developer](#developer) * [Generating table of contents on Markdown files](#generating-table-of-contents-on-markdown-files) @@ -2501,18 +2501,18 @@ Server: spray-can/1.3.3 } ``` -## GET /api/workflows/:version/stats +## GET /api/engine/:version/stats This endpoint returns some basic statistics on the current state of the engine. At the moment that includes the number of running workflows and the number of active jobs. cURL: ``` -$ curl http://localhost:8000/api/workflows/v1/stats +$ curl http://localhost:8000/api/engine/v1/stats ``` HTTPie: ``` -$ http http://localhost:8000/api/workflows/v1/stats +$ http http://localhost:8000/api/engine/v1/stats ``` Response: From 88ff89f0ea378cea46aaaca7d206cb55acec482b Mon Sep 17 00:00:00 2001 From: Ruchi Munshi Date: Thu, 13 Oct 2016 10:13:07 -0400 Subject: [PATCH 007/326] release_notes --- CHANGELOG.md | 63 ++++++++++++++++++++++++++++++-------- project/Dependencies.scala | 4 +-- 2 files changed, 52 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3ff214c551d..eabc031c95b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,22 +1,44 @@ # Cromwell Change Log -## 0.20 +## 0.22 -* The default per-upload bytes size for GCS is now the minumum 256K -instead of 64M. There is also an undocumented config key -`google.upload-buffer-bytes` that allows adjusting this internal value. +* Improved retries for Call Caching and general bug fixes. +* Now there are configurable caching strategies for the SharedFileSystem backends (i.e. Local). 
This new "caching" + stanza will be nested inside of: backend.{your_SFS_backend}.config.filesystems.local stanza. + See below for detailed descriptions of each configurable key. -* Updated Docker Hub hash retriever to parse json with [custom media -types](https://github.com/docker/distribution/blob/05b0ab0/docs/spec/manifest-v2-1.md). +``` +caching { + duplication-strategy: [ + "hard-link", "soft-link", "copy" + ] + + # Possible values: file, path + # "file" will compute an md5 hash of the file content. + # "path" will compute an md5 hash of the file path. This strategy will only be effective if the duplication-strategy (above) is set to "soft-link", + # in order to allow for the original file path to be hashed. + hashing-strategy: "file" + + # When true, will check if a sibling file with the same name and the .md5 extension exists, and if it does, use the content of this file as a hash. + # If false or the md5 does not exist, will proceed with the above-defined hashing strategy. + check-sibling-md5: false +} +``` +* Mulitple Input JSON files can now be submitted in server mode through the existing submission endpoint: /api/workflows/:version. + This endpoint accepts a POST request with a multipart/form-data encoded body. You can now include multiple keys for workflow inputs. -* Added a `/batch` submit endpoint that accepts a single wdl with -multiple input files. + The keys below can contain optional JSON file(s) of the workflow inputs. A skeleton file can be generated from wdltool using the "inputs" subcommand. + NOTE: Each prcoceeding workflowInput file will override any JSON key conflicts. -* The `/query` endpoint now supports querying by `id`, and submitting -parameters as a HTTP POST. + workflowInputs + workflowInputs_2 + workflowInputs_3 + workflowInputs_4 + workflowInputs_5 -## 0.21 +* Batched status polling of Google Jes for running jobs. +## 0.21 * Warning: Significant database updates when you switch from version 0.19 to 0.21 of Cromwell. There may be a long wait period for the migration to finish for large databases. @@ -71,7 +93,7 @@ task { command { echo "I'm private !" } - + runtime { docker: "ubuntu:latest" noAddress: true @@ -94,7 +116,7 @@ passed absolute paths for input `File`s. * Override the default database configuration by setting the keys `database.driver`, `database.db.driver`, `database.db.url`, etc. * Override the default database configuration by setting the keys -`database.driver`, `database.db.driver`, `database.db.url`, etc. +`database.driver`, `database.db.driver`, `database.db.url`, etc. For example: ``` @@ -111,3 +133,18 @@ database { } ``` +## 0.20 + +* The default per-upload bytes size for GCS is now the minumum 256K +instead of 64M. There is also an undocumented config key +`google.upload-buffer-bytes` that allows adjusting this internal value. + +* Updated Docker Hub hash retriever to parse json with [custom media +types](https://github.com/docker/distribution/blob/05b0ab0/docs/spec/manifest-v2-1.md). + +* Added a `/batch` submit endpoint that accepts a single wdl with +multiple input files. + +* The `/query` endpoint now supports querying by `id`, and submitting +parameters as a HTTP POST. 
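(Editorial aside, not part of the patch above: a minimal sketch of the 0.22 multi-input submission described earlier in this changelog entry. The `wdlSource` form field name is assumed from the contemporary submission API, and the file names are hypothetical; on key conflicts, higher-numbered `workflowInputs_x` files override lower-numbered ones.)

```
curl -v "http://localhost:8000/api/workflows/v1" \
    -F wdlSource=@hello.wdl \
    -F workflowInputs=@inputs.json \
    -F workflowInputs_2=@inputs_overrides.json
```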
+ diff --git a/project/Dependencies.scala b/project/Dependencies.scala index ca5420d5e9b..a902e1b2a20 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -1,8 +1,8 @@ import sbt._ object Dependencies { - lazy val lenthallV = "0.19-882a763-SNAPSHOT" - lazy val wdl4sV = "0.6-2964173-SNAPSHOT" + lazy val lenthallV = "0.19" + lazy val wdl4sV = "0.6" lazy val sprayV = "1.3.3" /* spray-json is an independent project from the "spray suite" From 06d7f33f2bcd7c47fa714f69ac8daadfce2c77ee Mon Sep 17 00:00:00 2001 From: Ruchi Munshi Date: Thu, 13 Oct 2016 11:41:17 -0400 Subject: [PATCH 008/326] typos and reword --- CHANGELOG.md | 67 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eabc031c95b..cf48b60be6b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,32 +3,45 @@ ## 0.22 * Improved retries for Call Caching and general bug fixes. -* Now there are configurable caching strategies for the SharedFileSystem backends (i.e. Local). This new "caching" - stanza will be nested inside of: backend.{your_SFS_backend}.config.filesystems.local stanza. +* Users will experience better scalability of status polling for Google JES. +* Now there are configurable caching strategies for a SharedFileSystem backend (i.e. Local, SFS) in the backend's stanza: See below for detailed descriptions of each configurable key. ``` -caching { - duplication-strategy: [ - "hard-link", "soft-link", "copy" - ] - - # Possible values: file, path - # "file" will compute an md5 hash of the file content. - # "path" will compute an md5 hash of the file path. This strategy will only be effective if the duplication-strategy (above) is set to "soft-link", - # in order to allow for the original file path to be hashed. - hashing-strategy: "file" - - # When true, will check if a sibling file with the same name and the .md5 extension exists, and if it does, use the content of this file as a hash. - # If false or the md5 does not exist, will proceed with the above-defined hashing strategy. - check-sibling-md5: false -} +backend { + ... + providers { + SFS_BackendName { + actor-factory = ... + config { + ... + filesystems { + local { + localization: [ + ... + ] + caching { + duplication-strategy: [ + "hard-link", "soft-link", "copy" + ] + # Possible values: file, path + # "file" will compute an md5 hash of the file content. + # "path" will compute an md5 hash of the file path. This strategy will only be effective if the duplication-strategy (above) is set to "soft-link", + # in order to allow for the original file path to be hashed. + hashing-strategy: "file" + + # When true, will check if a sibling file with the same name and the .md5 extension exists, and if it does, use the content of this file as a hash. + # If false or the md5 does not exist, will proceed with the above-defined hashing strategy. + check-sibling-md5: false + } ``` -* Mulitple Input JSON files can now be submitted in server mode through the existing submission endpoint: /api/workflows/:version. +* Multiple Input JSON files can now be submitted in server mode through the existing submission endpoint: /api/workflows/:version. This endpoint accepts a POST request with a multipart/form-data encoded body. You can now include multiple keys for workflow inputs. - The keys below can contain optional JSON file(s) of the workflow inputs. A skeleton file can be generated from wdltool using the "inputs" subcommand. 
- NOTE: Each prcoceeding workflowInput file will override any JSON key conflicts. + Each key below can contain an optional JSON file of the workflow inputs. A skeleton file can be generated from wdltool using the "inputs" subcommand. + NOTE: In case of key conflicts between multiple JSON files, higher values of x in workflowInputs_x override lower values. For example, an input + specified in workflowInputs_3 will override an input with the same name that was given in workflowInputs or workflowInputs_2. Similarly, an input + specified in workflowInputs_5 will override an input with the same name in any other input file. workflowInputs workflowInputs_2 @@ -36,7 +49,17 @@ caching { workflowInputs_4 workflowInputs_5 -* Batched status polling of Google Jes for running jobs. +* You can now limit the number of concurrent jobs for a backend by specifying the following option in the backend's config stanza: +``` +backend { + ... + providers { + BackendName { + actor-factory = ... + config { + concurrent-job-limit = 5 +``` + ## 0.21 @@ -135,7 +158,7 @@ database { ## 0.20 -* The default per-upload bytes size for GCS is now the minumum 256K +* The default per-upload bytes size for GCS is now the minimum 256K instead of 64M. There is also an undocumented config key `google.upload-buffer-bytes` that allows adjusting this internal value. From f7f068cf59a928471330ae5f37f8bb4401872cb3 Mon Sep 17 00:00:00 2001 From: Jeff Gentry Date: Fri, 2 Dec 2016 20:02:46 -0500 Subject: [PATCH 009/326] Release 23 (#1735) --- .travis.yml | 14 +- CHANGELOG.md | 119 +++ MakingABackend.MD | 197 ++++ README.md | 621 ++++++++++++- SecurityRecommendations.md | 51 ++ .../backend/BackendCacheHitCopyingActor.scala | 4 +- .../backend/BackendJobBreadCrumb.scala | 14 + .../backend/BackendJobExecutionActor.scala | 15 +- .../backend/BackendLifecycleActor.scala | 6 +- .../BackendLifecycleActorFactory.scala | 26 +- .../BackendWorkflowInitializationActor.scala | 13 +- .../cromwell/backend/OutputEvaluator.scala | 31 - .../backend/RuntimeAttributeDefinition.scala | 4 +- .../async/AsyncBackendJobExecutionActor.scala | 10 +- .../backend/async/ExecutionHandle.scala | 6 +- .../backend/async/ExecutionResult.scala | 8 +- .../main/scala/cromwell/backend/backend.scala | 30 +- .../callcaching/CacheHitDuplicating.scala | 30 +- .../scala/cromwell/backend/io/JobPaths.scala | 70 +- .../backend/io/JobPathsWithDocker.scala | 39 + .../cromwell/backend/io/WorkflowPaths.scala | 35 +- ...rkflowPathsBackendInitializationData.scala | 7 +- .../backend/io/WorkflowPathsWithDocker.scala | 16 + .../main/scala/cromwell/backend/package.scala | 12 +- .../RuntimeAttributesValidation.scala | 5 +- .../scala/cromwell/backend/wdl/Command.scala | 31 + .../cromwell/backend/wdl/FileSystems.scala | 30 - .../backend/wdl/OutputEvaluator.scala | 19 + .../cromwell/backend/wdl/PureFunctions.scala | 60 -- .../backend/wdl/ReadLikeFunctions.scala | 7 +- .../cromwell/backend/wdl/WriteFunctions.scala | 12 +- .../DefaultWorkflowFileSystemProvider.scala | 9 - .../wfs/DefaultWorkflowPathBuilder.scala | 8 + .../wfs/WorkflowFileSystemProvider.scala | 34 - .../backend/wfs/WorkflowPathBuilder.scala | 25 + .../scala/cromwell/backend/BackendSpec.scala | 58 +- .../cromwell/backend/io/JobPathsSpec.scala | 40 +- .../cromwell/backend/io/TestWorkflows.scala | 14 +- .../backend/io/WorkflowPathsSpec.scala | 52 +- .../RuntimeAttributesDefaultSpec.scala | 2 +- .../PureStandardLibraryFunctionsSpec.scala | 30 + build.sbt | 1 + core/src/main/resources/logback.xml | 88 ++ 
core/src/main/resources/reference.conf | 38 +- .../main/scala/cromwell/core/CallKey.scala | 7 + .../scala/cromwell/core/ExecutionStatus.scala | 2 +- .../scala/cromwell/core/ExecutionStore.scala | 13 - .../src/main/scala/cromwell/core/JobKey.scala | 6 +- .../scala/cromwell/core/OutputStore.scala | 40 - .../scala/cromwell/core/PathFactory.scala | 135 --- .../cromwell/core/WorkflowMetadataKeys.scala | 2 + .../cromwell/core/WorkflowSourceFiles.scala | 9 - .../core/WorkflowSourceFilesCollection.scala | 43 + .../core/callcaching/CallCachingMode.scala | 6 +- .../cromwell/core/logging/JobLogger.scala | 2 +- .../core/logging/WorkflowLogger.scala | 8 +- .../main/scala/cromwell/core/package.scala | 2 +- .../core/path/CustomRetryParams.scala | 25 + .../core/path/DefaultPathBuilder.scala | 21 + .../core/path/DefaultPathBuilderFactory.scala | 8 + .../core/path/JavaWriterImplicits.scala | 13 + .../cromwell/core/path/PathBuilder.scala | 10 + .../core/path/PathBuilderFactory.scala | 11 + .../cromwell/core/{ => path}/PathCopier.scala | 23 +- .../cromwell/core/path/PathFactory.scala | 57 ++ .../cromwell/core/path/PathImplicits.scala | 15 + .../core/path/PathParsingException.scala | 5 + .../scala/cromwell/core/path/PathWriter.scala | 76 ++ .../core/path/proxy/FileSystemProxy.scala | 25 + .../cromwell/core/path/proxy/PathProxy.scala | 44 + .../RetryableFileSystemProviderProxy.scala | 57 ++ .../scala/cromwell/core/retry/Backoff.scala | 2 +- .../scala/cromwell/core/retry/Retry.scala | 4 +- .../core/simpleton/WdlValueBuilder.scala | 32 +- .../core/simpleton/WdlValueSimpleton.scala | 2 + .../WdlValueJsonFormatter.scala | 1 + .../scala/cromwell/util/PromiseActor.scala | 38 +- .../cromwell/util/StopAndLogSupervisor.scala | 24 + .../main/scala/cromwell/util/TryUtil.scala | 45 + .../path/RetryableFileSystemProxySpec.scala | 278 ++++++ .../core/simpleton/WdlValueBuilderSpec.scala | 129 ++- .../test/scala/cromwell/util/SampleWdl.scala | 19 +- .../util/WdlValueJsonFormatterSpec.scala | 28 + .../src/main/resources/changelog.xml | 5 + .../changesets/embiggen_metadata_value.xml | 50 + .../encrypt_and_clear_workflow_options.xml | 14 + .../rename_workflow_options_in_metadata.xml | 10 + .../changesets/sub_workflow_store.xml | 62 ++ .../workflow_store_imports_file.xml | 15 + .../migration/src/main/resources/logback.xml | 36 - .../migration/custom/BatchedTaskChange.scala | 139 +++ .../custom/MigrationTaskChange.scala | 48 + .../symbol => custom}/QueryPaginator.scala | 2 +- .../symbol => custom}/ResultSetIterator.scala | 2 +- .../CallOutputSymbolTableMigration.scala | 1 - .../symbol/InputSymbolTableMigration.scala | 1 - .../table/symbol/MetadataStatement.scala | 5 +- .../table/symbol/SymbolTableMigration.scala | 109 +-- .../WorkflowOutputSymbolTableMigration.scala | 1 - .../RenameWorkflowOptionKeysMigration.scala | 18 +- .../ClearMetadataEntryWorkflowOptions.scala | 15 + ...yptWorkflowStoreEntryWorkflowOptions.scala | 14 + .../RenameWorkflowOptionsInMetadata.scala | 38 + .../WorkflowOptionsChange.scala | 69 ++ .../WorkflowOptionsRenaming.scala | 21 + .../slick/CallCachingSlickDatabase.scala | 1 + .../database/slick/SlickDatabase.scala | 3 +- .../slick/SubWorkflowStoreSlickDatabase.scala | 67 ++ .../slick/tables/DataAccessComponent.scala | 6 +- .../SubWorkflowStoreEntryComponent.scala | 62 ++ .../tables/WorkflowStoreEntryComponent.scala | 6 +- .../cromwell/database/sql/SqlDatabase.scala | 3 +- .../sql/SubWorkflowStoreSqlDatabase.scala | 21 + .../sql/tables/SubWorkflowStoreEntry.scala | 12 + .../sql/tables/WorkflowStoreEntry.scala 
| 3 +- engine/src/main/resources/logback.xml | 36 - .../src/main/resources/swagger/cromwell.yaml | 17 + .../workflowTimings/workflowTimings.html | 184 +++- .../cromwell/engine/EngineFilesystems.scala | 47 +- .../engine/EngineWorkflowDescriptor.scala | 26 +- .../scala/cromwell/engine/WdlFunctions.scala | 12 +- .../backend/EnhancedWorkflowOptions.scala | 16 - .../workflow/SingleWorkflowRunnerActor.scala | 258 +++--- .../engine/workflow/WorkflowActor.scala | 83 +- .../workflow/WorkflowManagerActor.scala | 102 ++- .../lifecycle/CopyWorkflowLogsActor.scala | 3 +- .../lifecycle/CopyWorkflowOutputsActor.scala | 46 +- .../MaterializeWorkflowDescriptorActor.scala | 152 +++- .../lifecycle/WorkflowFinalizationActor.scala | 30 +- .../WorkflowInitializationActor.scala | 4 +- .../execution/CallMetadataHelper.scala | 135 +++ .../execution/EngineJobExecutionActor.scala | 137 ++- .../lifecycle/execution/ExecutionStore.scala | 109 +++ .../execution/JobPreparationActor.scala | 151 +-- .../lifecycle/execution/OutputStore.scala | 98 ++ .../execution/SubWorkflowExecutionActor.scala | 275 ++++++ .../lifecycle/execution/WdlLookup.scala | 106 --- .../execution/WorkflowExecutionActor.scala | 858 +++++++++--------- .../WorkflowExecutionActorData.scala | 118 ++- .../execution/WorkflowMetadataHelper.scala | 37 + .../execution/callcaching/CallCache.scala | 10 +- .../CallCacheInvalidateActor.scala | 1 + .../callcaching/CallCacheWriteActor.scala | 6 +- .../callcaching/EngineJobHashingActor.scala | 4 +- .../lifecycle/execution/package.scala | 12 +- .../JobExecutionTokenDispenserActor.scala | 2 +- .../workflowstore/InMemoryWorkflowStore.scala | 6 +- .../workflowstore/SqlWorkflowStore.scala | 28 +- .../workflowstore/WorkflowStore.scala | 4 +- .../workflowstore/WorkflowStoreActor.scala | 48 +- .../workflowstore/workflowstore_.scala | 4 +- .../jobstore/EmptyJobStoreActor.scala | 15 + .../scala/cromwell/jobstore/jobstore_.scala | 2 +- .../cromwell/server/CromwellRootActor.scala | 10 +- .../cromwell/server/CromwellServer.scala | 3 + .../EmptySubWorkflowStoreActor.scala | 17 + .../SqlSubWorkflowStore.scala | 31 + .../subworkflowstore/SubWorkflowStore.scala | 19 + .../SubWorkflowStoreActor.scala | 72 ++ .../webservice/CromwellApiHandler.scala | 8 +- .../webservice/CromwellApiService.scala | 122 ++- .../webservice/EngineStatsActor.scala | 10 +- .../webservice/WorkflowJsonSupport.scala | 16 +- .../metadata/IndexedJsonValue.scala | 22 +- .../metadata/MetadataBuilderActor.scala | 122 ++- .../cromwell/ArrayOfArrayCoercionSpec.scala | 4 +- .../scala/cromwell/ArrayWorkflowSpec.scala | 18 +- .../cromwell/CallCachingWorkflowSpec.scala | 2 +- .../cromwell/CopyWorkflowOutputsSpec.scala | 6 +- ...itSpec.scala => CromwellTestKitSpec.scala} | 44 +- .../cromwell/DeclarationWorkflowSpec.scala | 12 +- .../cromwell/FilePassingWorkflowSpec.scala | 14 +- .../test/scala/cromwell/MapWorkflowSpec.scala | 16 +- .../scala/cromwell/MetadataWatchActor.scala | 20 +- ...ultipleFilesWithSameNameWorkflowSpec.scala | 6 +- .../cromwell/OptionalParamWorkflowSpec.scala | 12 +- .../PostfixQuantifierWorkflowSpec.scala | 12 +- .../scala/cromwell/RestartWorkflowSpec.scala | 6 +- .../scala/cromwell/ScatterWorkflowSpec.scala | 40 +- .../cromwell/SimpleWorkflowActorSpec.scala | 32 +- .../WdlFunctionsAtWorkflowLevelSpec.scala | 6 +- .../scala/cromwell/WorkflowFailSlowSpec.scala | 2 +- .../scala/cromwell/WorkflowOutputsSpec.scala | 20 +- .../cromwell/engine/EngineFunctionsSpec.scala | 12 +- .../cromwell/engine/WorkflowAbortSpec.scala | 4 +- 
.../engine/WorkflowManagerActorSpec.scala | 6 +- .../engine/WorkflowStoreActorSpec.scala | 81 +- .../DefaultBackendJobExecutionActor.scala | 8 +- .../RetryableBackendJobExecutionActor.scala | 6 +- ...etryableBackendLifecycleActorFactory.scala | 4 +- .../engine/backend/mock/package.scala | 2 +- .../SingleWorkflowRunnerActorSpec.scala | 29 +- .../engine/workflow/WorkflowActorSpec.scala | 26 +- .../workflow/WorkflowDescriptorBuilder.scala | 10 +- ...terializeWorkflowDescriptorActorSpec.scala | 68 +- .../SubWorkflowExecutionActorSpec.scala | 213 +++++ .../WorkflowExecutionActorSpec.scala | 14 +- .../EngineJobHashingActorSpec.scala | 22 +- ...jeaBackendIsCopyingCachedOutputsSpec.scala | 39 +- .../ejea/EjeaCheckingJobStoreSpec.scala | 14 +- .../execution/ejea/EjeaPreparingJobSpec.scala | 14 +- .../EjeaRequestingExecutionTokenSpec.scala | 4 +- .../ejea/EjeaUpdatingJobStoreSpec.scala | 4 +- .../ejea/EngineJobExecutionActorSpec.scala | 4 +- .../EngineJobExecutionActorSpecUtil.scala | 10 +- .../execution/ejea/PerTestHelper.scala | 83 +- .../workflow/mocks/DeclarationMock.scala | 21 + .../engine/workflow/mocks/TaskMock.scala | 27 + .../workflow/mocks/WdlExpressionMock.scala | 32 + .../jobstore/JobStoreServiceSpec.scala | 13 +- .../jobstore/JobStoreWriterSpec.scala | 4 +- .../SubWorkflowStoreSpec.scala | 87 ++ .../webservice/CromwellApiServiceSpec.scala | 14 +- .../webservice/EngineStatsActorSpec.scala | 10 +- .../webservice/MetadataBuilderActorSpec.scala | 115 ++- .../filesystems/gcs/ContentTypeOption.scala | 15 - .../filesystems/gcs/GcsFileAttributes.scala | 23 - .../filesystems/gcs/GcsFileSystem.scala | 68 -- .../gcs/GcsFileSystemProvider.scala | 295 ------ .../filesystems/gcs/GcsPathBuilder.scala | 100 ++ .../gcs/GcsPathBuilderFactory.scala | 48 + .../filesystems/gcs/GoogleAuthMode.scala | 186 ---- .../filesystems/gcs/GoogleConfiguration.scala | 14 +- .../cromwell/filesystems/gcs/NioGcsPath.scala | 191 ---- .../filesystems/gcs/auth/GoogleAuthMode.scala | 187 ++++ .../auth/RefreshableOAuth2Credentials.scala | 31 + .../cromwell/filesystems/gcs/package.scala | 6 - .../filesystems/gcs/GcsIntegrationTest.scala | 5 - .../filesystems/gcs/GcsPathBuilderSpec.scala | 31 + .../gcs/GoogleConfigurationSpec.scala | 16 +- .../gcs/GoogleCredentialFactorySpec.scala | 158 ---- .../gcs/MockGcsFileSystemBuilder.scala | 9 - .../filesystems/gcs/NioGcsPathSpec.scala | 291 ------ .../gcs/RefreshTokenModeSpec.scala | 26 - project/Dependencies.scala | 53 +- project/Merging.scala | 5 + project/Settings.scala | 31 +- project/Testing.scala | 20 +- project/Version.scala | 7 +- .../services/metadata/CallMetadataKeys.scala | 2 + .../services/metadata/MetadataQuery.scala | 11 +- .../services/metadata/MetadataService.scala | 3 +- .../impl/MetadataDatabaseAccess.scala | 14 +- .../metadata/impl/ReadMetadataActor.scala | 11 +- .../cromwell/services/ServicesStoreSpec.scala | 1 + src/bin/travis/afterSuccess.sh | 20 + src/bin/travis/publishSnapshot.sh | 11 - src/bin/travis/resources/centaur.inputs | 10 +- src/bin/travis/resources/centaur.wdl | 8 +- src/bin/travis/resources/local_centaur.conf | 3 + src/bin/travis/test.sh | 2 + src/bin/travis/testCentaurJes.sh | 6 +- src/bin/travis/testCentaurLocal.sh | 4 +- src/bin/travis/testCheckPublish.sh | 6 + .../scala/cromwell/CromwellCommandLine.scala | 14 +- src/main/scala/cromwell/Main.scala | 16 +- .../cromwell/CromwellCommandLineSpec.scala | 23 +- .../htcondor/HtCondorBackendFactory.scala | 10 +- .../HtCondorInitializationActor.scala | 6 +- .../htcondor/HtCondorJobExecutionActor.scala | 52 +- 
.../htcondor/HtCondorRuntimeAttributes.scala | 27 +- .../impl/htcondor/HtCondorWrapper.scala | 27 +- .../impl/htcondor/caching/CacheActor.scala | 6 +- .../CachedResultLocalization.scala | 2 +- .../caching/model/CachedExecutionResult.scala | 4 +- .../provider/mongodb/MongoCacheActor.scala | 4 +- .../impl/htcondor/HtCondorCommandSpec.scala | 16 +- .../HtCondorInitializationActorSpec.scala | 8 +- .../HtCondorJobExecutionActorSpec.scala | 48 +- .../HtCondorRuntimeAttributesSpec.scala | 50 +- .../CachedResultLocalizationSpec.scala | 4 +- .../mongodb/MongoCacheActorSpec.scala | 8 +- .../backend/impl/jes/GenomicsFactory.scala | 24 +- .../JesAsyncBackendJobExecutionActor.scala | 276 +++--- .../backend/impl/jes/JesAttributes.scala | 24 +- .../jes/JesBackendLifecycleActorFactory.scala | 29 +- .../impl/jes/JesBackendSingletonActor.scala | 8 +- .../impl/jes/JesCacheHitCopyingActor.scala | 6 +- .../backend/impl/jes/JesCallPaths.scala | 82 -- .../backend/impl/jes/JesConfiguration.scala | 28 +- .../impl/jes/JesExpressionFunctions.scala | 38 +- .../impl/jes/JesFinalizationActor.scala | 31 +- .../backend/impl/jes/JesImplicits.scala | 41 - .../impl/jes/JesInitializationActor.scala | 37 +- .../impl/jes/JesJobCachingActorHelper.scala | 44 +- .../impl/jes/JesJobExecutionActor.scala | 25 +- .../backend/impl/jes/JesJobPaths.scala | 60 ++ .../backend/impl/jes/JesWorkflowPaths.scala | 62 +- .../scala/cromwell/backend/impl/jes/Run.scala | 7 +- .../impl/jes/authentication/JesAuths.scala | 5 + .../jes/authentication/JesCredentials.scala | 5 - .../authentication/JesVMAuthentication.scala | 2 +- .../callcaching/JesBackendFileHashing.scala | 4 +- .../backend/impl/jes/io/package.scala | 9 +- .../statuspolling/JesApiQueryManager.scala | 22 +- .../jes/statuspolling/JesPollingActor.scala | 46 +- .../statuspolling/JesPollingActorClient.scala | 5 +- ...JesAsyncBackendJobExecutionActorSpec.scala | 145 ++- .../backend/impl/jes/JesAttributesSpec.scala | 20 +- .../backend/impl/jes/JesCallPathsSpec.scala | 37 +- .../impl/jes/JesConfigurationSpec.scala | 19 +- .../impl/jes/JesInitializationActorSpec.scala | 20 +- .../impl/jes/JesJobExecutionActorSpec.scala | 111 +++ .../backend/impl/jes/JesTestConfig.scala | 2 +- .../impl/jes/JesWorkflowPathsSpec.scala | 18 +- .../backend/impl/jes/MockObjects.scala | 9 - .../cromwell/backend/impl/jes/RunSpec.scala | 6 +- .../JesApiQueryManagerSpec.scala | 63 +- .../statuspolling/JesPollingActorSpec.scala | 19 +- .../config/ConfigAsyncJobExecutionActor.scala | 23 +- .../sfs/config/ConfigBackendFileHashing.scala | 12 +- .../sfs/config/ConfigHashingStrategy.scala | 26 +- .../impl/sfs/config/ConfigWdlNamespace.scala | 4 +- .../sfs/config/DeclarationValidation.scala | 18 +- .../sfs/GcsWorkflowFileSystemProvider.scala | 36 - .../backend/sfs/SharedFileSystem.scala | 60 +- ...aredFileSystemAsyncJobExecutionActor.scala | 65 +- ...leSystemBackendLifecycleActorFactory.scala | 28 +- ...SharedFileSystemCacheHitCopyingActor.scala | 7 +- .../SharedFileSystemExpressionFunctions.scala | 28 +- .../SharedFileSystemInitializationActor.scala | 18 +- ...haredFileSystemJobCachingActorHelper.scala | 5 +- .../config/ConfigHashingStrategySpec.scala | 10 + ...redFileSystemInitializationActorSpec.scala | 10 +- ...haredFileSystemJobExecutionActorSpec.scala | 53 +- .../backend/sfs/SharedFileSystemSpec.scala | 20 +- .../sfs/TestLocalAsyncJobExecutionActor.scala | 4 +- .../impl/spark/SparkBackendFactory.scala | 10 +- .../impl/spark/SparkInitializationActor.scala | 6 +- .../impl/spark/SparkJobExecutionActor.scala | 44 +- 
.../backend/impl/spark/SparkProcess.scala | 23 +- .../spark/SparkInitializationActorSpec.scala | 7 +- .../spark/SparkJobExecutionActorSpec.scala | 50 +- .../spark/SparkRuntimeAttributesSpec.scala | 11 +- 334 files changed, 8400 insertions(+), 4793 deletions(-) create mode 100644 MakingABackend.MD create mode 100644 SecurityRecommendations.md create mode 100644 backend/src/main/scala/cromwell/backend/BackendJobBreadCrumb.scala delete mode 100644 backend/src/main/scala/cromwell/backend/OutputEvaluator.scala create mode 100644 backend/src/main/scala/cromwell/backend/io/JobPathsWithDocker.scala create mode 100644 backend/src/main/scala/cromwell/backend/io/WorkflowPathsWithDocker.scala create mode 100644 backend/src/main/scala/cromwell/backend/wdl/Command.scala delete mode 100644 backend/src/main/scala/cromwell/backend/wdl/FileSystems.scala create mode 100644 backend/src/main/scala/cromwell/backend/wdl/OutputEvaluator.scala delete mode 100644 backend/src/main/scala/cromwell/backend/wdl/PureFunctions.scala delete mode 100644 backend/src/main/scala/cromwell/backend/wfs/DefaultWorkflowFileSystemProvider.scala create mode 100644 backend/src/main/scala/cromwell/backend/wfs/DefaultWorkflowPathBuilder.scala delete mode 100644 backend/src/main/scala/cromwell/backend/wfs/WorkflowFileSystemProvider.scala create mode 100644 backend/src/main/scala/cromwell/backend/wfs/WorkflowPathBuilder.scala create mode 100644 backend/src/test/scala/cromwell/backend/wdl/PureStandardLibraryFunctionsSpec.scala create mode 100644 core/src/main/resources/logback.xml create mode 100644 core/src/main/scala/cromwell/core/CallKey.scala delete mode 100644 core/src/main/scala/cromwell/core/ExecutionStore.scala delete mode 100644 core/src/main/scala/cromwell/core/OutputStore.scala delete mode 100644 core/src/main/scala/cromwell/core/PathFactory.scala delete mode 100644 core/src/main/scala/cromwell/core/WorkflowSourceFiles.scala create mode 100644 core/src/main/scala/cromwell/core/WorkflowSourceFilesCollection.scala create mode 100644 core/src/main/scala/cromwell/core/path/CustomRetryParams.scala create mode 100644 core/src/main/scala/cromwell/core/path/DefaultPathBuilder.scala create mode 100644 core/src/main/scala/cromwell/core/path/DefaultPathBuilderFactory.scala create mode 100644 core/src/main/scala/cromwell/core/path/JavaWriterImplicits.scala create mode 100644 core/src/main/scala/cromwell/core/path/PathBuilder.scala create mode 100644 core/src/main/scala/cromwell/core/path/PathBuilderFactory.scala rename core/src/main/scala/cromwell/core/{ => path}/PathCopier.scala (65%) create mode 100644 core/src/main/scala/cromwell/core/path/PathFactory.scala create mode 100644 core/src/main/scala/cromwell/core/path/PathImplicits.scala create mode 100644 core/src/main/scala/cromwell/core/path/PathParsingException.scala create mode 100644 core/src/main/scala/cromwell/core/path/PathWriter.scala create mode 100644 core/src/main/scala/cromwell/core/path/proxy/FileSystemProxy.scala create mode 100644 core/src/main/scala/cromwell/core/path/proxy/PathProxy.scala create mode 100644 core/src/main/scala/cromwell/core/path/proxy/RetryableFileSystemProviderProxy.scala create mode 100644 core/src/main/scala/cromwell/util/StopAndLogSupervisor.scala create mode 100644 core/src/main/scala/cromwell/util/TryUtil.scala create mode 100644 core/src/test/scala/cromwell/core/path/RetryableFileSystemProxySpec.scala create mode 100644 core/src/test/scala/cromwell/util/WdlValueJsonFormatterSpec.scala create mode 100644 
database/migration/src/main/resources/changesets/embiggen_metadata_value.xml create mode 100644 database/migration/src/main/resources/changesets/encrypt_and_clear_workflow_options.xml create mode 100644 database/migration/src/main/resources/changesets/rename_workflow_options_in_metadata.xml create mode 100644 database/migration/src/main/resources/changesets/sub_workflow_store.xml create mode 100644 database/migration/src/main/resources/changesets/workflow_store_imports_file.xml delete mode 100644 database/migration/src/main/resources/logback.xml create mode 100644 database/migration/src/main/scala/cromwell/database/migration/custom/BatchedTaskChange.scala create mode 100644 database/migration/src/main/scala/cromwell/database/migration/custom/MigrationTaskChange.scala rename database/migration/src/main/scala/cromwell/database/migration/{metadata/table/symbol => custom}/QueryPaginator.scala (88%) rename database/migration/src/main/scala/cromwell/database/migration/{metadata/table/symbol => custom}/ResultSetIterator.scala (73%) create mode 100644 database/migration/src/main/scala/cromwell/database/migration/workflowoptions/ClearMetadataEntryWorkflowOptions.scala create mode 100644 database/migration/src/main/scala/cromwell/database/migration/workflowoptions/EncryptWorkflowStoreEntryWorkflowOptions.scala create mode 100644 database/migration/src/main/scala/cromwell/database/migration/workflowoptions/RenameWorkflowOptionsInMetadata.scala create mode 100644 database/migration/src/main/scala/cromwell/database/migration/workflowoptions/WorkflowOptionsChange.scala create mode 100644 database/migration/src/main/scala/cromwell/database/migration/workflowoptions/WorkflowOptionsRenaming.scala create mode 100644 database/sql/src/main/scala/cromwell/database/slick/SubWorkflowStoreSlickDatabase.scala create mode 100644 database/sql/src/main/scala/cromwell/database/slick/tables/SubWorkflowStoreEntryComponent.scala create mode 100644 database/sql/src/main/scala/cromwell/database/sql/SubWorkflowStoreSqlDatabase.scala create mode 100644 database/sql/src/main/scala/cromwell/database/sql/tables/SubWorkflowStoreEntry.scala delete mode 100644 engine/src/main/resources/logback.xml delete mode 100644 engine/src/main/scala/cromwell/engine/backend/EnhancedWorkflowOptions.scala create mode 100644 engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/CallMetadataHelper.scala create mode 100644 engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/ExecutionStore.scala create mode 100644 engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/OutputStore.scala create mode 100644 engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/SubWorkflowExecutionActor.scala delete mode 100644 engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WdlLookup.scala create mode 100644 engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowMetadataHelper.scala rename engine/src/{test => main}/scala/cromwell/engine/workflow/workflowstore/InMemoryWorkflowStore.scala (88%) create mode 100644 engine/src/main/scala/cromwell/jobstore/EmptyJobStoreActor.scala create mode 100644 engine/src/main/scala/cromwell/subworkflowstore/EmptySubWorkflowStoreActor.scala create mode 100644 engine/src/main/scala/cromwell/subworkflowstore/SqlSubWorkflowStore.scala create mode 100644 engine/src/main/scala/cromwell/subworkflowstore/SubWorkflowStore.scala create mode 100644 engine/src/main/scala/cromwell/subworkflowstore/SubWorkflowStoreActor.scala rename 
engine/src/test/scala/cromwell/{CromwellTestkitSpec.scala => CromwellTestKitSpec.scala} (94%) create mode 100644 engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/SubWorkflowExecutionActorSpec.scala create mode 100644 engine/src/test/scala/cromwell/engine/workflow/mocks/DeclarationMock.scala create mode 100644 engine/src/test/scala/cromwell/engine/workflow/mocks/TaskMock.scala create mode 100644 engine/src/test/scala/cromwell/engine/workflow/mocks/WdlExpressionMock.scala create mode 100644 engine/src/test/scala/cromwell/subworkflowstore/SubWorkflowStoreSpec.scala delete mode 100644 filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/ContentTypeOption.scala delete mode 100644 filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileAttributes.scala delete mode 100644 filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileSystem.scala delete mode 100644 filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileSystemProvider.scala create mode 100644 filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsPathBuilder.scala create mode 100644 filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsPathBuilderFactory.scala delete mode 100644 filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GoogleAuthMode.scala delete mode 100644 filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/NioGcsPath.scala create mode 100644 filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/auth/GoogleAuthMode.scala create mode 100644 filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/auth/RefreshableOAuth2Credentials.scala delete mode 100644 filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/package.scala delete mode 100644 filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GcsIntegrationTest.scala create mode 100644 filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GcsPathBuilderSpec.scala delete mode 100644 filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GoogleCredentialFactorySpec.scala delete mode 100644 filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/MockGcsFileSystemBuilder.scala delete mode 100644 filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/NioGcsPathSpec.scala delete mode 100644 filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/RefreshTokenModeSpec.scala create mode 100755 src/bin/travis/afterSuccess.sh delete mode 100755 src/bin/travis/publishSnapshot.sh create mode 100755 src/bin/travis/testCheckPublish.sh delete mode 100644 supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesCallPaths.scala delete mode 100644 supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesImplicits.scala create mode 100644 supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobPaths.scala create mode 100644 supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesAuths.scala delete mode 100644 supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesCredentials.scala create mode 100644 supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesJobExecutionActorSpec.scala delete mode 100644 supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/MockObjects.scala delete mode 100644 supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/GcsWorkflowFileSystemProvider.scala diff --git a/.travis.yml b/.travis.yml index 5784eaf62ec..f4d0d9b2eeb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,14 +6,18 @@ scala: jdk: - oraclejdk8 env: - # Setting this variable twice will cause the 'script' section to run twice with the 
respective env var invoked - - BUILD_TYPE=sbt - - BUILD_TYPE=centaurJes - - BUILD_TYPE=centaurLocal + global: + - CENTAUR_BRANCH=develop + matrix: + # Setting this variable twice will cause the 'script' section to run twice with the respective env var invoked + - BUILD_TYPE=sbt + - BUILD_TYPE=checkPublish + - BUILD_TYPE=centaurJes + - BUILD_TYPE=centaurLocal script: - src/bin/travis/test.sh after_success: - - src/bin/travis/publishSnapshot.sh + - src/bin/travis/afterSuccess.sh deploy: provider: script script: src/bin/travis/publishRelease.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index cf48b60be6b..3f7b649eee6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,124 @@ # Cromwell Change Log +## 23 + +* The `meta` and `parameter_meta` blocks are now valid within `workflow` blocks, not just `task` +* The JES backend configuration now has an option `genomics-api-queries-per-100-seconds` to help tune the rate of batch polling against the JES servers. Users with quotas larger than default should make sure to set this value. +* Added an option `call-caching.invalidate-bad-cache-results` (default: `true`). If true, Cromwell will invalidate cached results which have failed to copy as part of a cache hit. +* Timing diagrams and metadata now receive more fine grained workflow states between submission and Running. +* Support for the Pair WDL type (e.g. `Pair[Int, File] floo = (3, "gs://blar/blaz/qlux.txt")`) +* Added support for new WDL functions: + * `zip: (Array[X], Array[Y]) => Array[Pair[X, Y]]` - align items in the two arrays by index and return them as WDL pairs + * `cross: (Array[X], Array[Y]) => Array[Pair[X, Y]]` - create every possible pair from the two input arrays and return them all as WDL pairs + * `transpose: (Array[Array[X]]) => Array[Array[X]]` compute the matrix transpose for a 2D array. Assumes each inner array has the same length. +* By default, `system.abort-jobs-on-terminate` is false when running `java -jar cromwell.jar server`, and true when running `java -jar cromwell.jar run `. +* Enable WDL imports when running in Single Workflow Runner Mode. +* Support for sub workflows (see [Annex A](#annex-a---workflow-outputs)) +* Enable WDL imports when running in Single Workflow Runner Mode as well as Server Mode +* Support for WDL imports through an additional imports.zip parameter +* Support for sub workflows +* Corrected file globbing in JES to correctly report all generated files. Additionally, file globbing in JES now uses bash-style glob syntax instead of python style glob syntax +* Support declarations as graph nodes +* Added the ability to override the default service account that the compute VM is started with via the configuration option `JES.config.genomics.compute-service-account` or through the workflow options parameter `google_compute_service_account`. More details can be found in the README.md +* Fix bugs related to the behavior of Cromwell in Single Workflow Runner Mode. Cromwell will now exit once a workflow completes in Single Workflow Runner Mode. Additionally, when restarting Cromwell in Single Workflow Runner Mode, Cromwell will no longer restart incomplete workflows from a previous session. + +### Annex A - Workflow outputs + +The WDL specification has changed regarding [workflow outputs](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#outputs) to accommodate sub workflows. +This change is backward compatible in terms of runnable WDLs (WDL files using the deprecated workflow outputs syntax will still run the same). 
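(Editorial aside, not part of the original changelog text: a minimal, illustrative WDL sketch of the new `zip` and `transpose` functions listed in the bullets above; the workflow and declaration names are hypothetical.)

```
workflow fn_demo {
  Array[Int] xs = [1, 2]
  Array[String] ys = ["a", "b"]

  # zip aligns items by index: [(1, "a"), (2, "b")]
  Array[Pair[Int, String]] zipped = zip(xs, ys)

  # transpose flips rows and columns: [[1, 3], [2, 4]]
  Array[Array[Int]] cols = transpose([[1, 2], [3, 4]])
}
```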
+The only visible change lies in the metadata (as well as the console output in single workflow mode, when workflow outputs are printed out at the end of a successful workflow). + +TL;DR Unless you are parsing or manipulating the "key" by which workflow outputs are referenced in the metadata (and/or the console output for single workflow mode), you can skip the following explanation. + +*Metadata Response* +``` +{ + ... + outputs { + "task_output_1": "hello", + "task_output_2": "world" + ^ + If you don't manipulate this part of the metadata, then skip this section + } +} +``` + +In order to maintain backward compatibility, workflow outputs expressed with the deprecated syntax are "expanded" to the new syntax. Here is an example: + +``` +task t { + command { + #do something + } + output { + String out1 = "hello" + String out2 = "world" + } +} +``` + +``` + workflow old_syntax { + call t + output { + t.* + } + } +``` + +``` + workflow new_syntax { + call t + output { + String wf_out1 = t.out1 + String wf_out2 = t.out2 + } + } +``` + +The new syntax allows for type checking of the outputs as well as expressions. It also allows for explicitly naming to the outputs. +The old syntax doesn't give the ability to name workflow outputs. For consistency reasons, Cromwell will generate a "new syntax" workflow output for each task output, and name them. +Their name will be generated using their FQN, which would give + +``` +output { + String w.t.out1 = t.out1 + String w.t.out2 = t.out2 +} +``` + +However as the FQN separator is `.`, the name itself cannot contain any `.`. +For that reason, `.` are replaced with `_` : + +*Old syntax expanded to new syntax* +``` +output { + String w_t_out1 = t.out1 + String w_t_out2 = t.out2 +} +``` + +The consequence is that the workflow outputs section of the metadata for `old_syntax` would previously look like + + ``` + outputs { + "w.t.out1": "hello", + "w.t.out2": "hello" + } + ``` + +but it will now look like + +``` + outputs { + "w_t_out1": "hello", + "w_t_out2": "hello" + } +``` + +The same applies for the console output of a workflow run in single workflow mode. + + ## 0.22 * Improved retries for Call Caching and general bug fixes. diff --git a/MakingABackend.MD b/MakingABackend.MD new file mode 100644 index 00000000000..5106b37260c --- /dev/null +++ b/MakingABackend.MD @@ -0,0 +1,197 @@ +# Making a backend + +## Part 0: Introduction + +- These notes were added while making a new AWS backend for Amazon AWS. + +## Part 1 (October 13 2016): The skeleton: + +To start with, I just need to create a bunch of boilerplate which will eventually be filled in with all of the lovely AWS details! + +### Defining the awsBackend project: + +- Added entries to `project/Settings.scala`, `project/Dependencies.scala` and `build.sbt` +- This was mainly just a copy/paste from existing backend projects. I made a few typos renaming everything and linking the dependencies properly though! +- E.g. 
In my first commit I forgot to update the libraryDependencies name for my AWS backend project: +``` + val awsBackendSettings = List( + name := "cromwell-aws-backend", + libraryDependencies ++= awsBackendDependencies + ) ++ commonSettings +``` +- I guessed that I'd need the AWS SDK so I included that immediately in Dependencies.scala: +``` + val awsBackendDependencies = List( + "com.amazonaws" % "aws-java-sdk" % "1.11.41" + ) +``` +- In build.scala I had to also edit the `lazy val root` to include a new `.aggregate(awsBackend)` and a new `.dependsOn(awsBackend)` + +### Directory structure: + +- This is probably going to be autogenerated for you in the directories specified in the above files. I'd already added my own directory structure and sbt managed to pick it up correctly in `supportedBackends/aws`. + +### AWS Job Execution Actor: +- To run a job, Cromwell needs to instantiate a Job Execution actor. I'll fill in the details later but for now, I'll just add the constructor, props, and an unimplemented method definition for `execute`: +``` +class AwsJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, + override val configurationDescriptor: BackendConfigurationDescriptor) extends BackendJobExecutionActor { + + override def execute: Future[BackendJobExecutionResponse] = ??? +} + +object AwsJobExecutionActor { + def props(jobDescriptor: BackendJobDescriptor, + configurationDescriptor: BackendConfigurationDescriptor): Props = Props(new AwsJobExecutionActor(jobDescriptor, configurationDescriptor)) +} +``` + +### Actor factory: +- This is the class which tells Cromwell which classes represent job execution actors, initialization actors and so on. I'm just adding a skeleton for now, with a constructor of the form the Cromwell expects: +``` +case class AwsBackendActorFactory(name: String, configurationDescriptor: BackendConfigurationDescriptor) extends BackendLifecycleActorFactory { + + override def jobExecutionActorProps(jobDescriptor: BackendJobDescriptor, + initializationData: Option[BackendInitializationData], + serviceRegistryActor: ActorRef, + backendSingletonActor: Option[ActorRef]): Props = AwsJobExecutionActor.props(jobDescriptor, configurationDescriptor) +} +``` +- There are a few other actor definitions that can be added to this file over time. But the only one that Cromwell *requires* to work is the job execution actor. + +### Reference conf: + +- Reference.conf is a set of reference options which shows people how to enable the backends that they want. So I'll add the initial config which people would add if they wanted the AWS backend (commented out in the reference so it's not enabled by default). This goes below all the other backend references: +``` + #AWS { + # actor-factory = "cromwell.backend.impl.aws.AwsBackendActorFactory" + # config { + # + # } + #} +``` + +### Application.conf + +- OK so I've now told people how to add this backend... Now I actually add it to my own personal configuration file so I can try it out! +``` +backend { + default = "AWS" + providers { + AWS { + actor-factory = "cromwell.backend.impl.aws.AwsBackendActorFactory" + config { + + } + } + } +} +``` + +### Trying it out +So we now have a backend skeleton! What happens when we run it? Well hopefully Cromwell will instantiate the backend far enough to reach the unimplemented execute method and then fall over. Let's give it a go! +- I fire up cromwell in server mode with my modified application.conf. 
+- I create a sample WDL that would sleep for 20 seconds if it actually worked: +The input WDL: +``` +task sleep { + command { sleep 20 } +} +workflow main { + call sleep +} +``` +- I submit the WDL to the swagger endpoint (http://localhost:8000/swagger/index.html?url=/swagger/cromwell.yaml) and watch the server logs... +- And as expected: +``` +2016-10-13 13:14:29,017 cromwell-system-akka.dispatchers.engine-dispatcher-39 INFO - MaterializeWorkflowDescriptorActor [UUID(ddd827ba)]: Call-to-Backend assignments: main.sleep -> AWS +2016-10-13 13:14:30,167 cromwell-system-akka.dispatchers.engine-dispatcher-39 INFO - WorkflowExecutionActor-ddd827ba-091f-4c6f-b98f-cc9825717007 [UUID(ddd827ba)]: Starting calls: main.sleep:NA:1 +2016-10-13 13:14:30,983 cromwell-system-akka.actor.default-dispatcher-5 ERROR - guardian failed, shutting down system +scala.NotImplementedError: an implementation is missing + at scala.Predef$.$qmark$qmark$qmark(Predef.scala:230) + at cromwell.backend.impl.aws.AwsJobExecutionActor.execute(AwsJobExecutionActor.scala:12) +``` +- OK, so now I just need to implement `execute(): Future[JobExecutionResult]` and Cromwell can interface with AWS. How hard can it be! + +## Part 2 (October 13 2016): Using Amazon to sleep 20 seconds + +### Starting point +- This was a learning experience after using the Google pipelines service to submit jobs! +- To get myself started, I've manually created an ECS cluster which I've called `ecs-t2micro-cluster` via the ECS web console. + +### Trial and Error + +- I see in the aws sdk docs that there's an AmazonECSAsyncClient class. That sounds promising! Luckily I already added the dependency on AWS SDK in Part 1 so I guess I can just write something basic in my AwsJobExecutionActor class and see what happens: + +- I ended up having to add some credentials options to the configuration file. The new `reference.conf` now looks like: +``` + #AWS { + # actor-factory = "cromwell.backend.impl.aws.AwsBackendActorFactory" + # config { + # ## These two settings are required to authenticate with the ECS service: + # accessKeyId = "..." + # secretKey = "..." + # } + #} +``` + +- After a little bit of experimentation with the ECS API, I was able to come up with a backend that works but is very limited... It is entirely synchronous in the `execute` method. That's certainly not a final answer but it works OK for running a single task. And we can now run that single `sleep` command successfully on the Amazon EC2 Container Service! 
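- One note before the code: the `execute` method below leans on a small `AwsSdkAsyncHandler` helper that isn't shown in these notes. All it does is adapt the AWS SDK's callback-style `AsyncHandler` into a Scala `Future`; a minimal sketch of the idea (not the exact class) looks something like this:
```
import com.amazonaws.AmazonWebServiceRequest
import com.amazonaws.handlers.AsyncHandler

import scala.concurrent.{Future, Promise}

// Pairs the original request with its result so callers can pattern match on both.
final case class AwsSdkAsyncResult[A <: AmazonWebServiceRequest, B](request: A, result: B)

class AwsSdkAsyncHandler[A <: AmazonWebServiceRequest, B] extends AsyncHandler[A, B] {
  private val promise = Promise[AwsSdkAsyncResult[A, B]]()

  // Invoked by the SDK when the async call completes successfully.
  override def onSuccess(request: A, result: B): Unit = {
    promise.success(AwsSdkAsyncResult(request, result))
    ()
  }

  // Invoked by the SDK when the async call fails.
  override def onError(exception: Exception): Unit = {
    promise.failure(exception)
    ()
  }

  // The future that execute() maps over / awaits.
  val future: Future[AwsSdkAsyncResult[A, B]] = promise.future
}
```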
+ - The synchronous `execute` method: +``` +class AwsJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, + override val configurationDescriptor: BackendConfigurationDescriptor) extends BackendJobExecutionActor { + + val awsAccessKeyId = configurationDescriptor.backendConfig.as[String]("accessKeyId") + val awsSecretKey = configurationDescriptor.backendConfig.as[String]("secretKey") + + val clusterName = "ecs-t2micro-cluster" + + val credentials = new AWSCredentials { + override def getAWSAccessKeyId: String = awsAccessKeyId + override def getAWSSecretKey: String = awsSecretKey + } + val ecsAsyncClient = new AmazonECSAsyncClient(credentials) + + override def execute: Future[BackendJobExecutionResponse] = { + + val commandOverride = new ContainerOverride().withName("simple-app").withCommand(jobDescriptor.call.instantiateCommandLine(Map.empty, OnlyPureFunctions, identity).get) + + val runRequest: RunTaskRequest = new RunTaskRequest() + .withCluster(clusterName) + .withCount(1) + .withTaskDefinition("ubuntuTask:1") + .withOverrides(new TaskOverride().withContainerOverrides(commandOverride)) + + val submitResultHandler = new AwsSdkAsyncHandler[RunTaskRequest, RunTaskResult]() + val _ = ecsAsyncClient.runTaskAsync(runRequest, submitResultHandler) + + submitResultHandler.future map { + case AwsSdkAsyncResult(_, result) => + log.info("AWS submission completed:\n{}", result.toString) + val taskArn= result.getTasks.asScala.head.getTaskArn + val taskDescription = waitUntilDone(taskArn) + + log.info("AWS task completed!\n{}", taskDescription.toString) + SucceededResponse(jobDescriptor.key, Option(0), Map.empty, None, Seq.empty) + } + } + + private def waitUntilDone(taskArn: String): Task = { + val describeTasksRequest = new DescribeTasksRequest().withCluster(clusterName).withTasks(List(taskArn).asJava) + + val resultHandler = new AwsSdkAsyncHandler[DescribeTasksRequest, DescribeTasksResult]() + val _ = ecsAsyncClient.describeTasksAsync(describeTasksRequest, resultHandler) + + val desribedTasks = Await.result(resultHandler.future, Duration.Inf) + val taskDescription = desribedTasks.result.getTasks.asScala.head + if (taskDescription.getLastStatus == DesiredStatus.STOPPED.toString) { + taskDescription + } else { + Thread.sleep(200) + waitUntilDone(taskArn) + } + } +} +``` + + diff --git a/README.md b/README.md index 1800278e932..ca5c1457610 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,7 @@ A [Workflow Management System](https://en.wikipedia.org/wiki/Workflow_management * [Workflow Submission](#workflow-submission) * [Database](#database) * [SIGINT abort handler](#sigint-abort-handler) +* [Security](#security) * [Backends](#backends) * [Backend Filesystems](#backend-filesystems) * [Shared Local Filesystem](#shared-local-filesystem) @@ -35,6 +36,7 @@ A [Workflow Management System](https://en.wikipedia.org/wiki/Workflow_management * [Caching configuration](#caching-configuration) * [Docker](#docker) * [CPU, Memory and Disk](#cpu-memory-and-disk) + * [Native Specifications](#native-specifications) * [Spark Backend](#spark-backend) * [Configuring Spark Project](#configuring-spark-project) * [Configuring Spark Master and Deploy Mode](#configuring-spark-master-and-deploy-mode) @@ -63,6 +65,12 @@ A [Workflow Management System](https://en.wikipedia.org/wiki/Workflow_management * [Logging](#logging) * [Workflow Options](#workflow-options) * [Call Caching](#call-caching) + * [Configuring Call Caching](#configuring-call-caching) + * [Call Caching Workflow 
Options](#call-caching-workflow-options) + * [Local Filesystem Options](#local-filesystem-options) +* [Imports](#imports) +* [Sub Workflows](#sub-workflows) +* [Meta blocks](#meta-blocks) * [REST API](#rest-api) * [REST API Versions](#rest-api-versions) * [POST /api/workflows/:version](#post-apiworkflowsversion) @@ -124,23 +132,29 @@ See the [migration document](MIGRATION.md) for more details. Run the JAR file with no arguments to get the usage message: ``` + + $ java -jar cromwell.jar java -jar cromwell.jar Actions: -run [ [ - []]] +run [] [] + [] [] Given a WDL file and JSON file containing the value of the workflow inputs, this will run the workflow locally and print out the outputs in JSON format. The workflow options file specifies some runtime configuration for the workflow (see README for details). The workflow metadata - output is an optional file path to output the metadata. + output is an optional file path to output the metadata. The + directory of WDL files is optional. However, it is required + if the primary workflow imports workflows that are outside + of the root directory of the Cromwell project. + Use a single dash ("-") to skip optional files. Ex: - run noinputs.wdl - - metadata.json + run noinputs.wdl - - metadata.json - -server + server Starts a web server on port 8000. See the web server documentation for more details about the API endpoints. @@ -232,6 +246,39 @@ $ cat my_wf.metadata.json } ``` +The fifth, optional parameter to the 'run' subcommand is a zip file which contains WDL source files. This zip file can be passed +and your primary workflow can import any WDL's from that collection and re-use those tasks. + +For example, consider you have a directory of WDL files: +``` +my_WDLs +└──cgrep.wdl +└──ps.wdl +└──wc.wdl +``` + +If you zip that directory to my_WDLs.zip, you have the option to pass it in as the last parameter in your run command +and be able to reference these WDLs as imports in your primary WDL. For example, your primary WDL can look like this: +``` +import "ps.wdl" as ps +import "cgrep.wdl" +import "wc.wdl" as wordCount + +workflow threestep { + +call ps.ps as getStatus +call cgrep.cgrep { input: str = getStatus.x } +call wordCount { input: str = ... } + +} + +``` +The command to run this WDL, without needing any inputs, workflow options or metadata files would look like: + +``` +$ java -jar cromwell.jar run threestep.wdl - - - /path/to/my_WDLs.zip +``` + ## server Start a server on port 8000, the API for the server is described in the [REST API](#rest-api) section. @@ -292,17 +339,14 @@ Then, edit the configuration file `database` stanza, as follows: ``` database { - config = main.mysql - - main { - mysql { - db.url = "jdbc:mysql://localhost:3306/cromwell" - db.user = "root" - db.password = "" - db.driver = "com.mysql.jdbc.Driver" - db.connectionTimeout = 5000 # NOTE: The default 1000ms is often too short for production mysql use - driver = "slick.driver.MySQLDriver$" - } + + driver = "slick.driver.MySQLDriver$" + db { + driver = "com.mysql.jdbc.Driver" + url = "jdbc:mysql://host/cromwell" + user = "user" + password = "pass" + connectionTimeout = 5000 } test { @@ -323,6 +367,14 @@ system { Or, via `-Dsystem.abort-jobs-on-terminate=true` command line option. +By default, this value is false when running `java -jar cromwell.jar server`, and true when running `java -jar cromwell.jar run `. + +# Security + + - Cromwell is NOT on its own a security appliance! + - Only YOU are responsible for your own security! 
+ - Some recommendations and suggestions on security can be found in the [SecurityRecommendations.md](SecurityRecommendations.md) document + # Backends A backend represents a way to run the user's command specified in the `task` section. Cromwell allows for backends conforming to @@ -889,6 +941,18 @@ This backend supports CPU, memory and disk size configuration through the use of It they are not set, HtCondor backend will use default values. +### Native Specifications +The use of runtime attribute 'nativeSpecs' allows to the user to attach custom HtCondor configuration to tasks. +An example of this is when there is a need to work with 'requirements' or 'rank' configuration. + +``` +"runtimeAttributes": { + "nativeSpecs": ["requirements = Arch == \"INTEL\"", "rank = Memory >= 64"] +} +``` + +nativeSpecs attribute needs to be specified as an array of strings to work. + ## Spark Backend This backend adds support for execution of spark jobs in a workflow using the existing wdl format. @@ -1053,6 +1117,7 @@ backend { config { project = "my-project" root = "gs://my-bucket" + genomics-api-queries-per-100-seconds = 1000 . . . @@ -1062,6 +1127,8 @@ backend { } ``` +If your project has API quotas other than the defaults set the `genomics-api-queries-per-100-seconds` value to be the lesser of the `Queries per 100 seconds per user` and `Queries per 100 seconds` quotas. This value will be used to help tune Cromwell's rate of interaction with JES. + ### Configuring Authentication The `google` stanza in the Cromwell configuration file defines how to authenticate to Google. There are four different @@ -1139,6 +1206,8 @@ Creating the account will cause the JSON file to be downloaded. The structure o Most importantly, the value of the `client_email` field should go into the `service-account-id` field in the configuration (see below). The `private_key` portion needs to be pulled into its own file (e.g. `my-key.pem`). The `\n`s in the string need to be converted to newline characters. +While technically not part of Service Account authorization mode, one can also override the default service account that the compute VM is started with via the configuration option `JES.config.genomics.compute-service-account` or through the workflow options parameter `google_compute_service_account`. It's important that this service account, and the service account specified in `JES.config.genomics.auth` can both read/write the location specified by `JES.config.root` + #### Refresh Token A **refresh_token** field must be specified in the [workflow options](#workflow-options) when submitting the job. Omitting this field will cause the workflow to fail. @@ -1509,7 +1578,8 @@ Valid keys and their meanings: * The default is `NoNewCalls` but this can be changed using the `workflow-options.workflow-failure-mode` configuration option. * **backend** - Override the default backend specified in the Cromwell configuration for this workflow only. * JES Backend Only - * **jes_gcs_root** - (JES backend only) Specifies where outputs of the workflow will be written. Expects this to be a GCS URL (e.g. `gs://my-bucket/workflows`). If this is not set, this defaults to the value within `backend.jes.root` in the [configuration](#configuring-cromwell). + * **jes_gcs_root** - (JES backend only) Specifies where outputs of the workflow will be written. Expects this to be a GCS URL (e.g. `gs://my-bucket/workflows`). If this is not set, this defaults to the value within `backend.jes.config.root` in the [configuration](#configuring-cromwell). 
+ * **google_compute_service_account** - (JES backend only) Specifies an alternate service account to use on the compute instance (e.g. my-new-svcacct@my-google-project.iam.gserviceaccount.com). If this is not set, this defaults to the value within `backend.jes.config.genomics.compute-service-account` in the [configuration](#configuring-cromwell) if specified or `default` otherwise. * **google_project** - (JES backend only) Specifies which google project to execute this workflow. * **refresh_token** - (JES backend only) Only used if `localizeWithRefreshToken` is specified in the [configuration file](#configuring-cromwell). * **auth_bucket** - (JES backend only) defaults to the the value in **jes_gcs_root**. This should represent a GCS URL that only Cromwell can write to. The Cromwell account is determined by the `google.authScheme` (and the corresponding `google.userAuth` and `google.serviceAuth`) @@ -1528,16 +1598,20 @@ Cromwell's call cache is maintained in its database. For best mileage with call > **Note:** If call caching is enabled, be careful not to change the contents of the output directory for any previously run job. Doing so might cause cache hits in Cromwell to copy over modified data and Cromwell currently does not check that the contents of the output directory changed. +## Configuring Call Caching To enable Call Caching, add the following to your Cromwell [configuration](#configuring-cromwell): ``` call-caching { enabled = true + invalidate-bad-cache-results = true } ``` When `call-caching.enabled=true` (default: `false`), Cromwell will be able to to copy results from previously run jobs (when appropriate). +When `invalidate-bad-cache-results=true` (default: `true`), Cromwell will invalidate any cache results which fail to copy during a cache-hit. This is usually desired but might be unwanted if a cache might fail to copy for external reasons, such as a difference in user authentication. +## Call Caching Workflow Options Cromwell also accepts two [workflow option](#workflow-options) related to call caching: * If call caching is enabled, but one wishes to run a workflow but not add any of the calls into the call cache when they finish, the `write_to_cache` option can be set to `false`. This value defaults to `true`. @@ -1545,6 +1619,508 @@ Cromwell also accepts two [workflow option](#workflow-options) related to call c > **Note:** If call caching is disabled, the to workflow options `read_from_cache` and `write_to_cache` will be ignored and the options will be treated as though they were 'false'. +## Local Filesystem Options +When running a job on the Config (Shared Filesystem) backend, Cromwell provides some additional options in the backend's config section: + +``` + config { + ... + filesystems { + ... + local { + ... + caching { + # When copying a cached result, what type of file duplication should occur. Attempted in the order listed below: + duplication-strategy: [ + "hard-link", "soft-link", "copy" + ] + + # Possible values: file, path + # "file" will compute an md5 hash of the file content. + # "path" will compute an md5 hash of the file path. This strategy will only be effective if the duplication-strategy (above) is set to "soft-link", + # in order to allow for the original file path to be hashed. + # Default: file + hashing-strategy: "file" + + # When true, will check if a sibling file with the same name and the .md5 extension exists, and if it does, use the content of this file as a hash. 
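+              # E.g. for an output file out.bam, a sibling file out.bam.md5 whose content is the md5 digest would be used as the hash.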
+ # If false or the md5 does not exist, will proceed with the above-defined hashing strategy. + # Default: false + check-sibling-md5: false + } + } + } + } +``` +# Imports + +Import statements inside of a WDL file are supported by Cromwell when running in Server mode as well as Single Workflow Runner Mode. + +In Single Workflow Runner Mode, you pass in a zip file which includes the WDL files referenced by the import statements. Cromwell requires the zip file to be passed in as a command line argument, as explained by the section [run](#run). + +For example, given a workflow `wf.wdl` and an imports directory `WdlImports.zip`, a sample command would be: +``` +java -jar cromwell.jar wf.wdl wf.inputs - - WdlImports.zip +``` + +In Server Mode, you pass in a zip file using the parameter `wdlDependencies` via the [POST /api/workflows/:version](#post-apiworkflowsversion) endpoint. + + +# Sub Workflows + +WDL allows the execution of an entire workflow as a step in a larger workflow (see WDL SPEC for more details), which is what will be referred to as a sub workflow going forward. +Cromwell supports execution of such workflows. Note that sub workflows can themselves contain sub workflows, etc... There is no limitation as to how deeply workflows can be nested. + +## Execution + +Sub workflows are executed exactly as a task would be. +*This means that if another call depends on an output of a sub workflow, this call will run when the whole sub workflow completes (successfully).* +For example, in the following case : + +`main.wdl` +``` +import "sub_wdl.wdl" as sub + +workflow main_workflow { + + call sub.hello_and_goodbye { input: hello_and_goodbye_input = "sub world" } + + # call myTask { input: hello_and_goodbye.hello_output } + + output { + String main_output = hello_and_goodbye.hello_output + } +} +``` + +`sub_wdl.wdl` +``` +task hello { + String addressee + command { + echo "Hello ${addressee}!" + } + runtime { + docker: "ubuntu:latest" + } + output { + String salutation = read_string(stdout()) + } +} + +task goodbye { + String addressee + command { + echo "Goodbye ${addressee}!" + } + runtime { + docker: "ubuntu:latest" + } + output { + String salutation = read_string(stdout()) + } +} + +workflow hello_and_goodbye { + String hello_and_goodbye_input + + call hello {input: addressee = hello_and_goodbye_input } + call goodbye {input: addressee = hello_and_goodbye_input } + + output { + String hello_output = hello.salutation + String goodbye_output = goodbye.salutation + } +} +``` + +`myTask` will start only when hello_and_goodbye completes (which means all of its calls are done), even though `myTask` only needs the output of hello in the hello_and_goodbye sub workflow. +If hello_and_goodbye fails, then `myTask` won't be executed. +Only workflow outputs are visible outside a workflow, which means that references to outputs produced by a sub workflow will only be valid if those outputs are exposed in the workflow output section. + +Sub workflows are executed in the context of a main workflow, which means that operations that are normally executed once per workflow (set up, clean up, outputs copying, log copying, etc...) +will NOT be re-executed for each sub workflow. For instance if a resource is created during workflow initialization, sub workflows will need to share this same resource. +Workflow outputs will be copied for the main root workflow but not for intermediate sub workflows. + +Restarts, aborts, and call-caching work exactly as they would with tasks. 
+All tasks run by a sub workflow are eligible for call caching under the same rules as any other task. +However, workflows themselves are not cached as such. Which means that running the exact same workflow twice with call caching on will trigger each task to cache individually, +but not the workflow itself. + +The root path for sub workflow execution files (scripts, output files, logs) will be under the parent workflow call directory. +For example, the execution directory for the above main workflow would look like the following: + +``` +cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd/ <- main workflow id +└── call-hello_and_goodbye <- call directory for call hello_and_goodbye in the main workflow + └── hello_and_goodbye <- name of the sub workflow + └── a6365f91-c807-465a-9186-a5d3da98fe11 <- sub workflow id + ├── call-goodbye + │   └── execution + │   ├── rc + │   ├── script + │   ├── script.background + │   ├── script.submit + │   ├── stderr + │   ├── stderr.background + │   ├── stdout + │   └── stdout.background + └── call-hello + └── execution + ├── rc + ├── script + ├── script.background + ├── script.submit + ├── stderr + ├── stderr.background + ├── stdout + └── stdout.background + +``` + +## Metadata +Each sub workflow will have its own workflow ID. This ID will appear in the metadata of the parent workflow, in the call section corresponding to the sub workflow, under the "subWorkflowId" attribute. +For example, querying the `main_workflow` metadata above (minus the `myTask` call) , could result in something like this: + +`GET /api/workflows/v2/1d919bd4-d046-43b0-9918-9964509689dd/metadata` + +``` +{ + "workflowName": "main_workflow", + "submittedFiles": { + "inputs": "{}", + "workflow": "import \"sub_wdl.wdl\" as sub\n\nworkflow main_workflow {\n\n call sub.hello_and_goodbye { input: hello_and_goodbye_input = \"sub world\" }\n \n # call myTask { input: hello_and_goodbye.hello_output }\n \n output {\n String main_output = hello_and_goodbye.hello_output\n }\n}", + "options": "{\n\n}" + }, + "calls": { + "main_workflow.hello_and_goodbye": [ + { + "executionStatus": "Done", + "shardIndex": -1, + "outputs": { + "goodbye_output": "Goodbye sub world!", + "hello_output": "Hello sub world!" + }, + "inputs": { + "hello_and_goodbye_input": "sub world" + }, + "end": "2016-11-17T14:13:41.117-05:00", + "attempt": 1, + "start": "2016-11-17T14:13:39.236-05:00", + "subWorkflowId": "a6365f91-c807-465a-9186-a5d3da98fe11" + } + ] + }, + "outputs": { + "main_output": "Hello sub world!" + }, + "workflowRoot": "/cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd", + "id": "1d919bd4-d046-43b0-9918-9964509689dd", + "inputs": {}, + "submission": "2016-11-17T14:13:39.104-05:00", + "status": "Succeeded", + "end": "2016-11-17T14:13:41.120-05:00", + "start": "2016-11-17T14:13:39.204-05:00" +} +``` + +The sub workflow ID can be queried separately: + +`GET /api/workflows/v2/a6365f91-c807-465a-9186-a5d3da98fe11/metadata` + +``` +{ + "workflowName": "hello_and_goodbye", + "calls": { + "sub.hello_and_goodbye.hello": [ + { + "executionStatus": "Done", + "stdout": "/cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd/call-hello_and_goodbye/hello_and_goodbye/a6365f91-c807-465a-9186-a5d3da98fe11/call-hello/execution/stdout", + "shardIndex": -1, + "outputs": { + "salutation": "Hello sub world!" 
+ }, + "runtimeAttributes": { + "docker": "ubuntu:latest", + "failOnStderr": false, + "continueOnReturnCode": "0" + }, + "cache": { + "allowResultReuse": true + }, + "Effective call caching mode": "CallCachingOff", + "inputs": { + "addressee": "sub world" + }, + "returnCode": 0, + "jobId": "49830", + "backend": "Local", + "end": "2016-11-17T14:13:40.712-05:00", + "stderr": "/cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd/call-hello_and_goodbye/hello_and_goodbye/a6365f91-c807-465a-9186-a5d3da98fe11/call-hello/execution/stderr", + "callRoot": "/cromwell/cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd/call-hello_and_goodbye/hello_and_goodbye/a6365f91-c807-465a-9186-a5d3da98fe11/call-hello", + "attempt": 1, + "executionEvents": [ + { + "startTime": "2016-11-17T14:13:39.240-05:00", + "description": "Pending", + "endTime": "2016-11-17T14:13:39.240-05:00" + }, + { + "startTime": "2016-11-17T14:13:39.240-05:00", + "description": "RequestingExecutionToken", + "endTime": "2016-11-17T14:13:39.240-05:00" + }, + { + "startTime": "2016-11-17T14:13:39.240-05:00", + "description": "PreparingJob", + "endTime": "2016-11-17T14:13:39.243-05:00" + }, + { + "startTime": "2016-11-17T14:13:39.243-05:00", + "description": "RunningJob", + "endTime": "2016-11-17T14:13:40.704-05:00" + }, + { + "startTime": "2016-11-17T14:13:40.704-05:00", + "description": "UpdatingJobStore", + "endTime": "2016-11-17T14:13:40.712-05:00" + } + ], + "start": "2016-11-17T14:13:39.239-05:00" + } + ], + "sub.hello_and_goodbye.goodbye": [ + { + "executionStatus": "Done", + "stdout": "/cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd/call-hello_and_goodbye/hello_and_goodbye/a6365f91-c807-465a-9186-a5d3da98fe11/call-goodbye/execution/stdout", + "shardIndex": -1, + "outputs": { + "salutation": "Goodbye sub world!" + }, + "runtimeAttributes": { + "docker": "ubuntu:latest", + "failOnStderr": false, + "continueOnReturnCode": "0" + }, + "cache": { + "allowResultReuse": true + }, + "Effective call caching mode": "CallCachingOff", + "inputs": { + "addressee": "sub world" + }, + "returnCode": 0, + "jobId": "49831", + "backend": "Local", + "end": "2016-11-17T14:13:41.115-05:00", + "stderr": "/cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd/call-hello_and_goodbye/hello_and_goodbye/a6365f91-c807-465a-9186-a5d3da98fe11/call-goodbye/execution/stderr", + "callRoot": "/cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd/call-hello_and_goodbye/hello_and_goodbye/a6365f91-c807-465a-9186-a5d3da98fe11/call-goodbye", + "attempt": 1, + "executionEvents": [ + { + "startTime": "2016-11-17T14:13:39.240-05:00", + "description": "Pending", + "endTime": "2016-11-17T14:13:39.240-05:00" + }, + { + "startTime": "2016-11-17T14:13:39.240-05:00", + "description": "RequestingExecutionToken", + "endTime": "2016-11-17T14:13:39.240-05:00" + }, + { + "startTime": "2016-11-17T14:13:39.240-05:00", + "description": "PreparingJob", + "endTime": "2016-11-17T14:13:39.243-05:00" + }, + { + "startTime": "2016-11-17T14:13:39.243-05:00", + "description": "RunningJob", + "endTime": "2016-11-17T14:13:41.112-05:00" + }, + { + "startTime": "2016-11-17T14:13:41.112-05:00", + "description": "UpdatingJobStore", + "endTime": "2016-11-17T14:13:41.115-05:00" + } + ], + "start": "2016-11-17T14:13:39.239-05:00" + } + ] + }, + "outputs": { + "goodbye_output": "Goodbye sub world!", + "hello_output": "Hello sub world!" 
+ }, + "workflowRoot": "/cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd/call-hello_and_goodbye/hello_and_goodbye/a6365f91-c807-465a-9186-a5d3da98fe11", + "id": "a6365f91-c807-465a-9186-a5d3da98fe11", + "inputs": { + "hello_and_goodbye_input": "sub world" + }, + "status": "Succeeded", + "parentWorkflowId": "1d919bd4-d046-43b0-9918-9964509689dd", + "end": "2016-11-17T14:13:41.116-05:00", + "start": "2016-11-17T14:13:39.236-05:00" +} +``` + +It's also possible to set the URL query parameter `expandSubWorkflows` to `true` to automatically include sub workflows metadata (`false` by default). + +`GET api/workflows/v2/1d919bd4-d046-43b0-9918-9964509689dd/metadata?expandSubWorkflows=true` + +``` +{ + "workflowName": "main_workflow", + "submittedFiles": { + "inputs": "{}", + "workflow": "import \"sub_wdl.wdl\" as sub\n\nworkflow main_workflow {\n\n call sub.hello_and_goodbye { input: hello_and_goodbye_input = \"sub world\" }\n \n # call myTask { input: hello_and_goodbye.hello_output }\n \n output {\n String main_output = hello_and_goodbye.hello_output\n }\n}", + "options": "{\n\n}" + }, + "calls": { + "main_workflow.hello_and_goodbye": [{ + "executionStatus": "Done", + "subWorkflowMetadata": { + "workflowName": "hello_and_goodbye", + "calls": { + "sub.hello_and_goodbye.hello": [{ + "executionStatus": "Done", + "stdout": "/cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd/call-hello_and_goodbye/hello_and_goodbye/a6365f91-c807-465a-9186-a5d3da98fe11/call-hello/execution/stdout", + "shardIndex": -1, + "outputs": { + "salutation": "Hello sub world!" + }, + "runtimeAttributes": { + "docker": "ubuntu:latest", + "failOnStderr": false, + "continueOnReturnCode": "0" + }, + "cache": { + "allowResultReuse": true + }, + "Effective call caching mode": "CallCachingOff", + "inputs": { + "addressee": "sub world" + }, + "returnCode": 0, + "jobId": "49830", + "backend": "Local", + "end": "2016-11-17T14:13:40.712-05:00", + "stderr": "/cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd/call-hello_and_goodbye/hello_and_goodbye/a6365f91-c807-465a-9186-a5d3da98fe11/call-hello/execution/stderr", + "callRoot": "cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd/call-hello_and_goodbye/hello_and_goodbye/a6365f91-c807-465a-9186-a5d3da98fe11/call-hello", + "attempt": 1, + "executionEvents": [{ + "startTime": "2016-11-17T14:13:39.240-05:00", + "description": "Pending", + "endTime": "2016-11-17T14:13:39.240-05:00" + }, { + "startTime": "2016-11-17T14:13:39.240-05:00", + "description": "RequestingExecutionToken", + "endTime": "2016-11-17T14:13:39.240-05:00" + }, { + "startTime": "2016-11-17T14:13:39.240-05:00", + "description": "PreparingJob", + "endTime": "2016-11-17T14:13:39.243-05:00" + }, { + "startTime": "2016-11-17T14:13:39.243-05:00", + "description": "RunningJob", + "endTime": "2016-11-17T14:13:40.704-05:00" + }, { + "startTime": "2016-11-17T14:13:40.704-05:00", + "description": "UpdatingJobStore", + "endTime": "2016-11-17T14:13:40.712-05:00" + }], + "start": "2016-11-17T14:13:39.239-05:00" + }], + "sub.hello_and_goodbye.goodbye": [{ + "executionStatus": "Done", + "stdout": "/cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd/call-hello_and_goodbye/hello_and_goodbye/a6365f91-c807-465a-9186-a5d3da98fe11/call-goodbye/execution/stdout", + "shardIndex": -1, + "outputs": { + "salutation": "Goodbye sub world!" 
+ }, + "runtimeAttributes": { + "docker": "ubuntu:latest", + "failOnStderr": false, + "continueOnReturnCode": "0" + }, + "cache": { + "allowResultReuse": true + }, + "Effective call caching mode": "CallCachingOff", + "inputs": { + "addressee": "sub world" + }, + "returnCode": 0, + "jobId": "49831", + "backend": "Local", + "end": "2016-11-17T14:13:41.115-05:00", + "stderr": "/cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd/call-hello_and_goodbye/hello_and_goodbye/a6365f91-c807-465a-9186-a5d3da98fe11/call-goodbye/execution/stderr", + "callRoot": "/cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd/call-hello_and_goodbye/hello_and_goodbye/a6365f91-c807-465a-9186-a5d3da98fe11/call-goodbye", + "attempt": 1, + "executionEvents": [{ + "startTime": "2016-11-17T14:13:39.240-05:00", + "description": "Pending", + "endTime": "2016-11-17T14:13:39.240-05:00" + }, { + "startTime": "2016-11-17T14:13:39.240-05:00", + "description": "RequestingExecutionToken", + "endTime": "2016-11-17T14:13:39.240-05:00" + }, { + "startTime": "2016-11-17T14:13:39.240-05:00", + "description": "PreparingJob", + "endTime": "2016-11-17T14:13:39.243-05:00" + }, { + "startTime": "2016-11-17T14:13:39.243-05:00", + "description": "RunningJob", + "endTime": "2016-11-17T14:13:41.112-05:00" + }, { + "startTime": "2016-11-17T14:13:41.112-05:00", + "description": "UpdatingJobStore", + "endTime": "2016-11-17T14:13:41.115-05:00" + }], + "start": "2016-11-17T14:13:39.239-05:00" + }] + }, + "outputs": { + "goodbye_output": "Goodbye sub world!", + "hello_output": "Hello sub world!" + }, + "workflowRoot": "/cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd/call-hello_and_goodbye/hello_and_goodbye/a6365f91-c807-465a-9186-a5d3da98fe11", + "id": "a6365f91-c807-465a-9186-a5d3da98fe11", + "inputs": { + "hello_and_goodbye_input": "sub world" + }, + "status": "Succeeded", + "parentWorkflowId": "1d919bd4-d046-43b0-9918-9964509689dd", + "end": "2016-11-17T14:13:41.116-05:00", + "start": "2016-11-17T14:13:39.236-05:00" + }, + "shardIndex": -1, + "outputs": { + "goodbye_output": "Goodbye sub world!", + "hello_output": "Hello sub world!" + }, + "inputs": { + "hello_and_goodbye_input": "sub world" + }, + "end": "2016-11-17T14:13:41.117-05:00", + "attempt": 1, + "start": "2016-11-17T14:13:39.236-05:00" + }] + }, + "outputs": { + "main_output": "Hello sub world!" + }, + "workflowRoot": "/cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd", + "id": "1d919bd4-d046-43b0-9918-9964509689dd", + "inputs": { + + }, + "submission": "2016-11-17T14:13:39.104-05:00", + "status": "Succeeded", + "end": "2016-11-17T14:13:41.120-05:00", + "start": "2016-11-17T14:13:39.204-05:00" +} +``` + # REST API The `server` subcommand on the executable JAR will start an HTTP server which can accept WDL files to run as well as check status and output of existing workflows. @@ -1561,7 +2137,18 @@ This endpoint accepts a POST request with a `multipart/form-data` encoded body. * `wdlSource` - *Required* Contains the WDL file to submit for execution. * `workflowInputs` - *Optional* JSON file containing the inputs. A skeleton file can be generated from [wdltool](https://github.com/broadinstitute/wdltool) using the "inputs" subcommand. +* `workflowInputs_2` - *Optional* JSON file containing the inputs. +* `workflowInputs_3` - *Optional* JSON file containing the inputs. +* `workflowInputs_4` - *Optional* JSON file containing the inputs. +* `workflowInputs_5` - *Optional* JSON file containing the inputs. 
* `workflowOptions` - *Optional* JSON file containing options for this workflow execution. See the [run](#run) CLI sub-command for some more information about this. +* `wdlDependencies` - *Optional* ZIP file containing WDL files that are used to resolve import statements. + +Regarding the workflowInputs parameter, in case of key conflicts between multiple input JSON files, higher values of x in workflowInputs_x override lower values. For example, an input specified in workflowInputs_3 will override an input with the same name in workflowInputs or workflowInputs_2. +Similarly, an input key specified in workflowInputs_5 will override an identical input key in any other input file. + +Additionally, although Swagger has a limit of 5 JSON input files, the REST endpoint itself can accept an unlimited number of JSON input files. + cURL: diff --git a/SecurityRecommendations.md b/SecurityRecommendations.md new file mode 100644 index 00000000000..3d980e582af --- /dev/null +++ b/SecurityRecommendations.md @@ -0,0 +1,51 @@ +Security +======== + + +* [Firecloud](#firecloud) +* [Security by sysadmin](#security) + * [Multi-tenant](#multi-tenant) + + +# Firecloud + +TODO + +# Security by sysadmin +__Warning!__ + +__This section is community-contributed. It is intended as helpful guidance only, and is not endorsed by the Broad Institute.__ + +Cromwell running in server mode accepts all connections on the configured webservice port. The simplest way to restrict access is by putting an authenticating proxy server in between users and the cromwell server: + 1. Configure a firewall rule on the cromwell server host to deny access to the webservice port (e.g. 8000) from all addresses except a secure proxy host. + 1. Configure `` on the proxy host with ``, to proxy authenticated traffic from the world to the cromwell server. Using Apache `httpd` web server for example with basic htpassword file-based authentication, the configuration might look something like: + + ```Apache + + Order deny,allow + Allow from all + AuthType Basic + AuthName "Password Required" + AuthUserFile /path/to/my/htpasswdfile + Require user someone someoneelse + ProxyPass http://101.101.234.567:8000 # address of cromwell server web service + +``` + + 1. That's it. Users now hit `http://my.proxy.org/cromwell` with authenticated requests, and they're forwarded to port 8000 on the cromwell server host. + +## Multi-tenant +The above scheme extends easily to multiple cromwell instances, for use by different groups within an organization for example. If the instances are running on the same host then each instance should be run as its own dedicated service account user, e.g. `cromwell1`, `cromwell2` etc. so that processes running under one cromwell instance cannot access the files of another; different webservice ports must also be configured. If persistent database storage is being used then each instance should be configured with its own database and database user. 
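A minimal sketch of what the second instance's overrides might look like (the keys follow the standard Cromwell configuration layout; the port, database name and credentials are illustrative):

```
webservice {
  port = 8001   # each instance listens on its own webservice port
}

database {
  driver = "slick.driver.MySQLDriver$"
  db {
    driver = "com.mysql.jdbc.Driver"
    url = "jdbc:mysql://host/cromwell2"   # dedicated database per instance
    user = "cromwell2"
    password = "pass2"
    connectionTimeout = 5000
  }
}
```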
The proxy configuration above is extended simply by adding another `Location`: + +```Apache + + Order deny,allow + Allow from all + AuthType Basic + AuthName "Password Required" + AuthUserFile /path/to/my/htpasswdfile1 + Require user stillanotherperson andanother + ProxyPass http://101.101.234.567:8001 + +``` + diff --git a/backend/src/main/scala/cromwell/backend/BackendCacheHitCopyingActor.scala b/backend/src/main/scala/cromwell/backend/BackendCacheHitCopyingActor.scala index 11a95df398c..f1bf383866b 100644 --- a/backend/src/main/scala/cromwell/backend/BackendCacheHitCopyingActor.scala +++ b/backend/src/main/scala/cromwell/backend/BackendCacheHitCopyingActor.scala @@ -3,7 +3,7 @@ package cromwell.backend import akka.actor.{Actor, ActorLogging} import akka.event.LoggingReceive import cromwell.backend.BackendCacheHitCopyingActor.CopyOutputsCommand -import cromwell.backend.BackendJobExecutionActor.{AbortedResponse, BackendJobExecutionResponse, FailedNonRetryableResponse} +import cromwell.backend.BackendJobExecutionActor.{AbortedResponse, BackendJobExecutionResponse, JobFailedNonRetryableResponse} import cromwell.backend.BackendLifecycleActor._ import cromwell.core.simpleton.WdlValueSimpleton @@ -29,6 +29,6 @@ trait BackendCacheHitCopyingActor extends Actor with ActorLogging with BackendJo def abort(): Unit = log.warning("{}: Abort not supported during cache hit copying", jobTag) private def cachingFailed(t: Throwable) = { - FailedNonRetryableResponse(jobKey = jobDescriptor.key, throwable = t, returnCode = None) + JobFailedNonRetryableResponse(jobKey = jobDescriptor.key, throwable = t, returnCode = None) } } diff --git a/backend/src/main/scala/cromwell/backend/BackendJobBreadCrumb.scala b/backend/src/main/scala/cromwell/backend/BackendJobBreadCrumb.scala new file mode 100644 index 00000000000..1dbc9ca5090 --- /dev/null +++ b/backend/src/main/scala/cromwell/backend/BackendJobBreadCrumb.scala @@ -0,0 +1,14 @@ +package cromwell.backend + +import java.nio.file.Path + +import cromwell.backend.io.JobPaths +import cromwell.core.{JobKey, WorkflowId} +import wdl4s.Workflow + +case class BackendJobBreadCrumb(workflow: Workflow, id: WorkflowId, jobKey: JobKey) { + def toPath(root: Path): Path = { + val workflowPart = root.resolve(workflow.unqualifiedName).resolve(id.toString) + JobPaths.callPathBuilder(workflowPart, jobKey) + } +} diff --git a/backend/src/main/scala/cromwell/backend/BackendJobExecutionActor.scala b/backend/src/main/scala/cromwell/backend/BackendJobExecutionActor.scala index 60cdc1e02fb..816f43da997 100644 --- a/backend/src/main/scala/cromwell/backend/BackendJobExecutionActor.scala +++ b/backend/src/main/scala/cromwell/backend/BackendJobExecutionActor.scala @@ -1,10 +1,13 @@ package cromwell.backend +import java.nio.file.Path + import akka.actor.ActorLogging import akka.event.LoggingReceive import cromwell.backend.BackendJobExecutionActor._ import cromwell.backend.BackendLifecycleActor._ -import cromwell.core.{ExecutionEvent, JobOutputs} +import cromwell.backend.wdl.OutputEvaluator +import cromwell.core.{CallOutputs, ExecutionEvent, JobKey} import wdl4s.expression.WdlStandardLibraryFunctions import wdl4s.values.WdlValue @@ -21,12 +24,12 @@ object BackendJobExecutionActor { // Responses sealed trait BackendJobExecutionActorResponse extends BackendWorkflowLifecycleActorResponse - sealed trait BackendJobExecutionResponse extends BackendJobExecutionActorResponse { def jobKey: BackendJobDescriptorKey } - case class SucceededResponse(jobKey: BackendJobDescriptorKey, returnCode: Option[Int], 
jobOutputs: JobOutputs, jobDetritusFiles: Option[Map[String, String]], executionEvents: Seq[ExecutionEvent]) extends BackendJobExecutionResponse + sealed trait BackendJobExecutionResponse extends BackendJobExecutionActorResponse { def jobKey: JobKey } + case class JobSucceededResponse(jobKey: BackendJobDescriptorKey, returnCode: Option[Int], jobOutputs: CallOutputs, jobDetritusFiles: Option[Map[String, Path]], executionEvents: Seq[ExecutionEvent]) extends BackendJobExecutionResponse case class AbortedResponse(jobKey: BackendJobDescriptorKey) extends BackendJobExecutionResponse sealed trait BackendJobFailedResponse extends BackendJobExecutionResponse { def throwable: Throwable; def returnCode: Option[Int] } - case class FailedNonRetryableResponse(jobKey: BackendJobDescriptorKey, throwable: Throwable, returnCode: Option[Int]) extends BackendJobFailedResponse - case class FailedRetryableResponse(jobKey: BackendJobDescriptorKey, throwable: Throwable, returnCode: Option[Int]) extends BackendJobFailedResponse + case class JobFailedNonRetryableResponse(jobKey: JobKey, throwable: Throwable, returnCode: Option[Int]) extends BackendJobFailedResponse + case class JobFailedRetryableResponse(jobKey: BackendJobDescriptorKey, throwable: Throwable, returnCode: Option[Int]) extends BackendJobFailedResponse } /** @@ -45,7 +48,7 @@ trait BackendJobExecutionActor extends BackendJobLifecycleActor with ActorLoggin // We need this for receive because we can't do `onFailure = ExecutionFailure` directly - because BackendJobDescriptor =/= BackendJobDescriptorKey private def executionFailed = (t: Throwable) => - FailedNonRetryableResponse(jobKey = jobDescriptor.key, throwable = t, returnCode = None) + JobFailedNonRetryableResponse(jobKey = jobDescriptor.key, throwable = t, returnCode = None) /** * Execute a new job. 
diff --git a/backend/src/main/scala/cromwell/backend/BackendLifecycleActor.scala b/backend/src/main/scala/cromwell/backend/BackendLifecycleActor.scala index 58ef726435f..a6a09cff4a3 100644 --- a/backend/src/main/scala/cromwell/backend/BackendLifecycleActor.scala +++ b/backend/src/main/scala/cromwell/backend/BackendLifecycleActor.scala @@ -3,7 +3,7 @@ package cromwell.backend import akka.actor.{Actor, ActorRef} import cromwell.backend.BackendLifecycleActor._ import cromwell.core.logging.{JobLogging, WorkflowLogging} -import wdl4s.Call +import wdl4s.TaskCall import scala.concurrent.{ExecutionContext, Future} import scala.util.{Failure, Success} @@ -55,7 +55,7 @@ trait BackendLifecycleActor extends Actor { trait BackendWorkflowLifecycleActor extends BackendLifecycleActor with WorkflowLogging { //For Logging and boilerplate - override lazy final val workflowId = workflowDescriptor.id + override lazy final val workflowIdForLogging = workflowDescriptor.id /** * The workflow descriptor for the workflow in which this Backend is being used @@ -65,7 +65,7 @@ trait BackendWorkflowLifecycleActor extends BackendLifecycleActor with WorkflowL /** * The subset of calls which this backend will be expected to run */ - protected def calls: Seq[Call] + protected def calls: Set[TaskCall] } trait BackendJobLifecycleActor extends BackendLifecycleActor with JobLogging { diff --git a/backend/src/main/scala/cromwell/backend/BackendLifecycleActorFactory.scala b/backend/src/main/scala/cromwell/backend/BackendLifecycleActorFactory.scala index 5a6c5d26823..cb625a78bce 100644 --- a/backend/src/main/scala/cromwell/backend/BackendLifecycleActorFactory.scala +++ b/backend/src/main/scala/cromwell/backend/BackendLifecycleActorFactory.scala @@ -6,17 +6,17 @@ import akka.actor.{ActorRef, Props} import com.typesafe.config.Config import cromwell.backend.callcaching.FileHashingActor import cromwell.backend.callcaching.FileHashingActor.FileHashingFunction -import cromwell.backend.io.WorkflowPaths +import cromwell.backend.io.WorkflowPathsWithDocker +import cromwell.core.CallOutputs import cromwell.core.JobExecutionToken.JobExecutionTokenType -import cromwell.core.{ExecutionStore, OutputStore} -import wdl4s.Call -import wdl4s.expression.WdlStandardLibraryFunctions +import wdl4s.TaskCall +import wdl4s.expression.{PureStandardLibraryFunctions, WdlStandardLibraryFunctions} trait BackendLifecycleActorFactory { def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], - serviceRegistryActor: ActorRef): Option[Props] + calls: Set[TaskCall], + serviceRegistryActor: ActorRef): Option[Props] = None def jobExecutionActorProps(jobDescriptor: BackendJobDescriptor, initializationData: Option[BackendInitializationData], @@ -37,17 +37,21 @@ trait BackendLifecycleActorFactory { def backendSingletonActorProps: Option[Props] = None def workflowFinalizationActorProps(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], - executionStore: ExecutionStore, - outputStore: OutputStore, + calls: Set[TaskCall], + jobExecutionMap: JobExecutionMap, + workflowOutputs: CallOutputs, initializationData: Option[BackendInitializationData]): Option[Props] = None def expressionLanguageFunctions(workflowDescriptor: BackendWorkflowDescriptor, jobKey: BackendJobDescriptorKey, - initializationData: Option[BackendInitializationData]): WdlStandardLibraryFunctions + initializationData: Option[BackendInitializationData]): WdlStandardLibraryFunctions = PureStandardLibraryFunctions def 
getExecutionRootPath(workflowDescriptor: BackendWorkflowDescriptor, backendConfig: Config, initializationData: Option[BackendInitializationData]): Path = { - new WorkflowPaths(workflowDescriptor, backendConfig).executionRoot + new WorkflowPathsWithDocker(workflowDescriptor, backendConfig).executionRoot + } + + def getWorkflowExecutionRootPath(workflowDescriptor: BackendWorkflowDescriptor, backendConfig: Config, initializationData: Option[BackendInitializationData]): Path = { + new WorkflowPathsWithDocker(workflowDescriptor, backendConfig).workflowRoot } def runtimeAttributeDefinitions(initializationDataOption: Option[BackendInitializationData]): Set[RuntimeAttributeDefinition] = Set.empty diff --git a/backend/src/main/scala/cromwell/backend/BackendWorkflowInitializationActor.scala b/backend/src/main/scala/cromwell/backend/BackendWorkflowInitializationActor.scala index f98234ce5a2..feaf5720bf0 100644 --- a/backend/src/main/scala/cromwell/backend/BackendWorkflowInitializationActor.scala +++ b/backend/src/main/scala/cromwell/backend/BackendWorkflowInitializationActor.scala @@ -4,13 +4,13 @@ import akka.actor.{ActorLogging, ActorRef} import akka.event.LoggingReceive import cromwell.backend.BackendLifecycleActor._ import cromwell.backend.BackendWorkflowInitializationActor._ -import cromwell.backend.wdl.OnlyPureFunctions +import wdl4s.expression.PureStandardLibraryFunctions import cromwell.core.{WorkflowMetadataKeys, WorkflowOptions} import cromwell.services.metadata.MetadataService.PutMetadataAction import cromwell.services.metadata.{MetadataEvent, MetadataKey, MetadataValue} import wdl4s.types._ import wdl4s.values.{WdlArray, WdlBoolean, WdlInteger, WdlString, WdlValue} -import wdl4s.{Call, NoLookup, Task, WdlExpression} +import wdl4s._ import scala.concurrent.Future import scala.util.{Failure, Success, Try} @@ -36,7 +36,7 @@ object BackendWorkflowInitializationActor { trait BackendWorkflowInitializationActor extends BackendWorkflowLifecycleActor with ActorLogging { val serviceRegistryActor: ActorRef - def calls: Seq[Call] + def calls: Set[TaskCall] /** * This method is meant only as a "pre-flight check" validation of runtime attribute expressions during workflow @@ -53,7 +53,7 @@ trait BackendWorkflowInitializationActor extends BackendWorkflowLifecycleActor w wdlExpressionMaybe match { case None => !valueRequired case Some(wdlExpression: WdlExpression) => - wdlExpression.evaluate(NoLookup, OnlyPureFunctions) map (_.wdlType) match { + wdlExpression.evaluate(NoLookup, PureStandardLibraryFunctions) map (_.wdlType) match { case Success(wdlType) => predicate(wdlType) case Failure(_) => true // If we can't evaluate it, we'll let it pass for now... } @@ -81,7 +81,7 @@ trait BackendWorkflowInitializationActor extends BackendWorkflowLifecycleActor w wdlExpressionMaybe match { case None => !valueRequired case Some(wdlExpression: WdlExpression) => - wdlExpression.evaluate(NoLookup, OnlyPureFunctions) match { + wdlExpression.evaluate(NoLookup, PureStandardLibraryFunctions) match { case Success(wdlValue) => validateValue(wdlValue) case Failure(throwable) => true // If we can't evaluate it, we'll let it pass for now... } @@ -91,6 +91,9 @@ trait BackendWorkflowInitializationActor extends BackendWorkflowLifecycleActor w protected def runtimeAttributeValidators: Map[String, Option[WdlValue] => Boolean] + // FIXME: If a workflow executes jobs using multiple backends, + // each backend will try to write its own workflow root and override any previous one. 
+ // They should be structured differently or at least be prefixed by the backend name protected def publishWorkflowRoot(workflowRoot: String) = { serviceRegistryActor ! PutMetadataAction(MetadataEvent(MetadataKey(workflowDescriptor.id, None, WorkflowMetadataKeys.WorkflowRoot), MetadataValue(workflowRoot))) } diff --git a/backend/src/main/scala/cromwell/backend/OutputEvaluator.scala b/backend/src/main/scala/cromwell/backend/OutputEvaluator.scala deleted file mode 100644 index 0c63652651b..00000000000 --- a/backend/src/main/scala/cromwell/backend/OutputEvaluator.scala +++ /dev/null @@ -1,31 +0,0 @@ -package cromwell.backend - -import cromwell.core.JobOutput -import wdl4s._ -import wdl4s.expression.WdlStandardLibraryFunctions -import wdl4s.util.TryUtil -import wdl4s.values.WdlValue - -import scala.util.{Success, Try} - -object OutputEvaluator { - def evaluateOutputs(jobDescriptor: BackendJobDescriptor, - wdlFunctions: WdlStandardLibraryFunctions, - postMapper: WdlValue => Try[WdlValue] = v => Success(v)) = { - val inputs = jobDescriptor.inputs - val evaluatedOutputs = jobDescriptor.call.task.outputs. - foldLeft(Map.empty[LocallyQualifiedName, Try[JobOutput]])((outputMap, output) => { - val currentOutputs = outputMap collect { - case (name, value) if value.isSuccess => name -> value.get.wdlValue - } - def lookup = (currentOutputs ++ inputs).apply _ - val coerced = output.requiredExpression.evaluate(lookup, wdlFunctions) flatMap output.wdlType.coerceRawValue - val jobOutput = output.name -> (coerced flatMap postMapper map JobOutput) - - outputMap + jobOutput - - }) - - TryUtil.sequenceMap(evaluatedOutputs, s"Workflow ${jobDescriptor.workflowDescriptor.id} post processing failed.") - } -} diff --git a/backend/src/main/scala/cromwell/backend/RuntimeAttributeDefinition.scala b/backend/src/main/scala/cromwell/backend/RuntimeAttributeDefinition.scala index 2383091432c..ae9d4b9bf26 100644 --- a/backend/src/main/scala/cromwell/backend/RuntimeAttributeDefinition.scala +++ b/backend/src/main/scala/cromwell/backend/RuntimeAttributeDefinition.scala @@ -20,8 +20,8 @@ object RuntimeAttributeDefinition { def evaluateRuntimeAttributes(unevaluated: RuntimeAttributes, wdlFunctions: WdlStandardLibraryFunctions, - evaluatedInputs: Map[LocallyQualifiedName, WdlValue]): Try[Map[String, WdlValue]] = { - val tryInputs = evaluatedInputs map { case (x, y) => x -> Success(y) } + evaluatedInputs: Map[Declaration, WdlValue]): Try[Map[String, WdlValue]] = { + val tryInputs = evaluatedInputs map { case (x, y) => x.unqualifiedName -> Success(y) } val mapBasedLookup = buildMapBasedLookup(tryInputs) _ val mapOfTries = unevaluated.attrs mapValues { expr => expr.evaluate(mapBasedLookup, wdlFunctions) diff --git a/backend/src/main/scala/cromwell/backend/async/AsyncBackendJobExecutionActor.scala b/backend/src/main/scala/cromwell/backend/async/AsyncBackendJobExecutionActor.scala index bbbfbf82b00..759127d6748 100644 --- a/backend/src/main/scala/cromwell/backend/async/AsyncBackendJobExecutionActor.scala +++ b/backend/src/main/scala/cromwell/backend/async/AsyncBackendJobExecutionActor.scala @@ -2,7 +2,7 @@ package cromwell.backend.async import akka.actor.{Actor, ActorLogging, ActorRef} import cromwell.backend.BackendJobDescriptor -import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, SucceededResponse, _} +import cromwell.backend.BackendJobExecutionActor._ import cromwell.backend.async.AsyncBackendJobExecutionActor._ import cromwell.core.CromwellFatalException import cromwell.core.retry.{Retry, 
SimpleExponentialBackoff} @@ -60,7 +60,7 @@ trait AsyncBackendJobExecutionActor { this: Actor with ActorLogging => } private def failAndStop(t: Throwable) = { - val responseBuilder = if (retryable) FailedRetryableResponse else FailedNonRetryableResponse + val responseBuilder = if (retryable) JobFailedRetryableResponse else JobFailedNonRetryableResponse completionPromise.success(responseBuilder.apply(jobDescriptor.key, t, None)) context.stop(self) } @@ -75,13 +75,13 @@ trait AsyncBackendJobExecutionActor { this: Actor with ActorLogging => context.system.scheduler.scheduleOnce(pollBackOff.backoffMillis.millis, self, IssuePollRequest(handle)) () case Finish(SuccessfulExecutionHandle(outputs, returnCode, jobDetritusFiles, executionEvents, resultsClonedFrom)) => - completionPromise.success(SucceededResponse(jobDescriptor.key, Some(returnCode), outputs, Option(jobDetritusFiles), executionEvents)) + completionPromise.success(JobSucceededResponse(jobDescriptor.key, Some(returnCode), outputs, Option(jobDetritusFiles), executionEvents)) context.stop(self) case Finish(FailedNonRetryableExecutionHandle(throwable, returnCode)) => - completionPromise.success(FailedNonRetryableResponse(jobDescriptor.key, throwable, returnCode)) + completionPromise.success(JobFailedNonRetryableResponse(jobDescriptor.key, throwable, returnCode)) context.stop(self) case Finish(FailedRetryableExecutionHandle(throwable, returnCode)) => - completionPromise.success(FailedRetryableResponse(jobDescriptor.key, throwable, returnCode)) + completionPromise.success(JobFailedRetryableResponse(jobDescriptor.key, throwable, returnCode)) context.stop(self) case Finish(cromwell.backend.async.AbortedExecutionHandle) => completionPromise.success(AbortedResponse(jobDescriptor.key)) diff --git a/backend/src/main/scala/cromwell/backend/async/ExecutionHandle.scala b/backend/src/main/scala/cromwell/backend/async/ExecutionHandle.scala index 88232f3b29b..1e4238014d9 100644 --- a/backend/src/main/scala/cromwell/backend/async/ExecutionHandle.scala +++ b/backend/src/main/scala/cromwell/backend/async/ExecutionHandle.scala @@ -1,7 +1,9 @@ package cromwell.backend.async +import java.nio.file.Path + import cromwell.backend.BackendJobDescriptor -import cromwell.core.{ExecutionEvent, JobOutputs} +import cromwell.core.{ExecutionEvent, CallOutputs} /** * Trait to encapsulate whether an execution is complete and if so provide a result. 
Useful in conjunction @@ -12,7 +14,7 @@ trait ExecutionHandle { def result: ExecutionResult } -final case class SuccessfulExecutionHandle(outputs: JobOutputs, returnCode: Int, jobDetritusFiles: Map[String, String], executionEvents: Seq[ExecutionEvent], resultsClonedFrom: Option[BackendJobDescriptor] = None) extends ExecutionHandle { +final case class SuccessfulExecutionHandle(outputs: CallOutputs, returnCode: Int, jobDetritusFiles: Map[String, Path], executionEvents: Seq[ExecutionEvent], resultsClonedFrom: Option[BackendJobDescriptor] = None) extends ExecutionHandle { override val isDone = true override val result = SuccessfulExecution(outputs, returnCode, jobDetritusFiles, executionEvents, resultsClonedFrom) } diff --git a/backend/src/main/scala/cromwell/backend/async/ExecutionResult.scala b/backend/src/main/scala/cromwell/backend/async/ExecutionResult.scala index 267bea8779e..ff972233758 100644 --- a/backend/src/main/scala/cromwell/backend/async/ExecutionResult.scala +++ b/backend/src/main/scala/cromwell/backend/async/ExecutionResult.scala @@ -1,7 +1,9 @@ package cromwell.backend.async +import java.nio.file.Path + import cromwell.backend.BackendJobDescriptor -import cromwell.core.{ExecutionEvent, JobOutputs} +import cromwell.core.{ExecutionEvent, CallOutputs} /** * ADT representing the result of an execution of a BackendCall. @@ -11,9 +13,9 @@ sealed trait ExecutionResult /** * A successful execution with resolved outputs. */ -final case class SuccessfulExecution(outputs: JobOutputs, +final case class SuccessfulExecution(outputs: CallOutputs, returnCode: Int, - jobDetritusFiles: Map[String, String], + jobDetritusFiles: Map[String, Path], executionEvents: Seq[ExecutionEvent], resultsClonedFrom: Option[BackendJobDescriptor] = None) extends ExecutionResult diff --git a/backend/src/main/scala/cromwell/backend/backend.scala b/backend/src/main/scala/cromwell/backend/backend.scala index 8ac55a3471f..e1addfe30b1 100644 --- a/backend/src/main/scala/cromwell/backend/backend.scala +++ b/backend/src/main/scala/cromwell/backend/backend.scala @@ -2,20 +2,19 @@ package cromwell.backend import com.typesafe.config.Config import cromwell.core.WorkflowOptions.WorkflowOption -import cromwell.core.{JobKey, WorkflowId, WorkflowOptions} +import cromwell.core.{CallKey, WorkflowId, WorkflowOptions} +import wdl4s._ import wdl4s.values.WdlValue -import wdl4s.{Call, NamespaceWithWorkflow, _} import scala.util.Try /** * For uniquely identifying a job which has been or will be sent to the backend. 
*/ -case class BackendJobDescriptorKey(call: Call, index: Option[Int], attempt: Int) extends JobKey { +case class BackendJobDescriptorKey(call: TaskCall, index: Option[Int], attempt: Int) extends CallKey { def scope = call private val indexString = index map { _.toString } getOrElse "NA" val tag = s"${call.fullyQualifiedName}:$indexString:$attempt" - val isShard = index.isDefined def mkTag(workflowId: WorkflowId) = s"$workflowId:$this" } @@ -25,19 +24,34 @@ case class BackendJobDescriptorKey(call: Call, index: Option[Int], attempt: Int) case class BackendJobDescriptor(workflowDescriptor: BackendWorkflowDescriptor, key: BackendJobDescriptorKey, runtimeAttributes: Map[LocallyQualifiedName, WdlValue], - inputs: Map[LocallyQualifiedName, WdlValue]) { + inputDeclarations: EvaluatedTaskInputs) { + val fullyQualifiedInputs = inputDeclarations map { case (declaration, value) => declaration.fullyQualifiedName -> value } val call = key.call override val toString = s"${key.mkTag(workflowDescriptor.id)}" } +object BackendWorkflowDescriptor { + def apply(id: WorkflowId, + workflow: Workflow, + inputs: Map[FullyQualifiedName, WdlValue], + workflowOptions: WorkflowOptions) = { + new BackendWorkflowDescriptor(id, workflow, inputs, workflowOptions, List.empty) + } +} + /** * For passing to a BackendActor construction time */ case class BackendWorkflowDescriptor(id: WorkflowId, - workflowNamespace: NamespaceWithWorkflow, + workflow: Workflow, inputs: Map[FullyQualifiedName, WdlValue], - workflowOptions: WorkflowOptions) { - override def toString: String = s"[BackendWorkflowDescriptor id=${id.shortString} workflowName=${workflowNamespace.workflow.unqualifiedName}]" + workflowOptions: WorkflowOptions, + breadCrumbs: List[BackendJobBreadCrumb]) { + + val rootWorkflow = breadCrumbs.headOption.map(_.workflow).getOrElse(workflow) + val rootWorkflowId = breadCrumbs.headOption.map(_.id).getOrElse(id) + + override def toString: String = s"[BackendWorkflowDescriptor id=${id.shortString} workflowName=${workflow.unqualifiedName}]" def getWorkflowOption(key: WorkflowOption) = workflowOptions.get(key).toOption } diff --git a/backend/src/main/scala/cromwell/backend/callcaching/CacheHitDuplicating.scala b/backend/src/main/scala/cromwell/backend/callcaching/CacheHitDuplicating.scala index c199de6d409..48a6d590eb1 100644 --- a/backend/src/main/scala/cromwell/backend/callcaching/CacheHitDuplicating.scala +++ b/backend/src/main/scala/cromwell/backend/callcaching/CacheHitDuplicating.scala @@ -4,12 +4,15 @@ import java.nio.file.Path import akka.actor.ActorRef import cromwell.backend.BackendCacheHitCopyingActor -import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, SucceededResponse} +import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, JobSucceededResponse} import cromwell.backend.io.JobPaths -import cromwell.core.PathCopier +import cromwell.core.path.PathCopier import cromwell.core.simpleton.{WdlValueBuilder, WdlValueSimpleton} import wdl4s.values.WdlFile +import scala.language.postfixOps +import scala.util.Try + /** * Mixin implementing common functionality for a BackendCacheHitCopyingActor. * @@ -35,7 +38,7 @@ trait CacheHitDuplicating { * @param file the string version of the path * @return an absolute path to the file with potential credentials embedded within. 
*/ - protected def getPath(file: String): Path + protected def getPath(file: String): Try[Path] protected def destinationCallRootPath: Path @@ -47,9 +50,10 @@ trait CacheHitDuplicating { protected def metadataKeyValues: Map[String, Any] private def lookupSourceCallRootPath(sourceJobDetritusFiles: Map[String, String]): Path = { - sourceJobDetritusFiles.get(JobPaths.CallRootPathKey).map(getPath).getOrElse(throw new RuntimeException( - s"${JobPaths.CallRootPathKey} wasn't found for call ${jobDescriptor.call.fullyQualifiedName}") - ) + sourceJobDetritusFiles.get(JobPaths.CallRootPathKey).map(getPath).get recover { + case failure => + throw new RuntimeException(s"${JobPaths.CallRootPathKey} wasn't found for call ${jobDescriptor.call.fullyQualifiedName}", failure) + } get } /** @@ -59,27 +63,27 @@ trait CacheHitDuplicating { sourceCallRootPath: Path): Seq[WdlValueSimpleton] = { wdlValueSimpletons map { case WdlValueSimpleton(key, wdlFile: WdlFile) => - val sourcePath = getPath(wdlFile.value) + val sourcePath = getPath(wdlFile.value).get val destinationPath = PathCopier.getDestinationFilePath(sourceCallRootPath, sourcePath, destinationCallRootPath) duplicate(sourcePath, destinationPath) - WdlValueSimpleton(key, WdlFile(destinationPath.toString)) + WdlValueSimpleton(key, WdlFile(destinationPath.toUri.toString)) case wdlValueSimpleton => wdlValueSimpleton } } - private def copyDetritus(sourceJobDetritusFiles: Map[String, String]): Map[String, String] = { + private def copyDetritus(sourceJobDetritusFiles: Map[String, String]): Map[String, Path] = { val sourceKeys = sourceJobDetritusFiles.keySet val destinationKeys = destinationJobDetritusPaths.keySet val fileKeys = sourceKeys.intersect(destinationKeys).filterNot(_ == JobPaths.CallRootPathKey) val destinationJobDetritusFiles = fileKeys map { fileKey => - val sourcePath = getPath(sourceJobDetritusFiles(fileKey)) + val sourcePath = getPath(sourceJobDetritusFiles(fileKey)).get val destinationPath = destinationJobDetritusPaths(fileKey) duplicate(sourcePath, destinationPath) - (fileKey, destinationPath.toString) + (fileKey, destinationPath) } - destinationJobDetritusFiles.toMap + (JobPaths.CallRootPathKey -> destinationCallRootPath.toString) + destinationJobDetritusFiles.toMap + (JobPaths.CallRootPathKey -> destinationCallRootPath) } override def copyCachedOutputs(wdlValueSimpletons: Seq[WdlValueSimpleton], @@ -95,6 +99,6 @@ trait CacheHitDuplicating { import cromwell.services.metadata.MetadataService.implicits.MetadataAutoPutter serviceRegistryActor.putMetadata(jobDescriptor.workflowDescriptor.id, Option(jobDescriptor.key), metadataKeyValues) - SucceededResponse(jobDescriptor.key, returnCodeOption, destinationJobOutputs, Option(destinationJobDetritusFiles), Seq.empty) + JobSucceededResponse(jobDescriptor.key, returnCodeOption, destinationJobOutputs, Option(destinationJobDetritusFiles), Seq.empty) } } diff --git a/backend/src/main/scala/cromwell/backend/io/JobPaths.scala b/backend/src/main/scala/cromwell/backend/io/JobPaths.scala index c9a5ac120f7..5c3a0c9f7da 100644 --- a/backend/src/main/scala/cromwell/backend/io/JobPaths.scala +++ b/backend/src/main/scala/cromwell/backend/io/JobPaths.scala @@ -2,8 +2,7 @@ package cromwell.backend.io import java.nio.file.Path -import com.typesafe.config.Config -import cromwell.backend.{BackendJobDescriptorKey, BackendWorkflowDescriptor} +import cromwell.core.JobKey import cromwell.services.metadata.CallMetadataKeys object JobPaths { @@ -15,65 +14,50 @@ object JobPaths { val StdErrPathKey = "stderr" val ReturnCodePathKey 
= "returnCode" val CallRootPathKey = "callRootPath" -} - -class JobPaths(workflowDescriptor: BackendWorkflowDescriptor, - config: Config, - jobKey: BackendJobDescriptorKey) extends WorkflowPaths(workflowDescriptor, config) { - import JobPaths._ - private def callPathBuilder(root: Path) = { - val callName = jobKey.call.fullyQualifiedName.split('.').last + def callPathBuilder(root: Path, jobKey: JobKey) = { + val callName = jobKey.scope.unqualifiedName val call = s"$CallPrefix-$callName" val shard = jobKey.index map { s => s"$ShardPrefix-$s" } getOrElse "" val retry = if (jobKey.attempt > 1) s"$AttemptPrefix-${jobKey.attempt}" else "" List(call, shard, retry).foldLeft(root)((path, dir) => path.resolve(dir)) } +} - def toDockerPath(path: Path): Path = { - path.toAbsolutePath match { - case p if p.startsWith(WorkflowPaths.DockerRoot) => p - case p => - /** For example: - * - * p = /abs/path/to/cromwell-executions/three-step/f00ba4/call-ps/stdout.txt - * localExecutionRoot = /abs/path/to/cromwell-executions - * subpath = three-step/f00ba4/call-ps/stdout.txt - * - * return value = /root/three-step/f00ba4/call-ps/stdout.txt - * - * TODO: this assumes that p.startsWith(localExecutionRoot) - */ - val subpath = p.subpath(executionRoot.getNameCount, p.getNameCount) - WorkflowPaths.DockerRoot.resolve(subpath) - } - } - - val callRoot = callPathBuilder(workflowRoot) - val callDockerRoot = callPathBuilder(dockerWorkflowRoot) - - val callExecutionRoot = callRoot.resolve("execution") - val callExecutionDockerRoot = callDockerRoot.resolve("execution") - - val callInputsRoot = callRoot.resolve("inputs") - - val stdout = callExecutionRoot.resolve("stdout") - val stderr = callExecutionRoot.resolve("stderr") - val script = callExecutionRoot.resolve("script") - val returnCode = callExecutionRoot.resolve("rc") +trait JobPaths { this: WorkflowPaths => + import JobPaths._ - lazy val metadataPaths: Map[String, Path] = Map( + def returnCodeFilename: String = "rc" + def stdoutFilename: String = "stdout" + def stderrFilename: String = "stderr" + def scriptFilename: String = "script" + + def jobKey: JobKey + lazy val callRoot = callPathBuilder(workflowRoot, jobKey) + lazy val callExecutionRoot = callRoot + lazy val stdout = callExecutionRoot.resolve(stdoutFilename) + lazy val stderr = callExecutionRoot.resolve(stderrFilename) + lazy val script = callExecutionRoot.resolve(scriptFilename) + lazy val returnCode = callExecutionRoot.resolve(returnCodeFilename) + + private lazy val commonMetadataPaths: Map[String, Path] = Map( CallMetadataKeys.CallRoot -> callRoot, CallMetadataKeys.Stdout -> stdout, CallMetadataKeys.Stderr -> stderr ) - lazy val detritusPaths: Map[String, Path] = Map( + private lazy val commonDetritusPaths: Map[String, Path] = Map( JobPaths.CallRootPathKey -> callRoot, JobPaths.ScriptPathKey -> script, JobPaths.StdoutPathKey -> stdout, JobPaths.StdErrPathKey -> stderr, JobPaths.ReturnCodePathKey -> returnCode ) + + protected lazy val customMetadataPaths: Map[String, Path] = Map.empty + protected lazy val customDetritusPaths: Map[String, Path] = Map.empty + + lazy val metadataPaths = commonMetadataPaths ++ customMetadataPaths + lazy val detritusPaths = commonDetritusPaths ++ customDetritusPaths } diff --git a/backend/src/main/scala/cromwell/backend/io/JobPathsWithDocker.scala b/backend/src/main/scala/cromwell/backend/io/JobPathsWithDocker.scala new file mode 100644 index 00000000000..49c748e883e --- /dev/null +++ b/backend/src/main/scala/cromwell/backend/io/JobPathsWithDocker.scala @@ -0,0 +1,39 @@ +package 
cromwell.backend.io + +import java.nio.file.Path + +import com.typesafe.config.Config +import cromwell.backend.{BackendWorkflowDescriptor, BackendJobDescriptorKey} +import cromwell.core.path.PathBuilder + +class JobPathsWithDocker(val jobKey: BackendJobDescriptorKey, + workflowDescriptor: BackendWorkflowDescriptor, + config: Config, + pathBuilders: List[PathBuilder] = WorkflowPaths.DefaultPathBuilders) extends WorkflowPathsWithDocker( + workflowDescriptor, config, pathBuilders) with JobPaths { + import JobPaths._ + + override lazy val callExecutionRoot = { callRoot.resolve("execution") } + val callDockerRoot = callPathBuilder(dockerWorkflowRoot, jobKey) + val callExecutionDockerRoot = callDockerRoot.resolve("execution") + val callInputsRoot = callRoot.resolve("inputs") + + def toDockerPath(path: Path): Path = { + path.toAbsolutePath match { + case p if p.startsWith(WorkflowPathsWithDocker.DockerRoot) => p + case p => + /* For example: + * + * p = /abs/path/to/cromwell-executions/three-step/f00ba4/call-ps/stdout.txt + * localExecutionRoot = /abs/path/to/cromwell-executions + * subpath = three-step/f00ba4/call-ps/stdout.txt + * + * return value = /root/three-step/f00ba4/call-ps/stdout.txt + * + * TODO: this assumes that p.startsWith(localExecutionRoot) + */ + val subpath = p.subpath(executionRoot.getNameCount, p.getNameCount) + WorkflowPathsWithDocker.DockerRoot.resolve(subpath) + } + } +} \ No newline at end of file diff --git a/backend/src/main/scala/cromwell/backend/io/WorkflowPaths.scala b/backend/src/main/scala/cromwell/backend/io/WorkflowPaths.scala index 23bdae992a5..be959aec59d 100644 --- a/backend/src/main/scala/cromwell/backend/io/WorkflowPaths.scala +++ b/backend/src/main/scala/cromwell/backend/io/WorkflowPaths.scala @@ -1,26 +1,37 @@ package cromwell.backend.io -import java.nio.file.{FileSystem, FileSystems, Path, Paths} +import java.nio.file.Path import com.typesafe.config.Config import cromwell.backend.{BackendJobDescriptorKey, BackendWorkflowDescriptor} -import cromwell.core.PathFactory +import cromwell.core.WorkflowOptions.FinalCallLogsDir +import cromwell.core.path.{DefaultPathBuilder, PathFactory} import net.ceedubs.ficus.Ficus._ -object WorkflowPaths{ - val DockerRoot = Paths.get("/root") -} +import scala.util.Try -class WorkflowPaths(workflowDescriptor: BackendWorkflowDescriptor, config: Config, val fileSystems: List[FileSystem] = List(FileSystems.getDefault)) extends PathFactory { - val executionRoot = Paths.get(config.as[Option[String]]("root").getOrElse("cromwell-executions")).toAbsolutePath +object WorkflowPaths { + val DefaultPathBuilders = List(DefaultPathBuilder) +} - private def workflowPathBuilder(root: Path) = { - root.resolve(workflowDescriptor.workflowNamespace.workflow.unqualifiedName) - .resolve(workflowDescriptor.id.toString) +trait WorkflowPaths extends PathFactory { + def workflowDescriptor: BackendWorkflowDescriptor + def config: Config + + protected lazy val executionRootString = config.as[Option[String]]("root").getOrElse("cromwell-executions") + + def getPath(url: String): Try[Path] = Try(PathFactory.buildPath(url, pathBuilders)) + + // Rebuild potential intermediate call directories in case of a sub workflow + protected def workflowPathBuilder(root: Path) = { + workflowDescriptor.breadCrumbs.foldLeft(root)((acc, breadCrumb) => { + breadCrumb.toPath(acc) + }).resolve(workflowDescriptor.workflow.unqualifiedName).resolve(workflowDescriptor.id.toString + "/") } + lazy val executionRoot = PathFactory.buildPath(executionRootString, 
pathBuilders).toAbsolutePath lazy val workflowRoot = workflowPathBuilder(executionRoot) - lazy val dockerWorkflowRoot = workflowPathBuilder(WorkflowPaths.DockerRoot) + lazy val finalCallLogsPath = workflowDescriptor.getWorkflowOption(FinalCallLogsDir) map getPath map { _.get } - def toJobPaths(jobKey: BackendJobDescriptorKey) = new JobPaths(workflowDescriptor, config, jobKey) + def toJobPaths(jobKey: BackendJobDescriptorKey): JobPaths } diff --git a/backend/src/main/scala/cromwell/backend/io/WorkflowPathsBackendInitializationData.scala b/backend/src/main/scala/cromwell/backend/io/WorkflowPathsBackendInitializationData.scala index 6ac8ba96023..b0861d6bbb1 100644 --- a/backend/src/main/scala/cromwell/backend/io/WorkflowPathsBackendInitializationData.scala +++ b/backend/src/main/scala/cromwell/backend/io/WorkflowPathsBackendInitializationData.scala @@ -1,8 +1,7 @@ package cromwell.backend.io -import java.nio.file.FileSystem - import cromwell.backend.BackendInitializationData +import cromwell.core.path.PathBuilder /** * Extension of backend initialization data that also provides a `WorkflowPaths`, and by proxy its `List[FileSystem]`. @@ -39,7 +38,7 @@ object WorkflowPathsBackendInitializationData { BackendInitializationData.as[WorkflowPathsBackendInitializationData](initializationDataOption).workflowPaths } - def fileSystems(initializationDataOption: Option[BackendInitializationData]): List[FileSystem] = { - workflowPaths(initializationDataOption).fileSystems + def pathBuilders(initializationDataOption: Option[BackendInitializationData]): List[PathBuilder] = { + workflowPaths(initializationDataOption).pathBuilders } } diff --git a/backend/src/main/scala/cromwell/backend/io/WorkflowPathsWithDocker.scala b/backend/src/main/scala/cromwell/backend/io/WorkflowPathsWithDocker.scala new file mode 100644 index 00000000000..c10e66972fd --- /dev/null +++ b/backend/src/main/scala/cromwell/backend/io/WorkflowPathsWithDocker.scala @@ -0,0 +1,16 @@ +package cromwell.backend.io + +import java.nio.file.Paths + +import com.typesafe.config.Config +import cromwell.backend.{BackendJobDescriptorKey, BackendWorkflowDescriptor} +import cromwell.core.path.PathBuilder + +object WorkflowPathsWithDocker { + val DockerRoot = Paths.get("/root") +} + +class WorkflowPathsWithDocker(val workflowDescriptor: BackendWorkflowDescriptor, val config: Config, val pathBuilders: List[PathBuilder] = WorkflowPaths.DefaultPathBuilders) extends WorkflowPaths { + val dockerWorkflowRoot = workflowPathBuilder(WorkflowPathsWithDocker.DockerRoot) + override def toJobPaths(jobKey: BackendJobDescriptorKey): JobPaths = new JobPathsWithDocker(jobKey, workflowDescriptor, config, pathBuilders) +} \ No newline at end of file diff --git a/backend/src/main/scala/cromwell/backend/package.scala b/backend/src/main/scala/cromwell/backend/package.scala index 3bad6f61f72..132fcf57887 100644 --- a/backend/src/main/scala/cromwell/backend/package.scala +++ b/backend/src/main/scala/cromwell/backend/package.scala @@ -1,14 +1,6 @@ package cromwell -import wdl4s.values.WdlValue - -import scala.language.postfixOps -import scala.util.Success - package object backend { - implicit class AugmentedAttemptedLookupSequence(s: Seq[AttemptedLookupResult]) { - def toLookupMap: Map[String, WdlValue] = s collect { - case AttemptedLookupResult(name, Success(value)) => (name, value) - } toMap - } + /** Represents the jobKeys executed by a (potentially sub-) workflow at a given point in time */ + type JobExecutionMap = Map[BackendWorkflowDescriptor, List[BackendJobDescriptorKey]] 
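  // Illustrative sketch of the shape only (the descriptor and call values here are assumed,
  // not taken from this change): a workflow whose "hello" call was attempted twice would be
  // recorded as
  //   Map(workflowDescriptor -> List(
  //     BackendJobDescriptorKey(helloCall, None, 1),
  //     BackendJobDescriptorKey(helloCall, None, 2)))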
} diff --git a/backend/src/main/scala/cromwell/backend/validation/RuntimeAttributesValidation.scala b/backend/src/main/scala/cromwell/backend/validation/RuntimeAttributesValidation.scala index 9e56c71be0f..1ba92527ecd 100644 --- a/backend/src/main/scala/cromwell/backend/validation/RuntimeAttributesValidation.scala +++ b/backend/src/main/scala/cromwell/backend/validation/RuntimeAttributesValidation.scala @@ -1,9 +1,8 @@ package cromwell.backend.validation import cats.syntax.validated._ -import cromwell.backend.wdl.OnlyPureFunctions +import wdl4s.expression.PureStandardLibraryFunctions import cromwell.backend.{MemorySize, RuntimeAttributeDefinition} -import cromwell.core._ import cromwell.core.ErrorOr._ import org.slf4j.Logger import wdl4s.WdlExpression @@ -336,7 +335,7 @@ trait RuntimeAttributesValidation[ValidatedType] { For now, if something tries to "lookup" a value, convert it to a WdlString. */ val wdlStringLookup: ScopedLookupFunction = (value: String) => WdlString(value) - wdlExpression.evaluate(wdlStringLookup, OnlyPureFunctions) match { + wdlExpression.evaluate(wdlStringLookup, PureStandardLibraryFunctions) match { case Success(wdlValue) => validateExpression.applyOrElse(wdlValue, (_: Any) => false) case Failure(throwable) => throw new RuntimeException(s"Expression evaluation failed due to $throwable: $wdlExpression", throwable) diff --git a/backend/src/main/scala/cromwell/backend/wdl/Command.scala b/backend/src/main/scala/cromwell/backend/wdl/Command.scala new file mode 100644 index 00000000000..b0e3c92947e --- /dev/null +++ b/backend/src/main/scala/cromwell/backend/wdl/Command.scala @@ -0,0 +1,31 @@ +package cromwell.backend.wdl + +import cromwell.backend.BackendJobDescriptor +import wdl4s.EvaluatedTaskInputs +import wdl4s.expression.WdlFunctions +import wdl4s.values.WdlValue + +import scala.util.{Success, Try} + +object Command { + + /** + * Instantiate the command for this job descriptor. + * + * @param jobDescriptor jobDescriptor to instantiate the command for + * @param callEngineFunction engine functions to use to evaluate expressions inside the command + * @param inputsPreProcessor function to be applied to the task inputs before they are used to instantiate the command + * Typically this is where localization and/or file path transformation work would be done. + * The return value of the function is the inputs map that will be used to resolve variables in the command line. 
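   *                           A minimal usage sketch (jobDescriptor, engineFunctions and
   *                           localizeInputs are assumed caller-side values, not defined here):
   *                           {{{
   *                           val command: Try[String] =
   *                             Command.instantiate(jobDescriptor, engineFunctions,
   *                               inputsPreProcessor = localizeInputs)
   *                           }}}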
+ * @param valueMapper function to apply, during instantiation of the command line, after a variable is resolved + * @return + */ + def instantiate(jobDescriptor: BackendJobDescriptor, + callEngineFunction: WdlFunctions[WdlValue], + inputsPreProcessor: EvaluatedTaskInputs => Try[EvaluatedTaskInputs] = (i: EvaluatedTaskInputs) => Success(i), + valueMapper: WdlValue => WdlValue = identity): Try[String] = { + inputsPreProcessor(jobDescriptor.inputDeclarations) flatMap { mappedInputs => + jobDescriptor.call.task.instantiateCommand(mappedInputs, callEngineFunction, valueMapper) + } + } +} diff --git a/backend/src/main/scala/cromwell/backend/wdl/FileSystems.scala b/backend/src/main/scala/cromwell/backend/wdl/FileSystems.scala deleted file mode 100644 index 8919b005805..00000000000 --- a/backend/src/main/scala/cromwell/backend/wdl/FileSystems.scala +++ /dev/null @@ -1,30 +0,0 @@ -package cromwell.backend.wdl - -import java.nio.file.{FileSystem, Path} - -import cromwell.core.PathFactory - -trait FileSystems extends PathFactory { - - /** - * Ordered list of filesystems to be used to execute wdl functions needing IO. - */ - def fileSystems: List[FileSystem] - - /** - * Function applied after a string is successfully resolved to a java.nio.Path - */ - def postMapping(path: Path): Path = path - - /** - * Function applied before a string is attempted to be resolved to a java.nio.Path - */ - def preMapping(string: String): String = string - - /** - * Use fileSystems in order to try to create a java.nio.Path from path that will be used to perform IO. - * If no filesystem is able to construct a Path from the String, an exception will be raised. - */ - protected final def toPath(path: String) = postMapping(buildPath(preMapping(path), fileSystems)) - -} diff --git a/backend/src/main/scala/cromwell/backend/wdl/OutputEvaluator.scala b/backend/src/main/scala/cromwell/backend/wdl/OutputEvaluator.scala new file mode 100644 index 00000000000..ab6c66b0d89 --- /dev/null +++ b/backend/src/main/scala/cromwell/backend/wdl/OutputEvaluator.scala @@ -0,0 +1,19 @@ +package cromwell.backend.wdl + +import cromwell.backend.BackendJobDescriptor +import cromwell.core.JobOutput +import wdl4s.LocallyQualifiedName +import wdl4s.expression.WdlStandardLibraryFunctions +import wdl4s.values.WdlValue + +import scala.util.{Success, Try} + +object OutputEvaluator { + def evaluateOutputs(jobDescriptor: BackendJobDescriptor, + wdlFunctions: WdlStandardLibraryFunctions, + postMapper: WdlValue => Try[WdlValue] = v => Success(v)): Try[Map[LocallyQualifiedName, JobOutput]] = { + jobDescriptor.call.task.evaluateOutputs(jobDescriptor.inputDeclarations, wdlFunctions, postMapper) map { outputs => + outputs mapValues JobOutput + } + } +} diff --git a/backend/src/main/scala/cromwell/backend/wdl/PureFunctions.scala b/backend/src/main/scala/cromwell/backend/wdl/PureFunctions.scala deleted file mode 100644 index 9f297d47799..00000000000 --- a/backend/src/main/scala/cromwell/backend/wdl/PureFunctions.scala +++ /dev/null @@ -1,60 +0,0 @@ -package cromwell.backend.wdl - -import wdl4s.expression.WdlStandardLibraryFunctions -import wdl4s.types.{WdlArrayType, WdlIntegerType, WdlStringType} -import wdl4s.values.{WdlArray, WdlFile, WdlFloat, WdlInteger, WdlString, WdlValue} - -import scala.util.{Failure, Success, Try} - -case object OnlyPureFunctions extends WdlStandardLibraryFunctions with PureFunctions { - override def readFile(path: String): String = throw new NotImplementedError("readFile not available in PureNoFunctions.") - override def 
read_json(params: Seq[Try[WdlValue]]): Try[WdlValue] = throw new NotImplementedError("read_json not available in PureNoFunctions.") - override def write_json(params: Seq[Try[WdlValue]]): Try[WdlFile] = throw new NotImplementedError("write_json not available in PureNoFunctions.") - override def size(params: Seq[Try[WdlValue]]): Try[WdlFloat] = throw new NotImplementedError("size not available in PureNoFunctions.") - override def write_tsv(params: Seq[Try[WdlValue]]): Try[WdlFile] = throw new NotImplementedError("write_tsv not available in PureNoFunctions.") - override def stdout(params: Seq[Try[WdlValue]]): Try[WdlFile] = throw new NotImplementedError("stdout not available in PureNoFunctions.") - override def glob(path: String, pattern: String): Seq[String] = throw new NotImplementedError("glob not available in PureNoFunctions.") - override def writeTempFile(path: String, prefix: String, suffix: String, content: String): String = throw new NotImplementedError("writeTempFile not available in PureNoFunctions.") - override def stderr(params: Seq[Try[WdlValue]]): Try[WdlFile] = throw new NotImplementedError("stderr not available in PureNoFunctions.") -} - -trait PureFunctions { this: WdlStandardLibraryFunctions => - - def range(params: Seq[Try[WdlValue]]): Try[WdlArray] = { - def extractAndValidateArguments = params.size match { - case 1 => validateArguments(params.head) - case n => Failure(new IllegalArgumentException(s"Invalid number of parameters for engine function seq: $n. Ensure seq(x: WdlInteger) takes exactly 1 parameters.")) - } - - def validateArguments(value: Try[WdlValue]) = value match { - case Success(intValue: WdlValue) if WdlIntegerType.isCoerceableFrom(intValue.wdlType) => - Integer.valueOf(intValue.valueString) match { - case i if i >= 0 => Success(i) - case n => Failure(new IllegalArgumentException(s"Parameter to seq must be greater than or equal to 0 (but got $n)")) - } - case _ => Failure(new IllegalArgumentException(s"Invalid parameter for engine function seq: $value.")) - } - - extractAndValidateArguments map { intValue => WdlArray(WdlArrayType(WdlIntegerType), (0 until intValue).map(WdlInteger(_))) } - } - - override def sub(params: Seq[Try[WdlValue]]): Try[WdlString] = { - def extractArguments = params.size match { - case 3 => Success((params.head, params(1), params(2))) - case n => Failure(new IllegalArgumentException(s"Invalid number of parameters for engine function sub: $n. 
sub takes exactly 3 parameters.")) - } - - def validateArguments(values: (Try[WdlValue], Try[WdlValue], Try[WdlValue])) = values match { - case (Success(strValue), Success(WdlString(pattern)), Success(replaceValue)) - if WdlStringType.isCoerceableFrom(strValue.wdlType) && - WdlStringType.isCoerceableFrom(replaceValue.wdlType) => - Success((strValue.valueString, pattern, replaceValue.valueString)) - case _ => Failure(new IllegalArgumentException(s"Invalid parameters for engine function sub: $values.")) - } - - for { - args <- extractArguments - (str, pattern, replace) <- validateArguments(args) - } yield WdlString(pattern.r.replaceAllIn(str, replace)) - } -} diff --git a/backend/src/main/scala/cromwell/backend/wdl/ReadLikeFunctions.scala b/backend/src/main/scala/cromwell/backend/wdl/ReadLikeFunctions.scala index 1f06e10d853..0f3e4679ee5 100644 --- a/backend/src/main/scala/cromwell/backend/wdl/ReadLikeFunctions.scala +++ b/backend/src/main/scala/cromwell/backend/wdl/ReadLikeFunctions.scala @@ -1,6 +1,7 @@ package cromwell.backend.wdl import cromwell.backend.MemorySize +import cromwell.core.path.PathFactory import wdl4s.expression.WdlStandardLibraryFunctions import wdl4s.parser.MemoryUnit import wdl4s.types.{WdlArrayType, WdlFileType, WdlObjectType, WdlStringType} @@ -8,7 +9,7 @@ import wdl4s.values._ import scala.util.{Failure, Success, Try} -trait ReadLikeFunctions extends FileSystems { this: WdlStandardLibraryFunctions => +trait ReadLikeFunctions extends PathFactory { this: WdlStandardLibraryFunctions => import better.files._ /** @@ -27,7 +28,7 @@ trait ReadLikeFunctions extends FileSystems { this: WdlStandardLibraryFunctions wdlObjects <- WdlObject.fromTsv(contents) } yield wdlObjects - override def readFile(path: String): String = File(toPath(path)).contentAsString + override def readFile(path: String): String = File(buildPath(path)).contentAsString /** * Read all lines from the file referenced by the first parameter and return an Array[String] @@ -93,7 +94,7 @@ trait ReadLikeFunctions extends FileSystems { this: WdlStandardLibraryFunctions for { value <- wdlValue unit <- convertTo - } yield MemorySize(File(toPath(value.valueString)).size.toDouble, MemoryUnit.Bytes).to(unit).amount + } yield MemorySize(File(buildPath(value.valueString)).size.toDouble, MemoryUnit.Bytes).to(unit).amount } params match { diff --git a/backend/src/main/scala/cromwell/backend/wdl/WriteFunctions.scala b/backend/src/main/scala/cromwell/backend/wdl/WriteFunctions.scala index cb8eea297af..0f602c76e3b 100644 --- a/backend/src/main/scala/cromwell/backend/wdl/WriteFunctions.scala +++ b/backend/src/main/scala/cromwell/backend/wdl/WriteFunctions.scala @@ -16,21 +16,17 @@ trait WriteFunctions { this: WdlStandardLibraryFunctions => */ def writeDirectory: Path - private lazy val absoluteDirectory = { - File(writeDirectory).createDirectories().path - } - - override def tempFilePath = absoluteDirectory.toString + private lazy val _writeDirectory = File(writeDirectory).createDirectories() def writeTempFile(path: String,prefix: String,suffix: String,content: String): String = throw new NotImplementedError("This method is not used anywhere and should be removed") private def writeContent(baseName: String, content: String): Try[WdlFile] = { - val fullPath = File(absoluteDirectory)./(s"$baseName${content.md5Sum}.tmp") + val tmpFile = _writeDirectory / s"$baseName-${content.md5Sum}.tmp" Try { - if (!fullPath.exists) fullPath.write(content) + if (tmpFile.notExists) tmpFile.write(content) } map { _ => - 
WdlFile(fullPath.pathAsString) + WdlFile(tmpFile.uri.toString) } } diff --git a/backend/src/main/scala/cromwell/backend/wfs/DefaultWorkflowFileSystemProvider.scala b/backend/src/main/scala/cromwell/backend/wfs/DefaultWorkflowFileSystemProvider.scala deleted file mode 100644 index a10b46a0d5e..00000000000 --- a/backend/src/main/scala/cromwell/backend/wfs/DefaultWorkflowFileSystemProvider.scala +++ /dev/null @@ -1,9 +0,0 @@ -package cromwell.backend.wfs - -import java.nio.file.FileSystems - -object DefaultWorkflowFileSystemProvider extends WorkflowFileSystemProvider { - override def fileSystemOption(params: WorkflowFileSystemProviderParams) = { - Option(FileSystems.getDefault) - } -} diff --git a/backend/src/main/scala/cromwell/backend/wfs/DefaultWorkflowPathBuilder.scala b/backend/src/main/scala/cromwell/backend/wfs/DefaultWorkflowPathBuilder.scala new file mode 100644 index 00000000000..43af11d8732 --- /dev/null +++ b/backend/src/main/scala/cromwell/backend/wfs/DefaultWorkflowPathBuilder.scala @@ -0,0 +1,8 @@ +package cromwell.backend.wfs + +import cromwell.core.path.DefaultPathBuilder + + +object DefaultWorkflowPathBuilder extends WorkflowPathBuilder { + override def pathBuilderOption(params: WorkflowFileSystemProviderParams) = Option(DefaultPathBuilder) +} diff --git a/backend/src/main/scala/cromwell/backend/wfs/WorkflowFileSystemProvider.scala b/backend/src/main/scala/cromwell/backend/wfs/WorkflowFileSystemProvider.scala deleted file mode 100644 index de8272473ed..00000000000 --- a/backend/src/main/scala/cromwell/backend/wfs/WorkflowFileSystemProvider.scala +++ /dev/null @@ -1,34 +0,0 @@ -package cromwell.backend.wfs - -import java.nio.file.FileSystem - -import com.typesafe.config.{Config, ConfigFactory} -import cromwell.backend.io.WorkflowPaths -import cromwell.backend.{BackendConfigurationDescriptor, BackendWorkflowDescriptor} -import cromwell.core.WorkflowOptions -import net.ceedubs.ficus.Ficus._ - -import scala.concurrent.ExecutionContext - -object WorkflowFileSystemProvider { - def workflowPaths(configurationDescriptor: BackendConfigurationDescriptor, - workflowDescriptor: BackendWorkflowDescriptor, - providers: Traversable[WorkflowFileSystemProvider], - fileSystemExecutionContext: ExecutionContext): WorkflowPaths = { - val backendConfig = configurationDescriptor.backendConfig - val fileSystemConfig = backendConfig.as[Option[Config]]("filesystems").getOrElse(ConfigFactory.empty()) - val globalConfig = configurationDescriptor.globalConfig - val params = WorkflowFileSystemProviderParams(fileSystemConfig, globalConfig, workflowDescriptor.workflowOptions, - fileSystemExecutionContext) - val fileSystems = providers.flatMap(_.fileSystemOption(params)).toList - new WorkflowPaths(workflowDescriptor, configurationDescriptor.backendConfig, fileSystems) - } -} - -final case class WorkflowFileSystemProviderParams(fileSystemConfig: Config, globalConfig: Config, - workflowOptions: WorkflowOptions, - fileSystemExecutionContext: ExecutionContext) - -trait WorkflowFileSystemProvider { - def fileSystemOption(params: WorkflowFileSystemProviderParams): Option[FileSystem] -} diff --git a/backend/src/main/scala/cromwell/backend/wfs/WorkflowPathBuilder.scala b/backend/src/main/scala/cromwell/backend/wfs/WorkflowPathBuilder.scala new file mode 100644 index 00000000000..bd39ae2c911 --- /dev/null +++ b/backend/src/main/scala/cromwell/backend/wfs/WorkflowPathBuilder.scala @@ -0,0 +1,25 @@ +package cromwell.backend.wfs + +import com.typesafe.config.Config +import cromwell.backend.io.{WorkflowPathsWithDocker, 
WorkflowPaths} +import cromwell.backend.{BackendConfigurationDescriptor, BackendWorkflowDescriptor} +import cromwell.core.WorkflowOptions +import cromwell.core.path.PathBuilder + +import scala.concurrent.ExecutionContext + +object WorkflowPathBuilder { + def workflowPaths(configurationDescriptor: BackendConfigurationDescriptor, + workflowDescriptor: BackendWorkflowDescriptor, + pathBuilders: List[PathBuilder]): WorkflowPaths = { + new WorkflowPathsWithDocker(workflowDescriptor, configurationDescriptor.backendConfig, pathBuilders) + } +} + +final case class WorkflowFileSystemProviderParams(fileSystemConfig: Config, globalConfig: Config, + workflowOptions: WorkflowOptions, + fileSystemExecutionContext: ExecutionContext) + +trait WorkflowPathBuilder { + def pathBuilderOption(params: WorkflowFileSystemProviderParams): Option[PathBuilder] +} diff --git a/backend/src/test/scala/cromwell/backend/BackendSpec.scala b/backend/src/test/scala/cromwell/backend/BackendSpec.scala index c5617ab43ed..7e2354bd515 100644 --- a/backend/src/test/scala/cromwell/backend/BackendSpec.scala +++ b/backend/src/test/scala/cromwell/backend/BackendSpec.scala @@ -1,18 +1,20 @@ package cromwell.backend import com.typesafe.config.ConfigFactory -import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, FailedNonRetryableResponse, FailedRetryableResponse, SucceededResponse} +import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, JobFailedNonRetryableResponse, JobFailedRetryableResponse, JobSucceededResponse} import cromwell.backend.io.TestWorkflows._ import cromwell.core.{WorkflowId, WorkflowOptions} +import wdl4s.util.AggregatedException import org.scalatest.Matchers import org.scalatest.concurrent.ScalaFutures import org.scalatest.time.{Millis, Seconds, Span} +import org.specs2.mock.Mockito import spray.json.{JsObject, JsValue} import wdl4s._ import wdl4s.expression.NoFunctions import wdl4s.values.WdlValue -trait BackendSpec extends ScalaFutures with Matchers { +trait BackendSpec extends ScalaFutures with Matchers with Mockito { implicit val defaultPatience = PatienceConfig(timeout = Span(5, Seconds), interval = Span(500, Millis)) @@ -26,32 +28,44 @@ trait BackendSpec extends ScalaFutures with Matchers { runtime: String = "") = { BackendWorkflowDescriptor( WorkflowId.randomId(), - NamespaceWithWorkflow.load(wdl.replaceAll("RUNTIME", runtime)), + WdlNamespaceWithWorkflow.load(wdl.replaceAll("RUNTIME", runtime), Seq.empty[ImportResolver]).workflow, inputs, options ) } + def fqnMapToDeclarationMap(m: Map[String, WdlValue]): Map[Declaration, WdlValue] = { + m map { + case (fqn, v) => + val mockDeclaration = mock[Declaration] + mockDeclaration.fullyQualifiedName returns fqn + mockDeclaration.unqualifiedName returns fqn.split('.').lastOption.getOrElse(fqn) + mockDeclaration -> v + } + } + def jobDescriptorFromSingleCallWorkflow(workflowDescriptor: BackendWorkflowDescriptor, inputs: Map[String, WdlValue], options: WorkflowOptions, runtimeAttributeDefinitions: Set[RuntimeAttributeDefinition]): BackendJobDescriptor = { - val call = workflowDescriptor.workflowNamespace.workflow.calls.head + val call = workflowDescriptor.workflow.taskCalls.head val jobKey = BackendJobDescriptorKey(call, None, 1) - val evaluatedAttributes = RuntimeAttributeDefinition.evaluateRuntimeAttributes(call.task.runtimeAttributes, NoFunctions, inputs).get // .get is OK here because this is a test + val inputDeclarations = call.evaluateTaskInputs(inputs, NoFunctions) + val evaluatedAttributes = 
RuntimeAttributeDefinition.evaluateRuntimeAttributes(call.task.runtimeAttributes, NoFunctions, inputDeclarations).get // .get is OK here because this is a test val runtimeAttributes = RuntimeAttributeDefinition.addDefaultsToAttributes(runtimeAttributeDefinitions, options)(evaluatedAttributes) - BackendJobDescriptor(workflowDescriptor, jobKey, runtimeAttributes, inputs) + BackendJobDescriptor(workflowDescriptor, jobKey, runtimeAttributes, inputDeclarations) } def jobDescriptorFromSingleCallWorkflow(wdl: WdlSource, options: WorkflowOptions, runtimeAttributeDefinitions: Set[RuntimeAttributeDefinition]): BackendJobDescriptor = { val workflowDescriptor = buildWorkflowDescriptor(wdl) - val call = workflowDescriptor.workflowNamespace.workflow.calls.head + val call = workflowDescriptor.workflow.taskCalls.head val jobKey = BackendJobDescriptorKey(call, None, 1) - val evaluatedAttributes = RuntimeAttributeDefinition.evaluateRuntimeAttributes(call.task.runtimeAttributes, NoFunctions, workflowDescriptor.inputs).get // .get is OK here because this is a test + val inputDeclarations = fqnMapToDeclarationMap(workflowDescriptor.inputs) + val evaluatedAttributes = RuntimeAttributeDefinition.evaluateRuntimeAttributes(call.task.runtimeAttributes, NoFunctions, inputDeclarations).get // .get is OK here because this is a test val runtimeAttributes = RuntimeAttributeDefinition.addDefaultsToAttributes(runtimeAttributeDefinitions, options)(evaluatedAttributes) - BackendJobDescriptor(workflowDescriptor, jobKey, runtimeAttributes, workflowDescriptor.inputs) + BackendJobDescriptor(workflowDescriptor, jobKey, runtimeAttributes, inputDeclarations) } def jobDescriptorFromSingleCallWorkflow(wdl: WdlSource, @@ -60,16 +74,17 @@ trait BackendSpec extends ScalaFutures with Matchers { options: WorkflowOptions, runtimeAttributeDefinitions: Set[RuntimeAttributeDefinition]): BackendJobDescriptor = { val workflowDescriptor = buildWorkflowDescriptor(wdl, runtime = runtime) - val call = workflowDescriptor.workflowNamespace.workflow.calls.head + val call = workflowDescriptor.workflow.taskCalls.head val jobKey = BackendJobDescriptorKey(call, None, attempt) - val evaluatedAttributes = RuntimeAttributeDefinition.evaluateRuntimeAttributes(call.task.runtimeAttributes, NoFunctions, workflowDescriptor.inputs).get // .get is OK here because this is a test + val inputDeclarations = fqnMapToDeclarationMap(workflowDescriptor.inputs) + val evaluatedAttributes = RuntimeAttributeDefinition.evaluateRuntimeAttributes(call.task.runtimeAttributes, NoFunctions, inputDeclarations).get // .get is OK here because this is a test val runtimeAttributes = RuntimeAttributeDefinition.addDefaultsToAttributes(runtimeAttributeDefinitions, options)(evaluatedAttributes) - BackendJobDescriptor(workflowDescriptor, jobKey, runtimeAttributes, workflowDescriptor.inputs) + BackendJobDescriptor(workflowDescriptor, jobKey, runtimeAttributes, inputDeclarations) } def assertResponse(executionResponse: BackendJobExecutionResponse, expectedResponse: BackendJobExecutionResponse) = { (executionResponse, expectedResponse) match { - case (SucceededResponse(_, _, responseOutputs, _, _), SucceededResponse(_, _, expectedOutputs, _, _)) => + case (JobSucceededResponse(_, _, responseOutputs, _, _), JobSucceededResponse(_, _, expectedOutputs, _, _)) => responseOutputs.size shouldBe expectedOutputs.size responseOutputs foreach { case (fqn, out) => @@ -77,16 +92,23 @@ trait BackendSpec extends ScalaFutures with Matchers { expectedOut.isDefined shouldBe true 
expectedOut.get.wdlValue.valueString shouldBe out.wdlValue.valueString } - case (FailedNonRetryableResponse(_, failure, _), FailedNonRetryableResponse(_, expectedFailure, _)) => + case (JobFailedNonRetryableResponse(_, failure, _), JobFailedNonRetryableResponse(_, expectedFailure, _)) => failure.getClass shouldBe expectedFailure.getClass - failure.getMessage should include(expectedFailure.getMessage) - case (FailedRetryableResponse(_, failure, _), FailedRetryableResponse(_, expectedFailure, _)) => + concatenateCauseMessages(failure) should include(expectedFailure.getMessage) + case (JobFailedRetryableResponse(_, failure, _), JobFailedRetryableResponse(_, expectedFailure, _)) => failure.getClass shouldBe expectedFailure.getClass case (response, expectation) => fail(s"Execution response $response wasn't conform to expectation $expectation") } } + private def concatenateCauseMessages(t: Throwable): String = t match { + case null => "" + case ae: AggregatedException => ae.getMessage + ae.exceptions.map(concatenateCauseMessages(_)).mkString + concatenateCauseMessages(ae.getCause) + case other: Throwable => other.getMessage + concatenateCauseMessages(t.getCause) + } + + def executeJobAndAssertOutputs(backend: BackendJobExecutionActor, expectedResponse: BackendJobExecutionResponse) = { whenReady(backend.execute) { executionResponse => assertResponse(executionResponse, expectedResponse) @@ -97,13 +119,13 @@ trait BackendSpec extends ScalaFutures with Matchers { ConfigFactory.parseString("{}"), ConfigFactory.load()) def firstJobDescriptorKey(workflowDescriptor: BackendWorkflowDescriptor): BackendJobDescriptorKey = { - val call = workflowDescriptor.workflowNamespace.workflow.calls.head + val call = workflowDescriptor.workflow.taskCalls.head BackendJobDescriptorKey(call, None, 1) } def firstJobDescriptor(workflowDescriptor: BackendWorkflowDescriptor, inputs: Map[String, WdlValue] = Map.empty) = { - BackendJobDescriptor(workflowDescriptor, firstJobDescriptorKey(workflowDescriptor), Map.empty, inputs) + BackendJobDescriptor(workflowDescriptor, firstJobDescriptorKey(workflowDescriptor), Map.empty, fqnMapToDeclarationMap(inputs)) } } diff --git a/backend/src/test/scala/cromwell/backend/io/JobPathsSpec.scala b/backend/src/test/scala/cromwell/backend/io/JobPathsSpec.scala index f03ca2cf961..dffd648844f 100644 --- a/backend/src/test/scala/cromwell/backend/io/JobPathsSpec.scala +++ b/backend/src/test/scala/cromwell/backend/io/JobPathsSpec.scala @@ -6,7 +6,7 @@ import better.files._ import com.typesafe.config.ConfigFactory import cromwell.backend.{BackendConfigurationDescriptor, BackendJobDescriptorKey, BackendSpec} import org.scalatest.{FlatSpec, Matchers} -import wdl4s.Call +import wdl4s.TaskCall class JobPathsSpec extends FlatSpec with Matchers with BackendSpec { @@ -32,46 +32,46 @@ class JobPathsSpec extends FlatSpec with Matchers with BackendSpec { "JobPaths" should "provide correct paths for a job" in { val wd = buildWorkflowDescriptor(TestWorkflows.HelloWorld) - val call: Call = wd.workflowNamespace.workflow.calls.head + val call: TaskCall = wd.workflow.taskCalls.head val jobKey = BackendJobDescriptorKey(call, None, 1) - val jobPaths = new JobPaths(wd, backendConfig, jobKey) + val jobPaths = new JobPathsWithDocker(jobKey, wd, backendConfig) val id = wd.id jobPaths.callRoot.toString shouldBe - File(s"local-cromwell-executions/hello/$id/call-hello").pathAsString + File(s"local-cromwell-executions/wf_hello/$id/call-hello").pathAsString jobPaths.callExecutionRoot.toString shouldBe - 
File(s"local-cromwell-executions/hello/$id/call-hello/execution").pathAsString + File(s"local-cromwell-executions/wf_hello/$id/call-hello/execution").pathAsString jobPaths.returnCode.toString shouldBe - File(s"local-cromwell-executions/hello/$id/call-hello/execution/rc").pathAsString + File(s"local-cromwell-executions/wf_hello/$id/call-hello/execution/rc").pathAsString jobPaths.script.toString shouldBe - File(s"local-cromwell-executions/hello/$id/call-hello/execution/script").pathAsString + File(s"local-cromwell-executions/wf_hello/$id/call-hello/execution/script").pathAsString jobPaths.stderr.toString shouldBe - File(s"local-cromwell-executions/hello/$id/call-hello/execution/stderr").pathAsString + File(s"local-cromwell-executions/wf_hello/$id/call-hello/execution/stderr").pathAsString jobPaths.stdout.toString shouldBe - File(s"local-cromwell-executions/hello/$id/call-hello/execution/stdout").pathAsString + File(s"local-cromwell-executions/wf_hello/$id/call-hello/execution/stdout").pathAsString jobPaths.callExecutionRoot.toString shouldBe - File(s"local-cromwell-executions/hello/$id/call-hello/execution").pathAsString + File(s"local-cromwell-executions/wf_hello/$id/call-hello/execution").pathAsString jobPaths.callDockerRoot.toString shouldBe - File(s"/root/hello/$id/call-hello").pathAsString + File(s"/root/wf_hello/$id/call-hello").pathAsString jobPaths.callExecutionDockerRoot.toString shouldBe - File(s"/root/hello/$id/call-hello/execution").pathAsString - jobPaths.toDockerPath(Paths.get(s"local-cromwell-executions/hello/$id/call-hello/execution/stdout")).toString shouldBe - File(s"/root/hello/$id/call-hello/execution/stdout").pathAsString + File(s"/root/wf_hello/$id/call-hello/execution").pathAsString + jobPaths.toDockerPath(Paths.get(s"local-cromwell-executions/wf_hello/$id/call-hello/execution/stdout")).toString shouldBe + File(s"/root/wf_hello/$id/call-hello/execution/stdout").pathAsString jobPaths.toDockerPath(Paths.get("/root/dock/path")).toString shouldBe File("/root/dock/path").pathAsString val jobKeySharded = BackendJobDescriptorKey(call, Option(0), 1) - val jobPathsSharded = new JobPaths(wd, backendConfig, jobKeySharded) + val jobPathsSharded = new JobPathsWithDocker(jobKeySharded, wd, backendConfig) jobPathsSharded.callExecutionRoot.toString shouldBe - File(s"local-cromwell-executions/hello/$id/call-hello/shard-0/execution").pathAsString + File(s"local-cromwell-executions/wf_hello/$id/call-hello/shard-0/execution").pathAsString val jobKeyAttempt = BackendJobDescriptorKey(call, None, 2) - val jobPathsAttempt = new JobPaths(wd, backendConfig, jobKeyAttempt) + val jobPathsAttempt = new JobPathsWithDocker(jobKeyAttempt, wd, backendConfig) jobPathsAttempt.callExecutionRoot.toString shouldBe - File(s"local-cromwell-executions/hello/$id/call-hello/attempt-2/execution").pathAsString + File(s"local-cromwell-executions/wf_hello/$id/call-hello/attempt-2/execution").pathAsString val jobKeyShardedAttempt = BackendJobDescriptorKey(call, Option(0), 2) - val jobPathsShardedAttempt = new JobPaths(wd, backendConfig, jobKeyShardedAttempt) + val jobPathsShardedAttempt = new JobPathsWithDocker(jobKeyShardedAttempt, wd, backendConfig) jobPathsShardedAttempt.callExecutionRoot.toString shouldBe - File(s"local-cromwell-executions/hello/$id/call-hello/shard-0/attempt-2/execution").pathAsString + File(s"local-cromwell-executions/wf_hello/$id/call-hello/shard-0/attempt-2/execution").pathAsString } } diff --git a/backend/src/test/scala/cromwell/backend/io/TestWorkflows.scala 
b/backend/src/test/scala/cromwell/backend/io/TestWorkflows.scala index 34497e2ef0b..28d35977d69 100644 --- a/backend/src/test/scala/cromwell/backend/io/TestWorkflows.scala +++ b/backend/src/test/scala/cromwell/backend/io/TestWorkflows.scala @@ -23,7 +23,7 @@ object TestWorkflows { | RUNTIME |} | - |workflow hello { + |workflow wf_hello { | call hello |} """.stripMargin @@ -39,7 +39,7 @@ object TestWorkflows { | } |} | - |workflow goodbye { + |workflow wf_goodbye { | call goodbye |} """.stripMargin @@ -61,7 +61,7 @@ object TestWorkflows { | RUNTIME |} | - |workflow localize { + |workflow wf_localize { | File workflowFile | call localize { input: inputFileFromCallInputs = workflowFile } |} @@ -76,7 +76,7 @@ object TestWorkflows { | } |} | - |workflow abort { + |workflow wf_abort { | call abort |} """.stripMargin @@ -93,7 +93,7 @@ object TestWorkflows { | } |} | - |workflow scattering { + |workflow wf_scattering { | Array[Int] numbers = [1, 2, 3] | scatter (i in numbers) { | call scattering { input: intNumber = i } @@ -117,7 +117,7 @@ object TestWorkflows { | } |} | - |workflow localize { + |workflow wf_localize { | call localize |} """.stripMargin @@ -133,7 +133,7 @@ object TestWorkflows { | } |} | - |workflow localize { + |workflow wf_localize { | call localize |} """.stripMargin diff --git a/backend/src/test/scala/cromwell/backend/io/WorkflowPathsSpec.scala b/backend/src/test/scala/cromwell/backend/io/WorkflowPathsSpec.scala index bfae5930da3..36d774bf0b4 100644 --- a/backend/src/test/scala/cromwell/backend/io/WorkflowPathsSpec.scala +++ b/backend/src/test/scala/cromwell/backend/io/WorkflowPathsSpec.scala @@ -2,13 +2,13 @@ package cromwell.backend.io import better.files._ import com.typesafe.config.Config -import cromwell.backend.BackendSpec -import org.mockito.Matchers._ +import cromwell.backend.{BackendJobBreadCrumb, BackendSpec, BackendWorkflowDescriptor} +import cromwell.core.{JobKey, WorkflowId} import org.mockito.Mockito._ -import org.scalatest.mockito.MockitoSugar import org.scalatest.{FlatSpec, Matchers} +import wdl4s.{Call, Workflow} -class WorkflowPathsSpec extends FlatSpec with Matchers with BackendSpec with MockitoSugar { +class WorkflowPathsSpec extends FlatSpec with Matchers with BackendSpec { val backendConfig = mock[Config] @@ -16,11 +16,49 @@ class WorkflowPathsSpec extends FlatSpec with Matchers with BackendSpec with Moc when(backendConfig.hasPath(any[String])).thenReturn(true) when(backendConfig.getString(any[String])).thenReturn("local-cromwell-executions") // This is the folder defined in the config as the execution root dir val wd = buildWorkflowDescriptor(TestWorkflows.HelloWorld) - val workflowPaths = new WorkflowPaths(wd, backendConfig) + val workflowPaths = new WorkflowPathsWithDocker(wd, backendConfig) val id = wd.id workflowPaths.workflowRoot.toString shouldBe - File(s"local-cromwell-executions/hello/$id").pathAsString + File(s"local-cromwell-executions/wf_hello/$id").pathAsString workflowPaths.dockerWorkflowRoot.toString shouldBe - s"/root/hello/$id" + s"/root/wf_hello/$id" + } + + "WorkflowPaths" should "provide correct paths for a sub workflow" in { + when(backendConfig.hasPath(any[String])).thenReturn(true) + when(backendConfig.getString(any[String])).thenReturn("local-cromwell-executions") // This is the folder defined in the config as the execution root dir + + val rootWd = mock[BackendWorkflowDescriptor] + val rootWorkflow = mock[Workflow] + val rootWorkflowId = WorkflowId.randomId() + rootWorkflow.unqualifiedName returns "rootWorkflow" + rootWd.workflow 
returns rootWorkflow + rootWd.id returns rootWorkflowId + + val subWd = mock[BackendWorkflowDescriptor] + val subWorkflow = mock[Workflow] + val subWorkflowId = WorkflowId.randomId() + subWorkflow.unqualifiedName returns "subWorkflow" + subWd.workflow returns subWorkflow + subWd.id returns subWorkflowId + + val call1 = mock[Call] + call1.unqualifiedName returns "call1" + val call2 = mock[Call] + call2.unqualifiedName returns "call2" + + val jobKey = new JobKey { + override def scope = call1 + override def tag: String = "tag1" + override def index: Option[Int] = Option(1) + override def attempt: Int = 2 + } + + subWd.breadCrumbs returns List(BackendJobBreadCrumb(rootWorkflow, rootWorkflowId, jobKey)) + subWd.id returns subWorkflowId + + val workflowPaths = new WorkflowPathsWithDocker(subWd, backendConfig) + workflowPaths.workflowRoot.toString shouldBe File(s"local-cromwell-executions/rootWorkflow/$rootWorkflowId/call-call1/shard-1/attempt-2/subWorkflow/$subWorkflowId").pathAsString + workflowPaths.dockerWorkflowRoot.toString shouldBe s"/root/rootWorkflow/$rootWorkflowId/call-call1/shard-1/attempt-2/subWorkflow/$subWorkflowId" } } diff --git a/backend/src/test/scala/cromwell/backend/validation/RuntimeAttributesDefaultSpec.scala b/backend/src/test/scala/cromwell/backend/validation/RuntimeAttributesDefaultSpec.scala index d1df9d79063..325f874b86d 100644 --- a/backend/src/test/scala/cromwell/backend/validation/RuntimeAttributesDefaultSpec.scala +++ b/backend/src/test/scala/cromwell/backend/validation/RuntimeAttributesDefaultSpec.scala @@ -80,7 +80,7 @@ class RuntimeAttributesDefaultSpec extends FlatSpec with Matchers { val defaults = workflowOptionsDefault(workflowOptions, coercionMap) defaults.isFailure shouldBe true - defaults.failed.get.getMessage shouldBe s"Could not parse JsonValue ${map("str")} to valid WdlValue for runtime attribute str" + defaults.failed.get.getMessage shouldBe s": RuntimeException: Could not parse JsonValue ${map("str")} to valid WdlValue for runtime attribute str" } it should "fold default values" in { diff --git a/backend/src/test/scala/cromwell/backend/wdl/PureStandardLibraryFunctionsSpec.scala b/backend/src/test/scala/cromwell/backend/wdl/PureStandardLibraryFunctionsSpec.scala new file mode 100644 index 00000000000..fb650737784 --- /dev/null +++ b/backend/src/test/scala/cromwell/backend/wdl/PureStandardLibraryFunctionsSpec.scala @@ -0,0 +1,30 @@ +package cromwell.backend.wdl + +import org.scalatest.{FlatSpec, Matchers} +import wdl4s.expression.PureStandardLibraryFunctions +import wdl4s.types.{WdlArrayType, WdlIntegerType} +import wdl4s.values.{WdlArray, WdlInteger} + +import scala.util.Success + + +class PureStandardLibraryFunctionsSpec extends FlatSpec with Matchers { + + behavior of "transpose" + + it should "transpose a 2x3 into a 3x2" in { + val inArray = WdlArray(WdlArrayType(WdlArrayType(WdlIntegerType)), List( + WdlArray(WdlArrayType(WdlIntegerType), List(WdlInteger(1), WdlInteger(2), WdlInteger(3))), + WdlArray(WdlArrayType(WdlIntegerType), List(WdlInteger(4), WdlInteger(5), WdlInteger(6))) + )) + + val expectedResult = WdlArray(WdlArrayType(WdlArrayType(WdlIntegerType)), List( + WdlArray(WdlArrayType(WdlIntegerType), List(WdlInteger(1), WdlInteger(4))), + WdlArray(WdlArrayType(WdlIntegerType), List(WdlInteger(2), WdlInteger(5))), + WdlArray(WdlArrayType(WdlIntegerType), List(WdlInteger(3), WdlInteger(6))) + )) + + PureStandardLibraryFunctions.transpose(Seq(Success(inArray))) should be(Success(expectedResult)) + } + +} diff --git a/build.sbt b/build.sbt 
index 8e666691fc6..bfaae97731e 100644 --- a/build.sbt +++ b/build.sbt @@ -9,6 +9,7 @@ lazy val gcsFileSystem = (project in file("filesystems/gcs")) .settings(gcsFileSystemSettings:_*) .withTestSettings .dependsOn(core) + .dependsOn(core % "test->test") lazy val databaseSql = (project in file("database/sql")) .settings(databaseSqlSettings:_*) diff --git a/core/src/main/resources/logback.xml b/core/src/main/resources/logback.xml new file mode 100644 index 00000000000..e165bc8b2a2 --- /dev/null +++ b/core/src/main/resources/logback.xml @@ -0,0 +1,88 @@ + + + + + + + + + + + + + + + + + + + + + %date %X{sourceThread} %-5level - %msg%n + + + + + + + + + + + + + + + + + + + + + + + + + + + ${FILEROLLER_DIR}/${FILEROLLER_NAME} + + + + + + ${FILEROLLER_DIR}/${FILEROLLER_NAMEPATTERN}-${FILEROLLER_NAME} + + + ${FILEROLLER_DIR}/%d{yyyyMMdd}-${FILEROLLER_NAME} + + + + ${FILEROLLER_MAXHISTORY} + + + + ${FILEROLLER_SIZECAP} + + + + + + %d{yyyy-MM-dd HH:mm:ss,SSS} [%thread] %-5level %logger{35} - %msg%n + + + + + + + + + + + + + + + + + diff --git a/core/src/main/resources/reference.conf b/core/src/main/resources/reference.conf index 9bf2807eaaa..52df9e77121 100644 --- a/core/src/main/resources/reference.conf +++ b/core/src/main/resources/reference.conf @@ -56,7 +56,7 @@ akka { system { # If 'true', a SIGINT will trigger Cromwell to attempt to abort all currently running jobs before exiting - abort-jobs-on-terminate = false + #abort-jobs-on-terminate = false # Max number of retries per job that the engine will attempt in case of a retryable failure received from the backend max-retries = 10 @@ -97,7 +97,14 @@ workflow-options { // Optional call-caching configuration. call-caching { + # Allows re-use of existing results for jobs you've already run + # (default: false) enabled = false + + # Whether to invalidate a cache result forever if we cannot reuse them. Disable this if you expect some cache copies + # to fail for external reasons which should not invalidate the cache (e.g. auth differences between users): + # (default: true) + invalidate-bad-cache-results = true } google { @@ -149,7 +156,7 @@ backend { run-in-background = true runtime-attributes = "String? docker" submit = "/bin/bash ${script}" - submit-docker = "docker run --rm -v ${cwd}:${docker_cwd} -i ${docker} /bin/bash < ${script}" + submit-docker = "docker run --rm -v ${cwd}:${docker_cwd} -i ${docker} /bin/bash ${docker_cwd}/execution/script" # Root directory where Cromwell writes job results. This directory must be # visible and writeable by the Cromwell process as well as the jobs that Cromwell @@ -163,6 +170,7 @@ backend { ] caching { + # When copying a cached result, what type of file duplication should occur. Attempted in the order listed below: duplication-strategy: [ "hard-link", "soft-link", "copy" ] @@ -251,7 +259,7 @@ backend { # #6. Job command. # docker { # #Allow soft links in dockerized jobs - # cmd = "docker run -w %s %s %s %s --rm %s %s" + # cmd = "docker run -w %s %s %s %s --rm %s /bin/bash -c \"%s\"" # defaultWorkingDir = "/workingDir/" # defaultOutputDir = "/output/" # } @@ -311,6 +319,15 @@ backend { # # Base bucket for workflow executions # root = "gs://my-cromwell-workflows-bucket" # + # # Set this to the lower of the two values "Queries per 100 seconds" and "Queries per 100 seconds per user" for + # # your project. + # # + # # Used to help determine maximum throughput to the Google Genomics API. Setting this value too low will + # # cause a drop in performance. 
Setting this value too high will cause QPS based locks from Google. + # # 1000 is the default "Queries per 100 seconds per user", 50000 is the default "Queries per 100 seconds" + # # See https://cloud.google.com/genomics/quotas for more information + # genomics-api-queries-per-100-seconds = 1000 + # # # Polling for completion backs-off gradually for slower-running jobs. # # This is the maximum polling interval (in seconds): # maximum-polling-interval = 600 @@ -325,6 +342,12 @@ backend { # # A reference to an auth defined in the `google` stanza at the top. This auth is used to create # # Pipelines and manipulate auth JSONs. # auth = "application-default" + # + # // alternative service account to use on the launched compute instance + # // NOTE: If combined with service account authorization, both that serivce account and this service account + # // must be able to read and write to the 'root' GCS path + # compute-service-account = "default" + # # # Endpoint for APIs, no reason to change this unless directed by Google. # endpoint-url = "https://genomics.googleapis.com/" # } @@ -338,6 +361,15 @@ backend { # } #} + #AWS { + # actor-factory = "cromwell.backend.impl.aws.AwsBackendActorFactory" + # config { + # ## These two settings are required to authenticate with the ECS service: + # accessKeyId = "..." + # secretKey = "..." + # } + #} + } } diff --git a/core/src/main/scala/cromwell/core/CallKey.scala b/core/src/main/scala/cromwell/core/CallKey.scala new file mode 100644 index 00000000000..547eadabde3 --- /dev/null +++ b/core/src/main/scala/cromwell/core/CallKey.scala @@ -0,0 +1,7 @@ +package cromwell.core + +import wdl4s.Call + +trait CallKey extends JobKey { + def scope: Call +} diff --git a/core/src/main/scala/cromwell/core/ExecutionStatus.scala b/core/src/main/scala/cromwell/core/ExecutionStatus.scala index 76acb29a4a9..353b44d65a4 100644 --- a/core/src/main/scala/cromwell/core/ExecutionStatus.scala +++ b/core/src/main/scala/cromwell/core/ExecutionStatus.scala @@ -2,7 +2,7 @@ package cromwell.core object ExecutionStatus extends Enumeration { type ExecutionStatus = Value - val NotStarted, Starting, Running, Failed, Preempted, Done, Aborted = Value + val NotStarted, QueuedInCromwell, Starting, Running, Failed, Preempted, Done, Aborted = Value val TerminalStatuses = Set(Failed, Done, Aborted, Preempted) implicit class EnhancedExecutionStatus(val status: ExecutionStatus) extends AnyVal { diff --git a/core/src/main/scala/cromwell/core/ExecutionStore.scala b/core/src/main/scala/cromwell/core/ExecutionStore.scala deleted file mode 100644 index 1632061ce3b..00000000000 --- a/core/src/main/scala/cromwell/core/ExecutionStore.scala +++ /dev/null @@ -1,13 +0,0 @@ -package cromwell.core - -import cromwell.core.ExecutionStatus._ - - -object ExecutionStore { - def empty = ExecutionStore(Map.empty) - type ExecutionStoreEntry = (JobKey, ExecutionStatus) -} - -case class ExecutionStore(store: Map[JobKey, ExecutionStatus]) { - def add(values: Map[JobKey, ExecutionStatus]) = this.copy(store = store ++ values) -} diff --git a/core/src/main/scala/cromwell/core/JobKey.scala b/core/src/main/scala/cromwell/core/JobKey.scala index 374e3d0ebc0..9fd22b31ec4 100644 --- a/core/src/main/scala/cromwell/core/JobKey.scala +++ b/core/src/main/scala/cromwell/core/JobKey.scala @@ -1,9 +1,9 @@ package cromwell.core -import wdl4s.Scope +import wdl4s.{GraphNode, Scope} trait JobKey { - def scope: Scope + def scope: Scope with GraphNode def index: Option[Int] def attempt: Int def tag: String @@ -12,4 +12,6 @@ trait JobKey { 
import ExecutionIndex.IndexEnhancedIndex s"${scope.fullyQualifiedName}:${index.fromIndex}:$attempt" } + + def isShard = index.isDefined } diff --git a/core/src/main/scala/cromwell/core/OutputStore.scala b/core/src/main/scala/cromwell/core/OutputStore.scala deleted file mode 100644 index 38bee68db7b..00000000000 --- a/core/src/main/scala/cromwell/core/OutputStore.scala +++ /dev/null @@ -1,40 +0,0 @@ -package cromwell.core - -import cromwell.core.ExecutionIndex._ -import cromwell.core.OutputStore.{OutputCallKey, OutputEntry} -import wdl4s.types.WdlType -import wdl4s.util.TryUtil -import wdl4s.values.{WdlCallOutputsObject, WdlValue} -import wdl4s.{Call, Scope} - -import scala.language.postfixOps -import scala.util.{Failure, Success, Try} - -object OutputStore { - case class OutputEntry(name: String, wdlType: WdlType, wdlValue: Option[WdlValue]) - case class OutputCallKey(call: Scope, index: ExecutionIndex) - def empty = OutputStore(Map.empty) -} - -case class OutputStore(store: Map[OutputCallKey, Traversable[OutputEntry]]) { - def add(values: Map[OutputCallKey, Traversable[OutputEntry]]) = this.copy(store = store ++ values) - - def fetchCallOutputEntries(call: Call, index: ExecutionIndex): Try[WdlCallOutputsObject] = { - def outputEntriesToMap(outputs: Traversable[OutputEntry]): Map[String, Try[WdlValue]] = { - outputs map { output => - output.wdlValue match { - case Some(wdlValue) => output.name -> Success(wdlValue) - case None => output.name -> Failure(new RuntimeException(s"Could not retrieve output ${output.name} value")) - } - } toMap - } - - store.get(OutputCallKey(call, index)) match { - case Some(outputs) => - TryUtil.sequenceMap(outputEntriesToMap(outputs), s"Output fetching for call ${call.unqualifiedName}") map { outputsMap => - WdlCallOutputsObject(call, outputsMap) - } - case None => Failure(new RuntimeException(s"Could not find call ${call.unqualifiedName}")) - } - } -} diff --git a/core/src/main/scala/cromwell/core/PathFactory.scala b/core/src/main/scala/cromwell/core/PathFactory.scala deleted file mode 100644 index f85a05c958f..00000000000 --- a/core/src/main/scala/cromwell/core/PathFactory.scala +++ /dev/null @@ -1,135 +0,0 @@ -package cromwell.core - -import java.io.Writer -import java.nio.file.{FileSystem, Path} - -import better.files.File - -import scala.collection.immutable.Queue -import scala.util.{Success, Failure, Try} - -class FileSystemNotFound(str: String) extends CromwellFatalException( - new IllegalArgumentException(s"Could not find suitable filesystem to parse $str") -) - -trait PathFactory { - private val schemeMatcher = """([a-z]+://).*""".r - - def findFileSystem(rawString: String, fss: List[FileSystem], mapping: PartialFunction[FileSystem, Try[Path]]) = { - fss.toStream collect mapping collectFirst { case Success(p) => p } getOrElse { - throw new FileSystemNotFound(rawString) - } - } - - def buildPath(rawString: String, fileSystems: List[FileSystem]): Path = { - findFileSystem(rawString, fileSystems, { - case fs: FileSystem => - if (hasWrongScheme(rawString, fs)) { - Failure(new IllegalArgumentException(s"$rawString scheme doesn't match ${fs.provider.getScheme}")) - } else { - Try(fs.getPath(rawString)) - } - }) - } - - def buildFile(rawString: String, fileSystems: List[FileSystem]): File = File(buildPath(rawString, fileSystems)) - - private def hasWrongScheme(rawString: String, fileSystem: FileSystem): Boolean = { - schemeMatcher.findFirstMatchIn(rawString) match { - case Some(m) => m.group(1) != fileSystem.provider().getScheme - case _ => false - } - } -} - 
-object PathFactory { - def swapExt(filePath: String, oldExt: String, newExt: String): String = { - filePath.stripSuffix(oldExt) + newExt - } - - implicit class EnhancedPath(val path: Path) extends AnyVal { - def swapExt(oldExt: String, newExt: String): Path = { - path.getFileSystem.getPath(s"${path.toString.stripSuffix(oldExt)}$newExt") - } - - def untailed = UntailedWriter(path) - - def tailed(tailedSize: Int) = TailedWriter(path, tailedSize) - } - - implicit class FlushingAndClosingWriter(writer: Writer) { - /** Convenience method to flush and close in one shot. */ - def flushAndClose() = { - writer.flush() - writer.close() - } - } -} - -/** - * Used with a `ProcessLogger`, writes lines with a newline. - */ -trait PathWriter { - import better.files._ - - val path: Path - lazy val writer: Writer = File(path).newBufferedWriter - - /** - * Passed to `ProcessLogger` to add a new line. - * - * @param string Line to add to the logs. - */ - def writeWithNewline(string: String): Unit = { - writer.write(string) - writer.write("\n") - } -} - -/** - * Used with a `ProcessLogger`, writes lines with a newline. - * - * @param path Path to the log file. - */ -case class UntailedWriter(path: Path) extends PathWriter - -/** - * Used with a `ProcessLogger`, queues up the `tailedSize` number of lines. - * - * @param path Path to the log file. - * @param tailedSize Maximum number of lines to save in the internal FIFO queue. - */ -case class TailedWriter(path: Path, tailedSize: Int) extends PathWriter { - private var isTailed = false - private var tailedLines: Queue[String] = Queue.empty - - /** - * Passed to `ProcessLogger` to add a new line, and adds the line to the tailed queue. - * - * @param string Line to add to the logs. - */ - override def writeWithNewline(string: String): Unit = { - tailedLines :+= string - while (tailedLines.size > tailedSize) { - tailedLines = tailedLines.takeRight(tailedSize) - isTailed = true - } - super.writeWithNewline(string) - } - - /** - * Returns a descriptive tail of the `path` and the last `tailedLines` written. - * - * @return a descriptive tail of the `path` and the last `tailedLines` written. - */ - def tailString: String = { - if (tailedLines.isEmpty) { - s"Contents of $path were empty." 
- } else if (isTailed) { - s"Last ${tailedLines.size} of $path:\n${tailedLines.mkString("\n")}" - } else { - s"Contents of $path:\n${tailedLines.mkString("\n")}" - } - } -} - diff --git a/core/src/main/scala/cromwell/core/WorkflowMetadataKeys.scala b/core/src/main/scala/cromwell/core/WorkflowMetadataKeys.scala index 80ade0a4da3..922be7ffbc0 100644 --- a/core/src/main/scala/cromwell/core/WorkflowMetadataKeys.scala +++ b/core/src/main/scala/cromwell/core/WorkflowMetadataKeys.scala @@ -13,9 +13,11 @@ object WorkflowMetadataKeys { val WorkflowLog = "workflowLog" val Failures = "failures" val WorkflowRoot = "workflowRoot" + val ParentWorkflowId = "parentWorkflowId" val SubmissionSection = "submittedFiles" val SubmissionSection_Workflow = "workflow" val SubmissionSection_Inputs = "inputs" val SubmissionSection_Options = "options" + val SubmissionSection_Imports = "imports" } diff --git a/core/src/main/scala/cromwell/core/WorkflowSourceFiles.scala b/core/src/main/scala/cromwell/core/WorkflowSourceFiles.scala deleted file mode 100644 index ed03f1733d4..00000000000 --- a/core/src/main/scala/cromwell/core/WorkflowSourceFiles.scala +++ /dev/null @@ -1,9 +0,0 @@ -package cromwell.core - -import wdl4s.{WdlJson, WdlSource} - -/** - * Represents the collection of source files that a user submits to run a workflow - */ -final case class WorkflowSourceFiles(wdlSource: WdlSource, inputsJson: WdlJson, - workflowOptionsJson: WorkflowOptionsJson) diff --git a/core/src/main/scala/cromwell/core/WorkflowSourceFilesCollection.scala b/core/src/main/scala/cromwell/core/WorkflowSourceFilesCollection.scala new file mode 100644 index 00000000000..ac7a7d6aaa4 --- /dev/null +++ b/core/src/main/scala/cromwell/core/WorkflowSourceFilesCollection.scala @@ -0,0 +1,43 @@ +package cromwell.core + +import wdl4s.{WdlJson, WdlSource} + +/** + * Represents the collection of source files that a user submits to run a workflow + */ + +sealed trait WorkflowSourceFilesCollection { + def wdlSource: WdlSource + def inputsJson: WdlJson + def workflowOptionsJson: WorkflowOptionsJson + def importsZipFileOption: Option[Array[Byte]] = this match { + case _: WorkflowSourceFilesWithoutImports => None + case WorkflowSourceFilesWithDependenciesZip(_, _, _, importsZip) => Option(importsZip) // i.e. 
Some(importsZip) if our wiring is correct + } + + def copyOptions(workflowOptions: WorkflowOptionsJson) = this match { + case w: WorkflowSourceFilesWithoutImports => WorkflowSourceFilesWithoutImports(w.wdlSource, w.inputsJson, workflowOptions) + case w: WorkflowSourceFilesWithDependenciesZip => WorkflowSourceFilesWithDependenciesZip(w.wdlSource, w.inputsJson, workflowOptions, w.importsZip) + } +} + +object WorkflowSourceFilesCollection { + def apply(wdlSource: WdlSource, + inputsJson: WdlJson, + workflowOptionsJson: WorkflowOptionsJson, + importsFile: Option[Array[Byte]]): WorkflowSourceFilesCollection = importsFile match { + case Some(imports) => WorkflowSourceFilesWithDependenciesZip(wdlSource, inputsJson, workflowOptionsJson, imports) + case None => WorkflowSourceFilesWithoutImports(wdlSource, inputsJson, workflowOptionsJson) + } +} + +final case class WorkflowSourceFilesWithoutImports(wdlSource: WdlSource, + inputsJson: WdlJson, + workflowOptionsJson: WorkflowOptionsJson) extends WorkflowSourceFilesCollection + +final case class WorkflowSourceFilesWithDependenciesZip(wdlSource: WdlSource, + inputsJson: WdlJson, + workflowOptionsJson: WorkflowOptionsJson, + importsZip: Array[Byte]) extends WorkflowSourceFilesCollection { + override def toString = s"WorkflowSourceFilesWithDependenciesZip($wdlSource, $inputsJson, $workflowOptionsJson, <>)" +} diff --git a/core/src/main/scala/cromwell/core/callcaching/CallCachingMode.scala b/core/src/main/scala/cromwell/core/callcaching/CallCachingMode.scala index df9fb679ae0..ed939d6aaff 100644 --- a/core/src/main/scala/cromwell/core/callcaching/CallCachingMode.scala +++ b/core/src/main/scala/cromwell/core/callcaching/CallCachingMode.scala @@ -19,7 +19,7 @@ case object CallCachingOff extends CallCachingMode { override val withoutWrite = this } -case class CallCachingActivity(readWriteMode: ReadWriteMode) extends CallCachingMode { +case class CallCachingActivity(readWriteMode: ReadWriteMode, options: CallCachingOptions = CallCachingOptions(invalidateBadCacheResults = true)) extends CallCachingMode { override val readFromCache = readWriteMode.r override val writeToCache = readWriteMode.w override lazy val withoutRead: CallCachingMode = if (!writeToCache) CallCachingOff else this.copy(readWriteMode = WriteCache) @@ -35,6 +35,4 @@ case object ReadCache extends ReadWriteMode { override val w = false } case object WriteCache extends ReadWriteMode { override val r = false } case object ReadAndWriteCache extends ReadWriteMode -sealed trait DockerHashingType -case object HashDockerName extends DockerHashingType -case object HashDockerNameAndLookupDockerHash extends DockerHashingType +final case class CallCachingOptions(invalidateBadCacheResults: Boolean = true) diff --git a/core/src/main/scala/cromwell/core/logging/JobLogger.scala b/core/src/main/scala/cromwell/core/logging/JobLogger.scala index 3eb96d4b5d0..1388f5f6b91 100644 --- a/core/src/main/scala/cromwell/core/logging/JobLogger.scala +++ b/core/src/main/scala/cromwell/core/logging/JobLogger.scala @@ -9,7 +9,7 @@ trait JobLogging extends ActorLogging { this: Actor => def workflowId: WorkflowId def jobTag: String - lazy val jobLogger: Logger = new JobLogger(self.path.name, workflowId, jobTag, Option(log)) + lazy val jobLogger = new JobLogger(self.path.name, workflowId, jobTag, Option(log)) } /** diff --git a/core/src/main/scala/cromwell/core/logging/WorkflowLogger.scala b/core/src/main/scala/cromwell/core/logging/WorkflowLogger.scala index b79e9f7b610..ba339f01136 100644 --- 
a/core/src/main/scala/cromwell/core/logging/WorkflowLogger.scala +++ b/core/src/main/scala/cromwell/core/logging/WorkflowLogger.scala @@ -15,9 +15,9 @@ import org.slf4j.helpers.NOPLogger import org.slf4j.{Logger, LoggerFactory} trait WorkflowLogging extends ActorLogging { this: Actor => - def workflowId: WorkflowId + def workflowIdForLogging: WorkflowId - lazy val workflowLogger = new WorkflowLogger(self.path.name, workflowId, Option(log)) + lazy val workflowLogger = new WorkflowLogger(self.path.name, workflowIdForLogging, Option(log)) } object WorkflowLogger { @@ -113,10 +113,10 @@ class WorkflowLogger(loggerName: String, import WorkflowLogger._ - val workflowLogPath = workflowLogConfiguration.map(workflowLogConfigurationActual => + lazy val workflowLogPath = workflowLogConfiguration.map(workflowLogConfigurationActual => File(workflowLogConfigurationActual.dir).createDirectories() / s"workflow.$workflowId.log").map(_.path) - val fileLogger = workflowLogPath match { + lazy val fileLogger = workflowLogPath match { case Some(path) => makeFileLogger(path, Level.toLevel(sys.props.getOrElse("LOG_LEVEL", "debug"))) case None => NOPLogger.NOP_LOGGER } diff --git a/core/src/main/scala/cromwell/core/package.scala b/core/src/main/scala/cromwell/core/package.scala index 3334bfa246a..def878003a8 100644 --- a/core/src/main/scala/cromwell/core/package.scala +++ b/core/src/main/scala/cromwell/core/package.scala @@ -7,7 +7,7 @@ package object core { type FullyQualifiedName = String type WorkflowOutputs = Map[FullyQualifiedName, JobOutput] type WorkflowOptionsJson = String - type JobOutputs = Map[LocallyQualifiedName, JobOutput] + type CallOutputs = Map[LocallyQualifiedName, JobOutput] type HostInputs = Map[String, WdlValue] type EvaluatedRuntimeAttributes = Map[String, WdlValue] } diff --git a/core/src/main/scala/cromwell/core/path/CustomRetryParams.scala b/core/src/main/scala/cromwell/core/path/CustomRetryParams.scala new file mode 100644 index 00000000000..b27b9135ecb --- /dev/null +++ b/core/src/main/scala/cromwell/core/path/CustomRetryParams.scala @@ -0,0 +1,25 @@ +package cromwell.core.path + +import cromwell.core.retry.{Backoff, SimpleExponentialBackoff} + +import scala.concurrent.duration.Duration +import scala.concurrent.duration._ +import scala.language.postfixOps + +object CustomRetryParams { + val Default = CustomRetryParams( + timeout = Duration.Inf, + maxRetries = Option(3), + backoff = SimpleExponentialBackoff(1 seconds, 3 seconds, 1.5D), + isTransient = throwableToFalse, + isFatal = throwableToFalse + ) + + def throwableToFalse(t: Throwable) = false +} + +case class CustomRetryParams(timeout: Duration, + maxRetries: Option[Int], + backoff: Backoff, + isTransient: Throwable => Boolean, + isFatal: Throwable => Boolean) diff --git a/core/src/main/scala/cromwell/core/path/DefaultPathBuilder.scala b/core/src/main/scala/cromwell/core/path/DefaultPathBuilder.scala new file mode 100644 index 00000000000..7dc60c1e854 --- /dev/null +++ b/core/src/main/scala/cromwell/core/path/DefaultPathBuilder.scala @@ -0,0 +1,21 @@ +package cromwell.core.path + +import java.net.URI +import java.nio.file.{FileSystems, Path} + +import scala.util.Try + +/** + * PathBuilder using the default FileSystem to attempt to build a Path. 
+ */ +case object DefaultPathBuilder extends PathBuilder { + override def name = "Default" + + override def build(pathAsString: String): Try[Path] = Try { + val uri = URI.create(pathAsString) + val host = Option(uri.getHost) getOrElse "" + val path = Option(uri.getPath) getOrElse "" + + FileSystems.getDefault.getPath(host, path) + } +} diff --git a/core/src/main/scala/cromwell/core/path/DefaultPathBuilderFactory.scala b/core/src/main/scala/cromwell/core/path/DefaultPathBuilderFactory.scala new file mode 100644 index 00000000000..5339fae3cb8 --- /dev/null +++ b/core/src/main/scala/cromwell/core/path/DefaultPathBuilderFactory.scala @@ -0,0 +1,8 @@ +package cromwell.core.path + +import akka.actor.ActorSystem +import cromwell.core.WorkflowOptions + +case object DefaultPathBuilderFactory extends PathBuilderFactory { + override def withOptions(options: WorkflowOptions)(implicit actorSystem: ActorSystem) = DefaultPathBuilder +} diff --git a/core/src/main/scala/cromwell/core/path/JavaWriterImplicits.scala b/core/src/main/scala/cromwell/core/path/JavaWriterImplicits.scala new file mode 100644 index 00000000000..cc1b7f40dde --- /dev/null +++ b/core/src/main/scala/cromwell/core/path/JavaWriterImplicits.scala @@ -0,0 +1,13 @@ +package cromwell.core.path + +import java.io.Writer + +object JavaWriterImplicits { + implicit class FlushingAndClosingWriter(writer: Writer) { + /** Convenience method to flush and close in one shot. */ + def flushAndClose() = { + writer.flush() + writer.close() + } + } +} diff --git a/core/src/main/scala/cromwell/core/path/PathBuilder.scala b/core/src/main/scala/cromwell/core/path/PathBuilder.scala new file mode 100644 index 00000000000..c213101922b --- /dev/null +++ b/core/src/main/scala/cromwell/core/path/PathBuilder.scala @@ -0,0 +1,10 @@ +package cromwell.core.path + +import java.nio.file.Path + +import scala.util.Try + +trait PathBuilder { + def name: String + def build(pathAsString: String): Try[Path] +} diff --git a/core/src/main/scala/cromwell/core/path/PathBuilderFactory.scala b/core/src/main/scala/cromwell/core/path/PathBuilderFactory.scala new file mode 100644 index 00000000000..7ee20eb2dec --- /dev/null +++ b/core/src/main/scala/cromwell/core/path/PathBuilderFactory.scala @@ -0,0 +1,11 @@ +package cromwell.core.path + +import akka.actor.ActorSystem +import cromwell.core.WorkflowOptions + +/** + * Provide a method that can instantiate a path builder with the specified workflow options. + */ +trait PathBuilderFactory { + def withOptions(options: WorkflowOptions)(implicit actorSystem: ActorSystem): PathBuilder +} diff --git a/core/src/main/scala/cromwell/core/PathCopier.scala b/core/src/main/scala/cromwell/core/path/PathCopier.scala similarity index 65% rename from core/src/main/scala/cromwell/core/PathCopier.scala rename to core/src/main/scala/cromwell/core/path/PathCopier.scala index f90dad60449..58eb290680b 100644 --- a/core/src/main/scala/cromwell/core/PathCopier.scala +++ b/core/src/main/scala/cromwell/core/path/PathCopier.scala @@ -1,4 +1,4 @@ -package cromwell.core +package cromwell.core.path import java.io.IOException import java.nio.file.Path @@ -8,8 +8,27 @@ import better.files._ import scala.util.{Failure, Try} object PathCopier { + + /* + * Remove p1 from p2 as long as they match. 
+ */ + private def truncateCommonRoot(p1: Path, p2: Path): String = { + def names(p: Path) = 0 until p.getNameCount map p.getName + + val names1 = names(p1) + + val truncated = names(p2).zipWithIndex.dropWhile { + case (n1, n2) => n2 < names1.size && n1.equals(names1(n2)) + } map { _._1 } + + truncated match { + case empty if empty.isEmpty => "" + case truncs => truncs.reduceLeft(_.resolve(_)).toString + } + } + def getDestinationFilePath(sourceContextPath: Path, sourceFilePath: Path, destinationDirPath: Path): Path = { - val relativeFileString = sourceContextPath.toAbsolutePath.relativize(sourceFilePath.toAbsolutePath).toString + val relativeFileString = truncateCommonRoot(sourceContextPath.toAbsolutePath, sourceFilePath.toAbsolutePath) destinationDirPath.resolve(relativeFileString) } diff --git a/core/src/main/scala/cromwell/core/path/PathFactory.scala b/core/src/main/scala/cromwell/core/path/PathFactory.scala new file mode 100644 index 00000000000..ff050b55956 --- /dev/null +++ b/core/src/main/scala/cromwell/core/path/PathFactory.scala @@ -0,0 +1,57 @@ +package cromwell.core.path + +import java.nio.file.Path + +import better.files.File + +import scala.util.Success + +/** + * Convenience trait delegating to the PathFactory singleton + */ +trait PathFactory { + /** + * Path builders to be applied (in order) to attempt to build a java.nio.Path from a string. + */ + def pathBuilders: List[PathBuilder] + + /** + * Function applied after a string is successfully resolved to a java.nio.Path + */ + def postMapping(path: Path): Path = path + + /** + * Function applied before a string is attempted to be resolved to a java.nio.Path + */ + def preMapping(string: String): String = string + + /** + * Attempts to build a java.nio.Path from a String + */ + def buildPath(string: String): Path = PathFactory.buildPath(string, pathBuilders, preMapping, postMapping) + + /** + * Attempts to build a better.files.File from a String + */ + def buildFile(string: String): File = PathFactory.buildFile(string, pathBuilders, preMapping, postMapping) +} + +object PathFactory { + /** + * Attempts to build a java.nio.Path from a String + */ + def buildPath(string: String, + pathBuilders: List[PathBuilder], + preMapping: String => String = identity[String], + postMapping: Path => Path = identity[Path]): Path = { + pathBuilders.toStream map { _.build(preMapping(string)) } collectFirst { case Success(p) => postMapping(p) } getOrElse { + val pathBuilderNames: String = pathBuilders map { _.name } mkString ", " + throw new PathParsingException(s"Could not find suitable filesystem among $pathBuilderNames to parse $string.") + } + } + + def buildFile(string: String, + pathBuilders: List[PathBuilder], + preMapping: String => String = identity[String], + postMapping: Path => Path = identity[Path]): File = File(buildPath(string, pathBuilders, preMapping, postMapping)) +} diff --git a/core/src/main/scala/cromwell/core/path/PathImplicits.scala b/core/src/main/scala/cromwell/core/path/PathImplicits.scala new file mode 100644 index 00000000000..83d4bff7c6a --- /dev/null +++ b/core/src/main/scala/cromwell/core/path/PathImplicits.scala @@ -0,0 +1,15 @@ +package cromwell.core.path + +import java.nio.file.Path + +object PathImplicits { + implicit class EnhancedPath(val path: Path) extends AnyVal { + def swapExt(oldExt: String, newExt: String): Path = { + path.getFileSystem.getPath(s"${path.toString.stripSuffix(oldExt)}$newExt") + } + + def untailed = UntailedWriter(path) + + def tailed(tailedSize: Int) = TailedWriter(path, tailedSize) + 
} +} diff --git a/core/src/main/scala/cromwell/core/path/PathParsingException.scala b/core/src/main/scala/cromwell/core/path/PathParsingException.scala new file mode 100644 index 00000000000..9bf6b5a7c99 --- /dev/null +++ b/core/src/main/scala/cromwell/core/path/PathParsingException.scala @@ -0,0 +1,5 @@ +package cromwell.core.path + +import cromwell.core.CromwellFatalException + +case class PathParsingException(message: String) extends CromwellFatalException(new IllegalArgumentException(message)) diff --git a/core/src/main/scala/cromwell/core/path/PathWriter.scala b/core/src/main/scala/cromwell/core/path/PathWriter.scala new file mode 100644 index 00000000000..e24fb74feb6 --- /dev/null +++ b/core/src/main/scala/cromwell/core/path/PathWriter.scala @@ -0,0 +1,76 @@ +package cromwell.core.path + +import java.io.Writer +import java.nio.file.Path + +import scala.collection.immutable.Queue + + + +/** + * Used with a `ProcessLogger`, writes lines with a newline. + */ +trait PathWriter { + import better.files._ + + val path: Path + lazy val writer: Writer = File(path).newBufferedWriter + + /** + * Passed to `ProcessLogger` to add a new line. + * + * @param string Line to add to the logs. + */ + def writeWithNewline(string: String): Unit = { + writer.write(string) + writer.write("\n") + } +} + +/** + * Used with a `ProcessLogger`, writes lines with a newline. + * + * @param path Path to the log file. + */ +case class UntailedWriter(path: Path) extends PathWriter + +/** + * Used with a `ProcessLogger`, queues up the `tailedSize` number of lines. + * + * @param path Path to the log file. + * @param tailedSize Maximum number of lines to save in the internal FIFO queue. + */ +case class TailedWriter(path: Path, tailedSize: Int) extends PathWriter { + private var isTailed = false + private var tailedLines: Queue[String] = Queue.empty + + /** + * Passed to `ProcessLogger` to add a new line, and adds the line to the tailed queue. + * + * @param string Line to add to the logs. + */ + override def writeWithNewline(string: String): Unit = { + tailedLines :+= string + while (tailedLines.size > tailedSize) { + tailedLines = tailedLines.takeRight(tailedSize) + isTailed = true + } + super.writeWithNewline(string) + } + + /** + * Returns a descriptive tail of the `path` and the last `tailedLines` written. + * + * @return a descriptive tail of the `path` and the last `tailedLines` written. + */ + def tailString: String = { + if (tailedLines.isEmpty) { + s"Contents of $path were empty." 
+ } else if (isTailed) { + s"Last ${tailedLines.size} of $path:\n${tailedLines.mkString("\n")}" + } else { + s"Contents of $path:\n${tailedLines.mkString("\n")}" + } + } +} + diff --git a/core/src/main/scala/cromwell/core/path/proxy/FileSystemProxy.scala b/core/src/main/scala/cromwell/core/path/proxy/FileSystemProxy.scala new file mode 100644 index 00000000000..f9e9b581778 --- /dev/null +++ b/core/src/main/scala/cromwell/core/path/proxy/FileSystemProxy.scala @@ -0,0 +1,25 @@ +package cromwell.core.path.proxy + +import java.lang.Iterable +import java.nio.file._ +import java.nio.file.attribute.UserPrincipalLookupService +import java.nio.file.spi.FileSystemProvider +import java.util + +class FileSystemProxy(delegate: FileSystem, injectedProvider: FileSystemProvider) extends FileSystem { + + override def provider(): FileSystemProvider = injectedProvider + + /* delegated */ + override def supportedFileAttributeViews(): util.Set[String] = delegate.supportedFileAttributeViews() + override def getSeparator: String = delegate.getSeparator + override def getRootDirectories: Iterable[Path] = delegate.getRootDirectories + override def newWatchService(): WatchService = delegate.newWatchService() + override def getFileStores: Iterable[FileStore] = delegate.getFileStores + override def isReadOnly: Boolean = delegate.isReadOnly + override def getPath(first: String, more: String*): Path = new PathProxy(delegate.getPath(first, more: _*), this) + override def isOpen: Boolean = delegate.isOpen + override def close(): Unit = delegate.close() + override def getPathMatcher(syntaxAndPattern: String): PathMatcher = delegate.getPathMatcher(syntaxAndPattern) + override def getUserPrincipalLookupService: UserPrincipalLookupService = delegate.getUserPrincipalLookupService +} diff --git a/core/src/main/scala/cromwell/core/path/proxy/PathProxy.scala b/core/src/main/scala/cromwell/core/path/proxy/PathProxy.scala new file mode 100644 index 00000000000..28428e0a3b4 --- /dev/null +++ b/core/src/main/scala/cromwell/core/path/proxy/PathProxy.scala @@ -0,0 +1,44 @@ +package cromwell.core.path.proxy + +import java.io.File +import java.net.URI +import java.nio.file.WatchEvent.{Kind, Modifier} +import java.nio.file._ +import java.util + +import scala.util.Try + +class PathProxy(delegate: Path, injectedFileSystem: FileSystem) extends Path { + def unbox[T](clazz: Class[T]): Try[T] = Try { + clazz.cast(delegate) + } + + override def getFileSystem: FileSystem = injectedFileSystem + + /* delegated */ + override def subpath(beginIndex: Int, endIndex: Int): Path = delegate.subpath(beginIndex, endIndex) + override def toFile: File = delegate.toFile + override def resolveSibling(other: Path): Path = delegate.resolveSibling(other) + override def resolveSibling(other: String): Path = delegate.resolveSibling(other) + override def isAbsolute: Boolean = delegate.isAbsolute + override def getName(index: Int): Path = delegate.getName(index) + override def getParent: Path = delegate.getParent + override def toAbsolutePath: Path = delegate.toAbsolutePath + override def relativize(other: Path): Path = delegate.relativize(other) + override def getNameCount: Int = delegate.getNameCount + override def toUri: URI = delegate.toUri + override def compareTo(other: Path): Int = delegate.compareTo(other) + override def register(watcher: WatchService, events: Array[Kind[_]], modifiers: Modifier*): WatchKey = delegate.register(watcher, events, modifiers: _*) + override def register(watcher: WatchService, events: Kind[_]*): WatchKey = 
delegate.register(watcher, events: _*) + override def getFileName: Path = delegate.getFileName + override def getRoot: Path = delegate.getRoot + override def iterator(): util.Iterator[Path] = delegate.iterator() + override def normalize(): Path = delegate.normalize() + override def endsWith(other: Path): Boolean = delegate.endsWith(other) + override def endsWith(other: String): Boolean = delegate.endsWith(other) + override def resolve(other: Path): Path = delegate.resolve(other) + override def resolve(other: String): Path = delegate.resolve(other) + override def startsWith(other: Path): Boolean = delegate.startsWith(other) + override def startsWith(other: String): Boolean = delegate.startsWith(other) + override def toRealPath(options: LinkOption*): Path = delegate.toRealPath(options: _*) +} diff --git a/core/src/main/scala/cromwell/core/path/proxy/RetryableFileSystemProviderProxy.scala b/core/src/main/scala/cromwell/core/path/proxy/RetryableFileSystemProviderProxy.scala new file mode 100644 index 00000000000..db3975292db --- /dev/null +++ b/core/src/main/scala/cromwell/core/path/proxy/RetryableFileSystemProviderProxy.scala @@ -0,0 +1,57 @@ +package cromwell.core.path.proxy + +import java.net.URI +import java.nio.channels.SeekableByteChannel +import java.nio.file.DirectoryStream.Filter +import java.nio.file._ +import java.nio.file.attribute.{BasicFileAttributes, FileAttribute, FileAttributeView} +import java.nio.file.spi.FileSystemProvider +import java.util + +import akka.actor.ActorSystem +import cromwell.core.path.CustomRetryParams +import cromwell.core.retry.Retry + +import scala.concurrent.{Await, Future} + +class RetryableFileSystemProviderProxy[T <: FileSystemProvider](delegate: T, retryParams: CustomRetryParams = CustomRetryParams.Default)(implicit actorSystem: ActorSystem) extends FileSystemProvider { + private val iOExecutionContext = actorSystem.dispatchers.lookup("akka.dispatchers.io-dispatcher") + + // the nio interface is synchronous so we need to wait for the result + def withRetry[U](f: () => U): U = Await.result( + Retry.withRetry( + () => Future(f())(iOExecutionContext), + retryParams.maxRetries, + retryParams.backoff, + retryParams.isTransient, + retryParams.isFatal + ), + retryParams.timeout + ) + + override def getPath(uri: URI): Path = { + val path = delegate.getPath(uri) + new PathProxy(path, new FileSystemProxy(path.getFileSystem, this)) + } + override def newFileSystem(uri: URI, env: util.Map[String, _]): FileSystem = { + new FileSystemProxy(delegate.newFileSystem(uri, env), this) + } + override def getScheme: String = delegate.getScheme + override def getFileSystem(uri: URI): FileSystem = delegate.getFileSystem(uri) + override def getFileStore(path: Path): FileStore = delegate.getFileStore(path) + + /* retried operations */ + override def move(source: Path, target: Path, options: CopyOption*): Unit = withRetry { () => delegate.move(source, target, options: _*) } + override def checkAccess(path: Path, modes: AccessMode*): Unit = withRetry { () => delegate.checkAccess(path, modes: _*) } + override def createDirectory(dir: Path, attrs: FileAttribute[_]*): Unit = withRetry { () => delegate.createDirectory(dir, attrs: _*) } + override def newByteChannel(path: Path, options: util.Set[_ <: OpenOption], attrs: FileAttribute[_]*): SeekableByteChannel = withRetry { () => delegate.newByteChannel(path, options, attrs: _*) } + override def isHidden(path: Path): Boolean = withRetry { () => delegate.isHidden(path) } + override def copy(source: Path, target: Path, options: 
CopyOption*): Unit = withRetry { () => delegate.copy(source, target, options: _*) } + override def delete(path: Path): Unit = withRetry { () => delegate.delete(path) } + override def newDirectoryStream(dir: Path, filter: Filter[_ >: Path]): DirectoryStream[Path] = withRetry { () => delegate.newDirectoryStream(dir, filter) } + override def setAttribute(path: Path, attribute: String, value: scala.Any, options: LinkOption*): Unit = withRetry { () => delegate.setAttribute(path, attribute, value, options: _*) } + override def readAttributes[A <: BasicFileAttributes](path: Path, `type`: Class[A], options: LinkOption*): A = withRetry { () => delegate.readAttributes(path, `type`, options: _*) } + override def readAttributes(path: Path, attributes: String, options: LinkOption*): util.Map[String, AnyRef] = withRetry { () => delegate.readAttributes(path, attributes, options: _*) } + override def isSameFile(path: Path, path2: Path): Boolean = withRetry { () => delegate.isSameFile(path, path2) } + override def getFileAttributeView[V <: FileAttributeView](path: Path, `type`: Class[V], options: LinkOption*): V = withRetry { () => delegate.getFileAttributeView(path, `type`, options: _*) } +} diff --git a/core/src/main/scala/cromwell/core/retry/Backoff.scala b/core/src/main/scala/cromwell/core/retry/Backoff.scala index af07de59532..70b3f82f4dc 100644 --- a/core/src/main/scala/cromwell/core/retry/Backoff.scala +++ b/core/src/main/scala/cromwell/core/retry/Backoff.scala @@ -4,7 +4,7 @@ import com.google.api.client.util.ExponentialBackOff import scala.concurrent.duration.{Duration, FiniteDuration} -sealed trait Backoff { +trait Backoff { /** Next interval in millis */ def backoffMillis: Long /** Get the next instance of backoff. This should be called after every call to backoffMillis */ diff --git a/core/src/main/scala/cromwell/core/retry/Retry.scala b/core/src/main/scala/cromwell/core/retry/Retry.scala index 7ac181129c0..002a8d6e56f 100644 --- a/core/src/main/scala/cromwell/core/retry/Retry.scala +++ b/core/src/main/scala/cromwell/core/retry/Retry.scala @@ -25,7 +25,7 @@ object Retry { */ def withRetry[A](f: () => Future[A], maxRetries: Option[Int] = Option(10), - backoff: SimpleExponentialBackoff = SimpleExponentialBackoff(5 seconds, 10 seconds, 1.1D), + backoff: Backoff = SimpleExponentialBackoff(5 seconds, 10 seconds, 1.1D), isTransient: Throwable => Boolean = throwableToFalse, isFatal: Throwable => Boolean = throwableToFalse) (implicit actorSystem: ActorSystem): Future[A] = { @@ -38,7 +38,7 @@ object Retry { case throwable if isFatal(throwable) => Future.failed(new CromwellFatalException(throwable)) case throwable if !isFatal(throwable) => val retriesLeft = if (isTransient(throwable)) maxRetries else maxRetries map { _ - 1 } - after(delay, actorSystem.scheduler)(withRetry(f, backoff = backoff, maxRetries = retriesLeft)) + after(delay, actorSystem.scheduler)(withRetry(f, backoff = backoff, maxRetries = retriesLeft, isTransient = isTransient, isFatal = isFatal)) } } else f() recoverWith { case e: Exception => Future.failed(new CromwellFatalException(e)) diff --git a/core/src/main/scala/cromwell/core/simpleton/WdlValueBuilder.scala b/core/src/main/scala/cromwell/core/simpleton/WdlValueBuilder.scala index d4e04dd0bba..774c1b5ceb8 100644 --- a/core/src/main/scala/cromwell/core/simpleton/WdlValueBuilder.scala +++ b/core/src/main/scala/cromwell/core/simpleton/WdlValueBuilder.scala @@ -1,11 +1,11 @@ package cromwell.core.simpleton import wdl4s.TaskOutput -import wdl4s.types.{WdlArrayType, WdlMapType, 
WdlPrimitiveType, WdlType} -import wdl4s.values.{WdlArray, WdlMap, WdlValue} +import wdl4s.types._ +import wdl4s.values.{WdlArray, WdlMap, WdlOptionalValue, WdlPair, WdlValue} import scala.language.postfixOps -import cromwell.core.{JobOutput, JobOutputs} +import cromwell.core.{CallOutputs, JobOutput} import cromwell.core.simpleton.WdlValueSimpleton._ @@ -73,6 +73,19 @@ object WdlValueBuilder { component.path match { case MapElementPattern(key, more) => key.unescapeMeta -> component.copy(path = more)} } + // Returns a tuple of the key into the pair (i.e. left or right) and a `SimpletonComponent` whose path reflects the "descent" + // into the pair. e.g. for a component + // SimpletonComponent(":left:foo", someValue) this would return (PairLeft -> SimpletonComponent(":baz", someValue)). + sealed trait PairLeftOrRight + case object PairLeft extends PairLeftOrRight + case object PairRight extends PairLeftOrRight + def descendIntoPair(component: SimpletonComponent): (PairLeftOrRight, SimpletonComponent) = { + component.path match { + case MapElementPattern("left", more) => PairLeft -> component.copy(path = more) + case MapElementPattern("right", more) => PairRight -> component.copy(path = more) + } + } + // Group tuples by key using a Map with key type `K`. def group[K](tuples: Traversable[(K, SimpletonComponent)]): Map[K, Traversable[SimpletonComponent]] = { tuples groupBy { case (i, _) => i } mapValues { _ map { case (i, s) => s} } @@ -80,6 +93,12 @@ object WdlValueBuilder { outputType match { case _: WdlPrimitiveType => components collectFirst { case SimpletonComponent(_, v) => v } get + case opt: WdlOptionalType => + if (components.isEmpty) { + WdlOptionalValue(opt.memberType, None) + } else { + WdlOptionalValue(toWdlValue(opt.memberType, components)) + } case arrayType: WdlArrayType => val groupedByArrayIndex: Map[Int, Traversable[SimpletonComponent]] = group(components map descendIntoArray) WdlArray(arrayType, groupedByArrayIndex.toList.sortBy(_._1) map { case (_, s) => toWdlValue(arrayType.memberType, s) }) @@ -87,6 +106,9 @@ object WdlValueBuilder { val groupedByMapKey: Map[String, Traversable[SimpletonComponent]] = group(components map descendIntoMap) // map keys are guaranteed by the WDL spec to be primitives, so the "coerceRawValue(..).get" is safe. WdlMap(mapType, groupedByMapKey map { case (k, ss) => mapType.keyType.coerceRawValue(k).get -> toWdlValue(mapType.valueType, ss) }) + case pairType: WdlPairType => + val groupedByLeftOrRight: Map[PairLeftOrRight, Traversable[SimpletonComponent]] = group(components map descendIntoPair) + WdlPair(toWdlValue(pairType.leftType, groupedByLeftOrRight(PairLeft)), toWdlValue(pairType.rightType, groupedByLeftOrRight(PairRight))) } } @@ -107,7 +129,7 @@ object WdlValueBuilder { */ private case class SimpletonComponent(path: String, value: WdlValue) - def toJobOutputs(taskOutputs: Traversable[TaskOutput], simpletons: Traversable[WdlValueSimpleton]): JobOutputs = { + def toJobOutputs(taskOutputs: Traversable[TaskOutput], simpletons: Traversable[WdlValueSimpleton]): CallOutputs = { toWdlValues(taskOutputs, simpletons) mapValues JobOutput.apply } @@ -119,7 +141,7 @@ object WdlValueBuilder { // This is meant to "rehydrate" simpletonized WdlValues back to WdlValues. It is assumed that these WdlValues were // "dehydrated" to WdlValueSimpletons correctly. This code is not robust to corrupt input whatsoever. 
- val types = taskOutputs map { o => o.name -> o.wdlType } toMap + val types = taskOutputs map { o => o.unqualifiedName -> o.wdlType } toMap val simpletonsByOutputName = simpletons groupBy { _.simpletonKey match { case IdentifierAndPathPattern(i, _) => i } } val simpletonComponentsByOutputName = simpletonsByOutputName map { case (name, ss) => name -> (ss map simpletonToComponent(name)) } types map { case (name, outputType) => name -> toWdlValue(outputType, simpletonComponentsByOutputName(name))} diff --git a/core/src/main/scala/cromwell/core/simpleton/WdlValueSimpleton.scala b/core/src/main/scala/cromwell/core/simpleton/WdlValueSimpleton.scala index c417ad3ee0a..1f5e0437554 100644 --- a/core/src/main/scala/cromwell/core/simpleton/WdlValueSimpleton.scala +++ b/core/src/main/scala/cromwell/core/simpleton/WdlValueSimpleton.scala @@ -26,8 +26,10 @@ object WdlValueSimpleton { implicit class WdlValueSimplifier(wdlValue: WdlValue) { def simplify(name: String): Iterable[WdlValueSimpleton] = wdlValue match { case prim: WdlPrimitive => List(WdlValueSimpleton(name, prim)) + case opt: WdlOptionalValue => opt.value.map(_.simplify(name)).getOrElse(Seq.empty) case WdlArray(_, arrayValue) => arrayValue.zipWithIndex flatMap { case (arrayItem, index) => arrayItem.simplify(s"$name[$index]") } case WdlMap(_, mapValue) => mapValue flatMap { case (key, value) => value.simplify(s"$name:${key.valueString.escapeMeta}") } + case WdlPair(left, right) => left.simplify(s"$name:left") ++ right.simplify(s"$name:right") case wdlObject: WdlObjectLike => wdlObject.value flatMap { case (key, value) => value.simplify(s"$name:${key.escapeMeta}") } case other => throw new Exception(s"Cannot simplify wdl value $other of type ${other.wdlType}") } diff --git a/core/src/main/scala/cromwell/util/JsonFormatting/WdlValueJsonFormatter.scala b/core/src/main/scala/cromwell/util/JsonFormatting/WdlValueJsonFormatter.scala index 9d8638be0e6..8a997efb0a8 100644 --- a/core/src/main/scala/cromwell/util/JsonFormatting/WdlValueJsonFormatter.scala +++ b/core/src/main/scala/cromwell/util/JsonFormatting/WdlValueJsonFormatter.scala @@ -17,6 +17,7 @@ object WdlValueJsonFormatter extends DefaultJsonProtocol { case a: WdlArray => new JsArray(a.value.map(write).toVector) case m: WdlMap => new JsObject(m.value map {case(k,v) => k.valueString -> write(v)}) case e: WdlExpression => JsString(e.toWdlString) + case q: WdlPair => new JsObject(Map("left" -> write(q.left), "right" -> write(q.right))) } // NOTE: This assumes a map's keys are strings. Since we're coming from JSON this is fine. diff --git a/core/src/main/scala/cromwell/util/PromiseActor.scala b/core/src/main/scala/cromwell/util/PromiseActor.scala index 6813a53c7ef..58aea267a03 100644 --- a/core/src/main/scala/cromwell/util/PromiseActor.scala +++ b/core/src/main/scala/cromwell/util/PromiseActor.scala @@ -4,6 +4,28 @@ import akka.actor._ import scala.concurrent.{Future, Promise} +private class PromiseActor(promise: Promise[Any], sendTo: ActorRef, msg: Any) extends Actor with ActorLogging { + + context.watch(sendTo) + sendTo ! msg + + override def receive = { + case Status.Failure(f) => + promise.tryFailure(f) + context.stop(self) + case Terminated(actorRef) => + if (actorRef == sendTo) { + promise.tryFailure(new RuntimeException("Promise-watched actor completed before sending back a message")) + } else { + log.error("Spooky happenstances! 
A Terminated({}) message was sent to a private Promise actor which wasn't watching it!?", actorRef) + } + context.stop(self) + case success => + promise.trySuccess(success) + context.stop(self) + } +} + object PromiseActor { /** * Sends a message to an actor and returns the future associated with the fullfilment of the reply @@ -16,12 +38,11 @@ object PromiseActor { */ private def askNoTimeout(message: Any, sendTo: ActorRef)(implicit actorRefFactory: ActorRefFactory): Future[Any] = { val promise = Promise[Any]() - val promiseActor = actorRefFactory.actorOf(props(promise)) - sendTo.tell(message, promiseActor) + val _ = actorRefFactory.actorOf(props(promise, sendTo, message)) promise.future } - def props(promise: Promise[Any]): Props = Props(new PromiseActor(promise)) + def props(promise: Promise[Any], sendTo: ActorRef, msg: Any): Props = Props(new PromiseActor(promise, sendTo, msg)) implicit class EnhancedActorRef(val actorRef: ActorRef) extends AnyVal { def askNoTimeout(message: Any)(implicit actorRefFactory: ActorRefFactory): Future[Any] = { @@ -29,14 +50,3 @@ object PromiseActor { } } } - -private class PromiseActor(promise: Promise[Any]) extends Actor { - override def receive = { - case Status.Failure(f) => - promise.tryFailure(f) - context.stop(self) - case success => - promise.trySuccess(success) - context.stop(self) - } -} diff --git a/core/src/main/scala/cromwell/util/StopAndLogSupervisor.scala b/core/src/main/scala/cromwell/util/StopAndLogSupervisor.scala new file mode 100644 index 00000000000..4b64e5d8bc0 --- /dev/null +++ b/core/src/main/scala/cromwell/util/StopAndLogSupervisor.scala @@ -0,0 +1,24 @@ +package cromwell.util + +import akka.actor.SupervisorStrategy.{Decider, Stop} +import akka.actor.{Actor, ActorRef, OneForOneStrategy, SupervisorStrategy} +import cromwell.core.logging.WorkflowLogging + +trait StopAndLogSupervisor { this: Actor with WorkflowLogging => + + private var failureLog: Map[ActorRef, Throwable] = Map.empty + + final val stopAndLogStrategy: SupervisorStrategy = { + def stoppingDecider: Decider = { + case e: Exception => + val failer = sender() + failureLog += failer -> e + Stop + } + OneForOneStrategy()(stoppingDecider) + } + + final def getFailureCause(actorRef: ActorRef): Option[Throwable] = failureLog.get(actorRef) + + override final val supervisorStrategy = stopAndLogStrategy +} diff --git a/core/src/main/scala/cromwell/util/TryUtil.scala b/core/src/main/scala/cromwell/util/TryUtil.scala new file mode 100644 index 00000000000..18f7ea58a3f --- /dev/null +++ b/core/src/main/scala/cromwell/util/TryUtil.scala @@ -0,0 +1,45 @@ +package cromwell.util + +import java.io.{PrintWriter, StringWriter} + +import lenthall.exception.ThrowableAggregation + +import scala.util.{Success, Failure, Try} + +case class AggregatedException(exceptions: Seq[Throwable], prefixError: String = "") extends ThrowableAggregation { + override def throwables: Traversable[Throwable] = exceptions + override def exceptionContext: String = prefixError +} + +object TryUtil { + private def stringifyFailure(failure: Try[Any]): String = { + val stringWriter = new StringWriter() + val writer = new PrintWriter(stringWriter) + failure recover { case e => e.printStackTrace(writer) } + writer.flush() + writer.close() + stringWriter.toString + } + + def stringifyFailures[T](possibleFailures: Traversable[Try[T]]): Traversable[String] = + possibleFailures.collect { case failure: Failure[T] => stringifyFailure(failure) } + + private def sequenceIterable[T](tries: Iterable[Try[_]], unbox: () => T, 
prefixErrorMessage: String) = { + tries collect { case f: Failure[_] => f } match { + case failures if failures.nonEmpty => + val exceptions = failures.toSeq.map(_.exception) + Failure(AggregatedException(exceptions, prefixErrorMessage)) + case _ => Success(unbox()) + } + } + + def sequence[T](tries: Seq[Try[T]], prefixErrorMessage: String = ""): Try[Seq[T]] = { + def unbox = tries map { _.get } + sequenceIterable(tries, unbox _, prefixErrorMessage) + } + + def sequenceMap[T, U](tries: Map[T, Try[U]], prefixErrorMessage: String = ""): Try[Map[T, U]] = { + def unbox = tries mapValues { _.get } + sequenceIterable(tries.values, unbox _, prefixErrorMessage) + } +} diff --git a/core/src/test/scala/cromwell/core/path/RetryableFileSystemProxySpec.scala b/core/src/test/scala/cromwell/core/path/RetryableFileSystemProxySpec.scala new file mode 100644 index 00000000000..88dbf463dca --- /dev/null +++ b/core/src/test/scala/cromwell/core/path/RetryableFileSystemProxySpec.scala @@ -0,0 +1,278 @@ +package cromwell.core.path + +import java.io.FileNotFoundException +import java.nio.channels.SeekableByteChannel +import java.nio.file.DirectoryStream.Filter +import java.nio.file.attribute.{BasicFileAttributes, FileAttributeView} +import java.nio.file.spi.FileSystemProvider +import java.nio.file.{DirectoryStream, OpenOption, Path, StandardOpenOption} +import java.util.concurrent.TimeoutException + +import cromwell.core.path.proxy.RetryableFileSystemProviderProxy +import cromwell.core.retry.Backoff +import cromwell.core.{CromwellFatalException, TestKitSuite} +import org.mockito.Matchers._ +import org.mockito.Mockito._ +import org.mockito.invocation.InvocationOnMock +import org.mockito.stubbing.Answer +import org.scalatest.{FlatSpecLike, Matchers} + +import scala.concurrent.duration._ +import scala.language.postfixOps + +class RetryableFileSystemProxySpec extends TestKitSuite with FlatSpecLike with Matchers { + + behavior of "RetryableFileSystemProxySpec" + + case class ThrowParams(exception: Exception, nbTimes: Int) + + abstract class FileSystemAnswer[T](delay: Option[Duration] = None, + throws: Option[ThrowParams] = None) extends Answer[T] { + + var nbThrows = 0 + + def delayAndOrThrow() = { + delay foreach { d => Thread.sleep(d.toMillis) } + throws foreach { e => + if (nbThrows < e.nbTimes) { + nbThrows = nbThrows + 1 + throw e.exception + } + } + } + } + + def mockFileSystem(delay: Option[Duration] = None, + throws: Option[ThrowParams] = None): FileSystemProvider = { + + val provider = mock(classOf[FileSystemProvider]) + + def answerUnit: Answer[Unit] = new FileSystemAnswer[Unit](delay, throws) { + override def answer(invocation: InvocationOnMock): Unit = delayAndOrThrow() + } + + def answerBoolean: Answer[Boolean] = new FileSystemAnswer[Boolean](delay, throws) { + override def answer(invocation: InvocationOnMock): Boolean = { + delayAndOrThrow() + true + } + } + + def answerSeekableByteChannel: Answer[SeekableByteChannel] = new FileSystemAnswer[SeekableByteChannel](delay, throws) { + override def answer(invocation: InvocationOnMock): SeekableByteChannel = { + delayAndOrThrow() + mock(classOf[SeekableByteChannel]) + } + } + + def answerDirectoryStream: Answer[DirectoryStream[Path]] = new FileSystemAnswer[DirectoryStream[Path]](delay, throws) { + override def answer(invocation: InvocationOnMock): DirectoryStream[Path] = { + delayAndOrThrow() + mock(classOf[DirectoryStream[Path]]) + } + } + + def answerBasicFileAttributes: Answer[BasicFileAttributes] = new FileSystemAnswer[BasicFileAttributes](delay, throws) { 
+ override def answer(invocation: InvocationOnMock): BasicFileAttributes = { + delayAndOrThrow() + mock(classOf[BasicFileAttributes]) + } + } + + def answerMap: Answer[java.util.Map[String, AnyRef]] = new FileSystemAnswer[java.util.Map[String, AnyRef]](delay, throws) { + override def answer(invocation: InvocationOnMock): java.util.Map[String, AnyRef] = { + delayAndOrThrow() + new java.util.HashMap[String, AnyRef]() + } + } + + def answerFileAttributeView: Answer[FileAttributeView] = new FileSystemAnswer[FileAttributeView](delay, throws) { + override def answer(invocation: InvocationOnMock): FileAttributeView = { + delayAndOrThrow() + mock(classOf[FileAttributeView]) + } + } + + when(provider.move(any[Path], any[Path])).thenAnswer(answerUnit) + when(provider.checkAccess(any[Path])).thenAnswer(answerUnit) + when(provider.createDirectory(any[Path])).thenAnswer(answerUnit) + when(provider.newByteChannel(any[Path], any[java.util.Set[OpenOption]])).thenAnswer(answerSeekableByteChannel) + when(provider.isHidden(any[Path])).thenAnswer(answerBoolean) + when(provider.copy(any[Path], any[Path])).thenAnswer(answerUnit) + when(provider.delete(any[Path])).thenAnswer(answerUnit) + when(provider.newDirectoryStream(any[Path], any[Filter[Path]]())).thenAnswer(answerDirectoryStream) + when(provider.setAttribute(any[Path], any[String], any[Object])).thenAnswer(answerUnit) + when(provider.readAttributes(any[Path], any[String])).thenAnswer(answerMap) + when(provider.readAttributes(any[Path], any[Class[BasicFileAttributes]])).thenAnswer(answerBasicFileAttributes) + when(provider.isSameFile(any[Path], any[Path])).thenAnswer(answerBoolean) + when(provider.getFileAttributeView(any[Path], any[Class[FileAttributeView]])).thenAnswer(answerFileAttributeView) + + provider + } + + val testRetryParams = CustomRetryParams.Default.copy(backoff = new Backoff { + override def next: Backoff = this + override def backoffMillis: Long = 0 + }) + + val pathMock = mock(classOf[Path]) + + it should "timeout if the operation takes too long" ignore { + val retryParams = testRetryParams.copy(timeout = 100 millis) + val mockFs = mockFileSystem(delay = Option(200 millis)) + val retryableFs = new RetryableFileSystemProviderProxy(mockFs, retryParams)(system) + + a[TimeoutException] shouldBe thrownBy(retryableFs.move(pathMock, pathMock)) + a[TimeoutException] shouldBe thrownBy(retryableFs.checkAccess(pathMock)) + a[TimeoutException] shouldBe thrownBy(retryableFs.createDirectory(pathMock)) + a[TimeoutException] shouldBe thrownBy(retryableFs.newByteChannel(pathMock, mock(classOf[java.util.Set[StandardOpenOption]]))) + a[TimeoutException] shouldBe thrownBy(retryableFs.isHidden(pathMock)) + a[TimeoutException] shouldBe thrownBy(retryableFs.copy(pathMock, pathMock)) + a[TimeoutException] shouldBe thrownBy(retryableFs.delete(pathMock)) + a[TimeoutException] shouldBe thrownBy(retryableFs.newDirectoryStream(pathMock, mock(classOf[Filter[Path]]))) + a[TimeoutException] shouldBe thrownBy(retryableFs.setAttribute(pathMock, "", "")) + a[TimeoutException] shouldBe thrownBy(retryableFs.readAttributes(pathMock, classOf[BasicFileAttributes])) + a[TimeoutException] shouldBe thrownBy(retryableFs.readAttributes(pathMock, "")) + a[TimeoutException] shouldBe thrownBy(retryableFs.isSameFile(pathMock, pathMock)) + a[TimeoutException] shouldBe thrownBy(retryableFs.getFileAttributeView(pathMock, classOf[FileAttributeView])) + } + + it should "retry on failure and finally succeed if under retry max" in { + val retryParams = testRetryParams.copy(maxRetries = 
Option(4)) + val mockFs = mockFileSystem(throws = Option(ThrowParams(new Exception(), nbTimes = 2))) + val retryableFs = new RetryableFileSystemProviderProxy(mockFs, retryParams)(system) + + retryableFs.move(pathMock, pathMock) + retryableFs.checkAccess(pathMock) + retryableFs.createDirectory(pathMock) + retryableFs.newByteChannel(pathMock, mock(classOf[java.util.Set[StandardOpenOption]])) + retryableFs.isHidden(pathMock) + retryableFs.copy(pathMock, pathMock) + retryableFs.delete(pathMock) + retryableFs.newDirectoryStream(pathMock, mock(classOf[Filter[Path]])) + retryableFs.setAttribute(pathMock, "", "") + retryableFs.readAttributes(pathMock, classOf[BasicFileAttributes]) + retryableFs.readAttributes(pathMock, "") + retryableFs.isSameFile(pathMock, pathMock) + retryableFs.getFileAttributeView(pathMock, classOf[FileAttributeView]) + + verify(mockFs, times(3)).move(any[Path], any[Path]) + verify(mockFs, times(3)).checkAccess(any[Path]) + verify(mockFs, times(3)).createDirectory(any[Path]) + verify(mockFs, times(3)).newByteChannel(any[Path], any[java.util.Set[OpenOption]]) + verify(mockFs, times(3)).isHidden(any[Path]) + verify(mockFs, times(3)).copy(any[Path], any[Path]) + verify(mockFs, times(3)).delete(any[Path]) + verify(mockFs, times(3)).newDirectoryStream(any[Path], any[Filter[Path]]()) + verify(mockFs, times(3)).setAttribute(any[Path], any[String], any[Object]) + verify(mockFs, times(3)).readAttributes(any[Path], any[String]) + verify(mockFs, times(3)).readAttributes(any[Path], any[Class[BasicFileAttributes]]) + verify(mockFs, times(3)).isSameFile(any[Path], any[Path]) + verify(mockFs, times(3)).getFileAttributeView(any[Path], any[Class[FileAttributeView]]) + } + + it should "retry on failure and fail if over retry max" in { + val retryParams = testRetryParams.copy(maxRetries = Option(2)) + val mockFs = mockFileSystem(throws = Option(ThrowParams(new IllegalArgumentException(), nbTimes = 3))) + val retryableFs = new RetryableFileSystemProviderProxy(mockFs, retryParams)(system) + + (the [CromwellFatalException] thrownBy retryableFs.move(pathMock, pathMock)).getCause shouldBe a[IllegalArgumentException] + (the [CromwellFatalException] thrownBy retryableFs.checkAccess(pathMock)).getCause shouldBe a[IllegalArgumentException] + (the [CromwellFatalException] thrownBy retryableFs.createDirectory(pathMock)).getCause shouldBe a[IllegalArgumentException] + (the [CromwellFatalException] thrownBy retryableFs.newByteChannel(pathMock, mock(classOf[java.util.Set[StandardOpenOption]]))).getCause shouldBe a[IllegalArgumentException] + (the [CromwellFatalException] thrownBy retryableFs.isHidden(pathMock)).getCause shouldBe a[IllegalArgumentException] + (the [CromwellFatalException] thrownBy retryableFs.copy(pathMock, pathMock)).getCause shouldBe a[IllegalArgumentException] + (the [CromwellFatalException] thrownBy retryableFs.delete(pathMock)).getCause shouldBe a[IllegalArgumentException] + (the [CromwellFatalException] thrownBy retryableFs.newDirectoryStream(pathMock, mock(classOf[Filter[Path]]))).getCause shouldBe a[IllegalArgumentException] + (the [CromwellFatalException] thrownBy retryableFs.setAttribute(pathMock, "", "")).getCause shouldBe a[IllegalArgumentException] + (the [CromwellFatalException] thrownBy retryableFs.readAttributes(pathMock, classOf[BasicFileAttributes])).getCause shouldBe a[IllegalArgumentException] + (the [CromwellFatalException] thrownBy retryableFs.readAttributes(pathMock, "")).getCause shouldBe a[IllegalArgumentException] + (the [CromwellFatalException] thrownBy 
retryableFs.isSameFile(pathMock, pathMock)).getCause shouldBe a[IllegalArgumentException] + (the [CromwellFatalException] thrownBy retryableFs.getFileAttributeView(pathMock, classOf[FileAttributeView])).getCause shouldBe a[IllegalArgumentException] + + verify(mockFs, times(3)).move(any[Path], any[Path]) + verify(mockFs, times(3)).checkAccess(any[Path]) + verify(mockFs, times(3)).createDirectory(any[Path]) + verify(mockFs, times(3)).newByteChannel(any[Path], any[java.util.Set[OpenOption]]) + verify(mockFs, times(3)).isHidden(any[Path]) + verify(mockFs, times(3)).copy(any[Path], any[Path]) + verify(mockFs, times(3)).delete(any[Path]) + verify(mockFs, times(3)).newDirectoryStream(any[Path], any[Filter[Path]]()) + verify(mockFs, times(3)).setAttribute(any[Path], any[String], any[Object]) + verify(mockFs, times(3)).readAttributes(any[Path], any[String]) + verify(mockFs, times(3)).readAttributes(any[Path], any[Class[BasicFileAttributes]]) + verify(mockFs, times(3)).isSameFile(any[Path], any[Path]) + verify(mockFs, times(3)).getFileAttributeView(any[Path], any[Class[FileAttributeView]]) + } + + it should "ignore transient exceptions" in { + def isTransient(t: Throwable) = t.isInstanceOf[FileNotFoundException] + val retryParams = testRetryParams.copy(maxRetries = Option(1), isTransient = isTransient) + val mockFs = mockFileSystem(throws = Option(ThrowParams(new FileNotFoundException(), nbTimes = 2))) + val retryableFs = new RetryableFileSystemProviderProxy(mockFs, retryParams)(system) + + retryableFs.move(pathMock, pathMock) + retryableFs.checkAccess(pathMock) + retryableFs.createDirectory(pathMock) + retryableFs.newByteChannel(pathMock, mock(classOf[java.util.Set[StandardOpenOption]])) + retryableFs.isHidden(pathMock) + retryableFs.copy(pathMock, pathMock) + retryableFs.delete(pathMock) + retryableFs.newDirectoryStream(pathMock, mock(classOf[Filter[Path]])) + retryableFs.setAttribute(pathMock, "", "") + retryableFs.readAttributes(pathMock, classOf[BasicFileAttributes]) + retryableFs.readAttributes(pathMock, "") + retryableFs.isSameFile(pathMock, pathMock) + retryableFs.getFileAttributeView(pathMock, classOf[FileAttributeView]) + + verify(mockFs, times(3)).move(any[Path], any[Path]) + verify(mockFs, times(3)).checkAccess(any[Path]) + verify(mockFs, times(3)).createDirectory(any[Path]) + verify(mockFs, times(3)).newByteChannel(any[Path], any[java.util.Set[OpenOption]]) + verify(mockFs, times(3)).isHidden(any[Path]) + verify(mockFs, times(3)).copy(any[Path], any[Path]) + verify(mockFs, times(3)).delete(any[Path]) + verify(mockFs, times(3)).newDirectoryStream(any[Path], any[Filter[Path]]()) + verify(mockFs, times(3)).setAttribute(any[Path], any[String], any[Object]) + verify(mockFs, times(3)).readAttributes(any[Path], any[String]) + verify(mockFs, times(3)).readAttributes(any[Path], any[Class[BasicFileAttributes]]) + verify(mockFs, times(3)).isSameFile(any[Path], any[Path]) + verify(mockFs, times(3)).getFileAttributeView(any[Path], any[Class[FileAttributeView]]) + } + + it should "fail immediately on fatal exceptions" in { + def isFatal(t: Throwable) = t.isInstanceOf[FileNotFoundException] + val retryParams = testRetryParams.copy(maxRetries = Option(5), isFatal = isFatal) + val mockFs = mockFileSystem(throws = Option(ThrowParams(new FileNotFoundException(), nbTimes = 3))) + val retryableFs = new RetryableFileSystemProviderProxy(mockFs, retryParams)(system) + + (the [CromwellFatalException] thrownBy retryableFs.move(pathMock, pathMock)).getCause shouldBe a[FileNotFoundException] + (the 
[CromwellFatalException] thrownBy retryableFs.checkAccess(pathMock)).getCause shouldBe a[FileNotFoundException] + (the [CromwellFatalException] thrownBy retryableFs.createDirectory(pathMock)).getCause shouldBe a[FileNotFoundException] + (the [CromwellFatalException] thrownBy retryableFs.newByteChannel(pathMock, mock(classOf[java.util.Set[StandardOpenOption]]))).getCause shouldBe a[FileNotFoundException] + (the [CromwellFatalException] thrownBy retryableFs.isHidden(pathMock)).getCause shouldBe a[FileNotFoundException] + (the [CromwellFatalException] thrownBy retryableFs.copy(pathMock, pathMock)).getCause shouldBe a[FileNotFoundException] + (the [CromwellFatalException] thrownBy retryableFs.delete(pathMock)).getCause shouldBe a[FileNotFoundException] + (the [CromwellFatalException] thrownBy retryableFs.newDirectoryStream(pathMock, mock(classOf[Filter[Path]]))).getCause shouldBe a[FileNotFoundException] + (the [CromwellFatalException] thrownBy retryableFs.setAttribute(pathMock, "", "")).getCause shouldBe a[FileNotFoundException] + (the [CromwellFatalException] thrownBy retryableFs.readAttributes(pathMock, classOf[BasicFileAttributes])).getCause shouldBe a[FileNotFoundException] + (the [CromwellFatalException] thrownBy retryableFs.readAttributes(pathMock, "")).getCause shouldBe a[FileNotFoundException] + (the [CromwellFatalException] thrownBy retryableFs.isSameFile(pathMock, pathMock)).getCause shouldBe a[FileNotFoundException] + (the [CromwellFatalException] thrownBy retryableFs.getFileAttributeView(pathMock, classOf[FileAttributeView])).getCause shouldBe a[FileNotFoundException] + + verify(mockFs, times(1)).move(any[Path], any[Path]) + verify(mockFs, times(1)).checkAccess(any[Path]) + verify(mockFs, times(1)).createDirectory(any[Path]) + verify(mockFs, times(1)).newByteChannel(any[Path], any[java.util.Set[OpenOption]]) + verify(mockFs, times(1)).isHidden(any[Path]) + verify(mockFs, times(1)).copy(any[Path], any[Path]) + verify(mockFs, times(1)).delete(any[Path]) + verify(mockFs, times(1)).newDirectoryStream(any[Path], any[Filter[Path]]()) + verify(mockFs, times(1)).setAttribute(any[Path], any[String], any[Object]) + verify(mockFs, times(1)).readAttributes(any[Path], any[String]) + verify(mockFs, times(1)).readAttributes(any[Path], any[Class[BasicFileAttributes]]) + verify(mockFs, times(1)).isSameFile(any[Path], any[Path]) + verify(mockFs, times(1)).getFileAttributeView(any[Path], any[Class[FileAttributeView]]) + } + +} diff --git a/core/src/test/scala/cromwell/core/simpleton/WdlValueBuilderSpec.scala b/core/src/test/scala/cromwell/core/simpleton/WdlValueBuilderSpec.scala index eab604c96cb..1e558ceb1dd 100644 --- a/core/src/test/scala/cromwell/core/simpleton/WdlValueBuilderSpec.scala +++ b/core/src/test/scala/cromwell/core/simpleton/WdlValueBuilderSpec.scala @@ -2,8 +2,10 @@ package cromwell.core.simpleton import cromwell.core.simpleton.WdlValueBuilderSpec._ import org.scalatest.{FlatSpec, Matchers} +import org.specs2.mock.Mockito +import wdl4s.parser.WdlParser.Ast import wdl4s.types.{WdlArrayType, WdlIntegerType, WdlMapType, WdlStringType} -import wdl4s.values.{WdlArray, WdlInteger, WdlMap, WdlString} +import wdl4s.values.{WdlArray, WdlInteger, WdlMap, WdlPair, WdlString, WdlValue} import wdl4s.{TaskOutput, WdlExpression} object WdlValueBuilderSpec { @@ -11,41 +13,118 @@ object WdlValueBuilderSpec { val IgnoredExpression = WdlExpression.fromString(""" "" """) } -class WdlValueBuilderSpec extends FlatSpec with Matchers { +class WdlValueBuilderSpec extends FlatSpec with Matchers with Mockito { 
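
The retry contract exercised by the RetryableFileSystemProviderProxy spec above is: ordinary failures are retried until maxRetries additional attempts are exhausted, exceptions matching isTransient are retried without consuming a retry, and exceptions matching isFatal abort immediately, with the final failure surfaced as a CromwellFatalException wrapping the original cause. A minimal standalone sketch of that decision logic follows; SimpleRetryParams, FatalRetryException and SimpleRetry are invented names for this illustration, and the backoff and timeout handling of the real CustomRetryParams is omitted.

    import scala.annotation.tailrec
    import scala.util.{Failure, Success, Try}

    // Illustrative stand-ins; the real types are CustomRetryParams and CromwellFatalException.
    final case class FatalRetryException(cause: Throwable) extends Exception(cause)
    final case class SimpleRetryParams(maxRetries: Option[Int],
                                       isTransient: Throwable => Boolean = _ => false,
                                       isFatal: Throwable => Boolean = _ => false)

    object SimpleRetry {
      /** Runs op, retrying failures according to params. */
      def run[A](params: SimpleRetryParams)(op: () => A): A = {
        @tailrec
        def attempt(retriesLeft: Option[Int]): A = Try(op()) match {
          case Success(a) => a
          // Fatal failures are never retried.
          case Failure(t) if params.isFatal(t) => throw FatalRetryException(t)
          // Transient failures are retried without consuming a retry.
          case Failure(t) if params.isTransient(t) => attempt(retriesLeft)
          // Ordinary failures consume a retry until none are left.
          case Failure(t) if retriesLeft.exists(_ <= 0) => throw FatalRetryException(t)
          case Failure(_) => attempt(retriesLeft map { _ - 1 })
        }
        attempt(params.maxRetries)
      }
    }

With maxRetries = Option(2) this allows one initial attempt plus two retries, which is consistent with the times(3) verifications in the over-retry-max test above.
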
- "Builder" should "build" in { - - val wdlValues = Map( - "foo" -> WdlString("none"), - "bar" -> WdlArray(WdlArrayType(WdlIntegerType), List(WdlInteger(1), WdlInteger(2))), - "baz" -> WdlArray(WdlArrayType(WdlArrayType(WdlIntegerType)), List( + case class SimpletonConversion(name: String, wdlValue: WdlValue, simpletons: Seq[WdlValueSimpleton]) + val simpletonConversions = List( + SimpletonConversion("foo", WdlString("none"), List(WdlValueSimpleton("foo", WdlString("none")))), + SimpletonConversion("bar", WdlArray(WdlArrayType(WdlIntegerType), List(WdlInteger(1), WdlInteger(2))), List(WdlValueSimpleton("bar[0]", WdlInteger(1)), WdlValueSimpleton("bar[1]", WdlInteger(2)))), + SimpletonConversion( + "baz", + WdlArray(WdlArrayType(WdlArrayType(WdlIntegerType)), List( WdlArray(WdlArrayType(WdlIntegerType), List(WdlInteger(0), WdlInteger(1))), WdlArray(WdlArrayType(WdlIntegerType), List(WdlInteger(2), WdlInteger(3))))), - "map" -> WdlMap(WdlMapType(WdlStringType, WdlStringType), Map( + List(WdlValueSimpleton("baz[0][0]", WdlInteger(0)), WdlValueSimpleton("baz[0][1]", WdlInteger(1)), WdlValueSimpleton("baz[1][0]", WdlInteger(2)), WdlValueSimpleton("baz[1][1]", WdlInteger(3))) + ), + SimpletonConversion( + "map", + WdlMap(WdlMapType(WdlStringType, WdlStringType), Map( WdlString("foo") -> WdlString("foo"), - WdlString("bar") -> WdlString("bar")) - ), - "map2" -> WdlMap(WdlMapType(WdlStringType, WdlMapType(WdlStringType, WdlStringType)), Map( - WdlString("foo") -> - WdlMap(WdlMapType(WdlStringType, WdlStringType), Map(WdlString("foo2") -> WdlString("foo"))), - WdlString("bar") -> - WdlMap(WdlMapType(WdlStringType, WdlStringType), Map(WdlString("bar2") -> WdlString("bar"))) - )), - "map3" -> WdlMap(WdlMapType(WdlStringType, WdlArrayType(WdlIntegerType)), Map( + WdlString("bar") -> WdlString("bar"))), + List(WdlValueSimpleton("map:foo", WdlString("foo")), WdlValueSimpleton("map:bar", WdlString("bar"))) + ), + SimpletonConversion( + "mapOfMaps", + WdlMap(WdlMapType(WdlStringType, WdlMapType(WdlStringType, WdlStringType)), Map( + WdlString("foo") -> WdlMap(WdlMapType(WdlStringType, WdlStringType), Map(WdlString("foo2") -> WdlString("foo"))), + WdlString("bar") ->WdlMap(WdlMapType(WdlStringType, WdlStringType), Map(WdlString("bar2") -> WdlString("bar"))))), + List(WdlValueSimpleton("mapOfMaps:foo:foo2", WdlString("foo")), WdlValueSimpleton("mapOfMaps:bar:bar2", WdlString("bar"))) + ), + SimpletonConversion( + "simplePair1", + WdlPair(WdlInteger(1), WdlString("hello")), + List(WdlValueSimpleton("simplePair1:left", WdlInteger(1)), WdlValueSimpleton("simplePair1:right", WdlString("hello"))) + ), + SimpletonConversion( + "simplePair2", + WdlPair(WdlString("left"), WdlInteger(5)), + List(WdlValueSimpleton("simplePair2:left", WdlString("left")), WdlValueSimpleton("simplePair2:right", WdlInteger(5))) + ), + SimpletonConversion( + "pairOfPairs", + WdlPair( + WdlPair(WdlInteger(1), WdlString("one")), + WdlPair(WdlString("two"), WdlInteger(2))), + List( + WdlValueSimpleton("pairOfPairs:left:left", WdlInteger(1)), + WdlValueSimpleton("pairOfPairs:left:right", WdlString("one")), + WdlValueSimpleton("pairOfPairs:right:left", WdlString("two")), + WdlValueSimpleton("pairOfPairs:right:right", WdlInteger(2))) + ), + SimpletonConversion( + "pairOfArrayAndMap", + WdlPair( + WdlArray(WdlArrayType(WdlIntegerType), List(WdlInteger(1), WdlInteger(2))), + WdlMap(WdlMapType(WdlStringType, WdlIntegerType), Map(WdlString("left") -> WdlInteger(100), WdlString("right") -> WdlInteger(200)))), + List( + 
WdlValueSimpleton("pairOfArrayAndMap:left[0]", WdlInteger(1)), + WdlValueSimpleton("pairOfArrayAndMap:left[1]", WdlInteger(2)), + WdlValueSimpleton("pairOfArrayAndMap:right:left", WdlInteger(100)), + WdlValueSimpleton("pairOfArrayAndMap:right:right", WdlInteger(200))) + ), + SimpletonConversion( + "mapOfArrays", + WdlMap(WdlMapType(WdlStringType, WdlArrayType(WdlIntegerType)), Map( WdlString("foo") -> WdlArray(WdlArrayType(WdlIntegerType), List(WdlInteger(0), WdlInteger(1))), - WdlString("bar") -> WdlArray(WdlArrayType(WdlIntegerType), List(WdlInteger(2), WdlInteger(3)))) - ), - "map4" -> WdlMap(WdlMapType(WdlStringType, WdlStringType), Map( + WdlString("bar") -> WdlArray(WdlArrayType(WdlIntegerType), List(WdlInteger(2), WdlInteger(3))))), + List(WdlValueSimpleton("mapOfArrays:foo[0]", WdlInteger(0)), WdlValueSimpleton("mapOfArrays:foo[1]", WdlInteger(1)), + WdlValueSimpleton("mapOfArrays:bar[0]", WdlInteger(2)), WdlValueSimpleton("mapOfArrays:bar[1]", WdlInteger(3))) + ), + SimpletonConversion( + "escapology", + WdlMap(WdlMapType(WdlStringType, WdlStringType), Map( WdlString("foo[1]") -> WdlString("foo"), WdlString("bar[[") -> WdlString("bar"), - WdlString("baz:qux") -> WdlString("baz:qux") - )) + WdlString("baz:qux") -> WdlString("baz:qux"))), + List(WdlValueSimpleton("escapology:foo\\[1\\]", WdlString("foo")), + WdlValueSimpleton("escapology:bar\\[\\[", WdlString("bar")), + WdlValueSimpleton("escapology:baz\\:qux", WdlString("baz:qux"))) ) + ) + + behavior of "WdlValueSimpleton and WdlValueBuilder" + + simpletonConversions foreach { case SimpletonConversion(name, wdlValue, simpletons) => + it should s"decompose WdlValues into simpletons ($name)" in { + import WdlValueSimpleton._ - val taskOutputs = wdlValues map { case (k, wv) => TaskOutput(k, wv.wdlType, IgnoredExpression) } + val map = Map(name -> wdlValue) + map.simplify should contain theSameElementsAs simpletons + } + + it should s"build simpletons back into WdlValues ($name)" in { + // The task output is used to tell us the type of output we're expecting: + val taskOutputs = List(TaskOutput(name, wdlValue.wdlType, IgnoredExpression, mock[Ast], None)) + val rebuiltValues = WdlValueBuilder.toWdlValues(taskOutputs, simpletons) + rebuiltValues.size should be(1) + rebuiltValues(name) should be(wdlValue) + } + + } + + + it should "round trip everything together with no losses" in { + + val wdlValues = (simpletonConversions map { case SimpletonConversion(name, wdlValue, simpletons) => name -> wdlValue }).toMap + val taskOutputs = wdlValues map { case (k, wv) => TaskOutput(k, wv.wdlType, IgnoredExpression, mock[Ast], None) } + val allSimpletons = simpletonConversions flatMap { case SimpletonConversion(name, wdlValue, simpletons) => simpletons } import WdlValueSimpleton._ - val actual = WdlValueBuilder.toWdlValues(taskOutputs, wdlValues.simplify) + + val actualSimpletons = wdlValues.simplify + actualSimpletons should contain theSameElementsAs allSimpletons + + val actual = WdlValueBuilder.toWdlValues(taskOutputs, actualSimpletons) actual shouldEqual wdlValues } } diff --git a/core/src/test/scala/cromwell/util/SampleWdl.scala b/core/src/test/scala/cromwell/util/SampleWdl.scala index dc2598cf194..2e4d77399df 100644 --- a/core/src/test/scala/cromwell/util/SampleWdl.scala +++ b/core/src/test/scala/cromwell/util/SampleWdl.scala @@ -4,7 +4,7 @@ import java.nio.file.{Files, Path} import java.util.UUID import better.files._ -import cromwell.core.WorkflowSourceFiles +import cromwell.core.{WorkflowSourceFilesWithoutImports} import spray.json._ 
import wdl4s._ import wdl4s.types.{WdlArrayType, WdlStringType} @@ -15,7 +15,7 @@ import scala.language.postfixOps trait SampleWdl extends TestFileUtil { def wdlSource(runtime: String = ""): WdlSource def asWorkflowSources(runtime: String = "", workflowOptions: String = "{}") = - WorkflowSourceFiles(wdlSource(runtime), wdlJson, workflowOptions) + WorkflowSourceFilesWithoutImports(wdlSource(runtime), wdlJson, workflowOptions) val rawInputs: WorkflowRawInputs def name = getClass.getSimpleName.stripSuffix("$") @@ -74,14 +74,14 @@ object SampleWdl { | RUNTIME |} | - |workflow hello { + |workflow wf_hello { | call hello |} """.stripMargin.replaceAll("RUNTIME", runtime) - val Addressee = "hello.hello.addressee" + val Addressee = "wf_hello.hello.addressee" val rawInputs = Map(Addressee -> "world") - val OutputKey = "hello.hello.salutation" + val OutputKey = "wf_hello.hello.salutation" val OutputValue = "Hello world!" } @@ -117,7 +117,7 @@ object SampleWdl { | } |} | - |workflow goodbye { + |workflow wf_goodbye { | call goodbye |} """.stripMargin @@ -147,9 +147,10 @@ object SampleWdl { | output { | String empty = read_string(stdout()) | } + | RUNTIME |} | - |workflow hello { + |workflow wf_hello { | call hello | call goodbye {input: emptyInputString=hello.empty } | output { @@ -509,7 +510,7 @@ object SampleWdl { | RUNTIME |} | - |workflow whereami { + |workflow wf_whereami { | call whereami |} """.stripMargin.replaceAll("RUNTIME", runtime) @@ -661,7 +662,7 @@ object SampleWdl { class ScatterWdl extends SampleWdl { val tasks = s"""task A { | command { - | echo -n -e "jeff\nchris\nmiguel\nthibault\nkhalid\nscott" + | echo -n -e "jeff\nchris\nmiguel\nthibault\nkhalid\nruchi" | } | RUNTIME | output { diff --git a/core/src/test/scala/cromwell/util/WdlValueJsonFormatterSpec.scala b/core/src/test/scala/cromwell/util/WdlValueJsonFormatterSpec.scala new file mode 100644 index 00000000000..91d678c01d7 --- /dev/null +++ b/core/src/test/scala/cromwell/util/WdlValueJsonFormatterSpec.scala @@ -0,0 +1,28 @@ +package cromwell.util + +import scala.Vector + +import org.scalatest.FlatSpec +import org.scalatest.Matchers + +import JsonFormatting.WdlValueJsonFormatter.WdlValueJsonFormat +import spray.json.{ JsObject, pimpString } +import wdl4s.types.{ WdlArrayType, WdlStringType } +import wdl4s.values.{ WdlArray, WdlPair, WdlString } + +class WdlValueJsonFormatterSpec extends FlatSpec with Matchers { + + behavior of "WdlValueJsonFormat" + + it should "write WdlPair to left/right structured JsObject" in { + val left = "sanders" + val right = Vector("rubio", "carson", "cruz") + val wdlPair = WdlPair(WdlString(left), WdlArray(WdlArrayType(WdlStringType), right.map { WdlString(_) })) + val ExpectedJson: JsObject = + """|{ + | "left": "sanders", + | "right": ["rubio", "carson", "cruz"] + |}""".stripMargin.parseJson.asJsObject + WdlValueJsonFormat.write(wdlPair) should matchPattern { case ExpectedJson => } + } +} diff --git a/database/migration/src/main/resources/changelog.xml b/database/migration/src/main/resources/changelog.xml index 251e15d7954..c67137ae073 100644 --- a/database/migration/src/main/resources/changelog.xml +++ b/database/migration/src/main/resources/changelog.xml @@ -45,10 +45,15 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/database/migration/src/main/resources/changesets/workflow_store_imports_file.xml b/database/migration/src/main/resources/changesets/workflow_store_imports_file.xml new file mode 100644 index 00000000000..bd6d3f05106 --- 
/dev/null +++ b/database/migration/src/main/resources/changesets/workflow_store_imports_file.xml @@ -0,0 +1,15 @@ + + + + + + + + + + + + diff --git a/database/migration/src/main/resources/logback.xml b/database/migration/src/main/resources/logback.xml deleted file mode 100644 index fa27b5dde9b..00000000000 --- a/database/migration/src/main/resources/logback.xml +++ /dev/null @@ -1,36 +0,0 @@ - - - - - - %date %X{sourceThread} %-5level - %msg%n - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/database/migration/src/main/scala/cromwell/database/migration/custom/BatchedTaskChange.scala b/database/migration/src/main/scala/cromwell/database/migration/custom/BatchedTaskChange.scala new file mode 100644 index 00000000000..9339dc5d707 --- /dev/null +++ b/database/migration/src/main/scala/cromwell/database/migration/custom/BatchedTaskChange.scala @@ -0,0 +1,139 @@ +package cromwell.database.migration.custom + +import java.sql.{PreparedStatement, ResultSet} + +import liquibase.database.jvm.JdbcConnection +import liquibase.exception.CustomChangeException + +/** + * Runs a migration as a series of batches. + */ +trait BatchedTaskChange extends MigrationTaskChange { + /** + * Returns sql to retrieve the maximum primary key for the table. + * + * Example: + * {{{ + * SELECT MAX([PRIMARY_KEY]) + * FROM [TABLE]; + * }}} + */ + def readCountQuery: String + + /** + * Returns sql to retrieve rows to be passed to migrateBatchRow, batching on a primary key between the half-open + * primary key range [start, stop). + * + * Example: + * {{{ + * SELECT [COLUMNS] + * FROM [TABLE] + * WHERE [PRIMARY_KEY] >= ? AND [PRIMARY_KEY] < ?; + * }}} + */ + def readBatchQuery: String + + /** + * Used to prepare the statement that will be passed repeatedly into migrateBatchRow. + * + * Example: + * {{{ + * UPDATE [TABLE] + * SET [COLUMNS] + * WHERE [PRIMARY_KEY] = ?; + * }}} + * + * Example: + * {{{ + * INSERT INTO [TABLE] + * SET [COLUMNS]; + * }}} + */ + def migrateBatchQuery: String + + /** + * Migrate a row. + * + * Read the values from readRow, update the values, set the updated values on the migrateStatement, and then call + * migrateStatement.addBatch(). Return the (estimated) number of rows to be written by this batch. + * + * @param readRow The row to migrate + * @param migrateStatement The statement to add a new migrated row + * @return The number of rows updated + */ + def migrateBatchRow(readRow: ResultSet, migrateStatement: PreparedStatement): Int + + /** + * Specify the size of a "page". + * For databases with a very large number of rows, selecting all the rows at once can generate a variety of problems. + * In order to avoid any issue, the selection is paginated. This value sets how many rows should be retrieved and + * processed at a time, before asking for the next page. + */ + private val readBatchSize = config.getInt("database.migration.read-batch-size") + + /** + * To keep the size of the insert batch from growing out of control we monitor its size and execute/commit when it + * reaches or exceeds writeBatchSize. + */ + private val writeBatchSize = config.getInt("database.migration.write-batch-size") + + override def migrate(connection: JdbcConnection) = { + + logger.info(s"Running migration $migrationName with a read batch size of " + + s"$readBatchSize and a write batch size of $writeBatchSize") + + /* + * Keep count of the size of the batch. 
+ * + * @see writeBatchSize + */ + var batchMigrationCounter: Int = 0 + + val readCount = getReadCount(connection) + + // So we can display progress + val pageCount = Math.max(readCount / readBatchSize, 1) + + val readBatchStatement = connection.prepareStatement(readBatchQuery) + val migrateBatchStatement = connection.prepareStatement(migrateBatchQuery) + + val paginator = new QueryPaginator(readBatchStatement, readBatchSize, readCount) + + // Loop over pages + paginator.zipWithIndex foreach { + case (resultBatch, page) => + // Loop over rows in page + new ResultSetIterator(resultBatch).zipWithIndex foreach { + case (row, idx) => + batchMigrationCounter += migrateBatchRow(row, migrateBatchStatement) + // batchMigrationCounter can actually be bigger than writeBatchSize as wdlValues are processed atomically, + // so this is a best effort + if (batchMigrationCounter >= writeBatchSize) { + migrateBatchStatement.executeBatch() + connection.commit() + batchMigrationCounter = 0 + } + } + + resultBatch.close() + + val progress = Math.min((page + 1) * 100 / pageCount, 100) + logger.info(s"[$migrationName] $progress%") + } + + if (batchMigrationCounter != 0) { + migrateBatchStatement.executeBatch() + connection.commit() + } + } + + private def getReadCount(connection: JdbcConnection): Int = { + val readCountResultSet = connection.createStatement().executeQuery(readCountQuery) + + if (readCountResultSet.next()) { + readCountResultSet.getInt(1) + } else { + throw new CustomChangeException(s"Could not find max value for pagination from sql:\n$readCountQuery") + } + } +} diff --git a/database/migration/src/main/scala/cromwell/database/migration/custom/MigrationTaskChange.scala b/database/migration/src/main/scala/cromwell/database/migration/custom/MigrationTaskChange.scala new file mode 100644 index 00000000000..4fa7935e632 --- /dev/null +++ b/database/migration/src/main/scala/cromwell/database/migration/custom/MigrationTaskChange.scala @@ -0,0 +1,48 @@ +package cromwell.database.migration.custom + +import com.typesafe.config.ConfigFactory +import com.typesafe.scalalogging.LazyLogging +import liquibase.change.custom.CustomTaskChange +import liquibase.database.Database +import liquibase.database.jvm.JdbcConnection +import liquibase.exception.{CustomChangeException, ValidationErrors} +import liquibase.resource.ResourceAccessor + +/** + * Provides a default implementation of a liquibase custom task change. + */ +trait MigrationTaskChange extends CustomTaskChange with LazyLogging { + lazy val config = ConfigFactory.load + + /** @return name of the migration, defaulting to the class name */ + def migrationName: String = getClass.getSimpleName + + /** + * Performs the migration. + * + * @param connection the connection to the database + */ + def migrate(connection: JdbcConnection): Unit + + override def execute(database: Database): Unit = { + try { + val dbConn = database.getConnection.asInstanceOf[JdbcConnection] + val autoCommit = dbConn.getAutoCommit + dbConn.setAutoCommit(false) + migrate(dbConn) + dbConn.setAutoCommit(autoCommit) + } catch { + case customChangeException: CustomChangeException => throw customChangeException + case exception: Exception => + throw new CustomChangeException(s"Could not apply migration script for $migrationName", exception) + } + } + + override def setUp() = {} + + override def getConfirmationMessage = s"$migrationName complete." 
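
To make the BatchedTaskChange contract above concrete, a minimal implementation might look like the sketch below. The table and column names (EXAMPLE_TABLE, EXAMPLE_ID, EXAMPLE_VALUE) and the migration itself are hypothetical; the real migrations added in these patches, such as RenameWorkflowOptionsInMetadata further on, follow the same shape.

    import java.sql.{PreparedStatement, ResultSet}

    import cromwell.database.migration.custom.BatchedTaskChange

    /**
      * Hypothetical migration that upper-cases EXAMPLE_VALUE for every row of EXAMPLE_TABLE,
      * shown only to illustrate how the three queries and migrateBatchRow fit together.
      */
    class UpperCaseExampleValues extends BatchedTaskChange {
      override val readCountQuery = "SELECT MAX(EXAMPLE_ID) FROM EXAMPLE_TABLE;"

      // Rows are read in half-open primary-key ranges [start, stop) chosen by the pagination loop.
      override val readBatchQuery =
        """|SELECT EXAMPLE_ID, EXAMPLE_VALUE
           |  FROM EXAMPLE_TABLE
           | WHERE EXAMPLE_ID >= ? AND EXAMPLE_ID < ?;
           |""".stripMargin

      override val migrateBatchQuery = "UPDATE EXAMPLE_TABLE SET EXAMPLE_VALUE = ? WHERE EXAMPLE_ID = ?;"

      override def migrateBatchRow(readRow: ResultSet, migrateStatement: PreparedStatement): Int = {
        val id = readRow.getInt(1)
        val migrated = Option(readRow.getString(2)) map { _.toUpperCase } getOrElse ""

        migrateStatement.setString(1, migrated)
        migrateStatement.setInt(2, id)
        migrateStatement.addBatch()
        1 // one row queued in this write batch
      }
    }
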
+ + override def validate(database: Database) = new ValidationErrors + + override def setFileOpener(resourceAccessor: ResourceAccessor) = {} +} diff --git a/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/QueryPaginator.scala b/database/migration/src/main/scala/cromwell/database/migration/custom/QueryPaginator.scala similarity index 88% rename from database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/QueryPaginator.scala rename to database/migration/src/main/scala/cromwell/database/migration/custom/QueryPaginator.scala index f7929cea49b..0e6514043c3 100644 --- a/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/QueryPaginator.scala +++ b/database/migration/src/main/scala/cromwell/database/migration/custom/QueryPaginator.scala @@ -1,4 +1,4 @@ -package cromwell.database.migration.metadata.table.symbol +package cromwell.database.migration.custom import java.sql.{PreparedStatement, ResultSet} diff --git a/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/ResultSetIterator.scala b/database/migration/src/main/scala/cromwell/database/migration/custom/ResultSetIterator.scala similarity index 73% rename from database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/ResultSetIterator.scala rename to database/migration/src/main/scala/cromwell/database/migration/custom/ResultSetIterator.scala index 8658a59da50..a9b22962578 100644 --- a/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/ResultSetIterator.scala +++ b/database/migration/src/main/scala/cromwell/database/migration/custom/ResultSetIterator.scala @@ -1,4 +1,4 @@ -package cromwell.database.migration.metadata.table.symbol +package cromwell.database.migration.custom import java.sql.ResultSet diff --git a/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/CallOutputSymbolTableMigration.scala b/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/CallOutputSymbolTableMigration.scala index fbd5529c1f8..09cf42dbff3 100644 --- a/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/CallOutputSymbolTableMigration.scala +++ b/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/CallOutputSymbolTableMigration.scala @@ -6,7 +6,6 @@ import wdl4s.values._ class CallOutputSymbolTableMigration extends SymbolTableMigration { override def processSymbol(statement: PreparedStatement, - rowIndex: Int, workflowUuid: String, symbolName: String, symbolScope: String, diff --git a/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/InputSymbolTableMigration.scala b/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/InputSymbolTableMigration.scala index 81756d017ae..5740ac3d9f1 100644 --- a/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/InputSymbolTableMigration.scala +++ b/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/InputSymbolTableMigration.scala @@ -7,7 +7,6 @@ import wdl4s.values._ class InputSymbolTableMigration extends SymbolTableMigration { override def processSymbol(statement: PreparedStatement, - rowIndex: Int, workflowUuid: String, symbolName: String, symbolScope: String, diff --git a/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/MetadataStatement.scala 
b/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/MetadataStatement.scala index a938ebcdf22..311b364fe67 100644 --- a/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/MetadataStatement.scala +++ b/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/MetadataStatement.scala @@ -4,7 +4,6 @@ import java.sql.{PreparedStatement, Timestamp, Types} import java.time.format.DateTimeFormatter import java.time.{OffsetDateTime, ZoneId, ZoneOffset} -import liquibase.database.jvm.JdbcConnection import org.slf4j.LoggerFactory import wdl4s.values.{WdlBoolean, WdlFloat, WdlInteger, WdlValue} @@ -18,12 +17,12 @@ object MetadataStatement { val TimestampIdx = 7 val ValueTypeIdx = 8 - def makeStatement(connection: JdbcConnection): PreparedStatement = connection.prepareStatement( + val InsertSql = """ |INSERT INTO METADATA_JOURNAL |(WORKFLOW_EXECUTION_UUID, METADATA_KEY, CALL_FQN, JOB_SCATTER_INDEX, JOB_RETRY_ATTEMPT, METADATA_VALUE, METADATA_TIMESTAMP, METADATA_VALUE_TYPE) |VALUES (?, ?, ?, ?, ?, ?, ?, ?) - """.stripMargin) + """.stripMargin implicit class OffsetDateTimeToSystemTimestamp(val offsetDateTime: OffsetDateTime) extends AnyVal { def toSystemTimestamp = Timestamp.valueOf(offsetDateTime.atZoneSameInstant(ZoneId.systemDefault).toLocalDateTime) diff --git a/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/SymbolTableMigration.scala b/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/SymbolTableMigration.scala index a400866a990..19bbc1279da 100644 --- a/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/SymbolTableMigration.scala +++ b/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/SymbolTableMigration.scala @@ -2,15 +2,9 @@ package cromwell.database.migration.metadata.table.symbol import java.sql.{PreparedStatement, ResultSet} -import com.typesafe.config.ConfigFactory import cromwell.core.simpleton.WdlValueSimpleton._ import cromwell.database.migration.WdlTransformation -import liquibase.change.custom.CustomTaskChange -import liquibase.database.Database -import liquibase.database.jvm.JdbcConnection -import liquibase.exception.{CustomChangeException, ValidationErrors} -import liquibase.resource.ResourceAccessor -import org.slf4j.LoggerFactory +import cromwell.database.migration.custom.BatchedTaskChange import wdl4s.WdlExpression import wdl4s.types.WdlType import wdl4s.values.WdlValue @@ -25,42 +19,12 @@ object SymbolTableMigration { """.stripMargin } -trait SymbolTableMigration extends CustomTaskChange { - import SymbolTableMigration._ +trait SymbolTableMigration extends BatchedTaskChange { import cromwell.database.migration.WdlTransformation._ - // Nb of rows to retrieve / process in a batch - val config = ConfigFactory.load + override val readCountQuery = SymbolTableMigration.NbRowsQuery - /** - * Specify the size of a "page". - * For databases with a very large number of symbols, selecting all the rows at once can generate a variety of problems. - * In order to avoid any issue, the selection is paginated. This value sets how many rows should be retrieved and processed at a time, before asking for the next chunk. 
- */ - val readBatchSize = config.getInt("database.migration.read-batch-size") - - /** - * Because a symbol row can contain any arbitrary wdl value, the amount of metadata rows to insert from a single symbol row can vary from 1 to several thousands (or more). - * To keep the size of the insert batch from growing out of control we monitor its size and execute/commit when it reaches or exceeds writeBatchSize. - */ - val writeBatchSize = config.getInt("database.migration.write-batch-size") - - val logger = LoggerFactory.getLogger("LiquibaseMetadataMigration") - - override def execute(database: Database): Unit = { - try { - val dbConn = database.getConnection.asInstanceOf[JdbcConnection] - val autoCommit = dbConn.getAutoCommit - dbConn.setAutoCommit(false) - migrate(dbConn) - dbConn.setAutoCommit(autoCommit) - } catch { - case t: CustomChangeException => throw t - case t: Throwable => throw new CustomChangeException(s"Could not apply migration script for metadata at ${getClass.getSimpleName}", t) - } - } - - def tmpSymbolPaginatedStatement(connection: JdbcConnection): PreparedStatement = connection.prepareStatement(""" + override val readBatchQuery = """ |SELECT | WORKFLOW_EXECUTION_UUID, | SYMBOL_NAME, @@ -71,64 +35,14 @@ trait SymbolTableMigration extends CustomTaskChange { | WDL_VALUE | FROM TMP_SYMBOL | WHERE TMP_SYMBOL_ID >= ? AND TMP_SYMBOL_ID < ?; - """.stripMargin) - - private def migrate(connection: JdbcConnection) = { - logger.info(s"Running migration with a read batch size of $readBatchSize and a write batch size of $writeBatchSize") - - /** - * Keep count of the size of the batch. - * - * @see writeBatchSize - */ - var insertsCounter: Int = 0 - - // Find the max row id in the TMP_SYMBOL table - val tmpSymbolCountRS = connection.createStatement().executeQuery(NbRowsQuery) - - if (tmpSymbolCountRS.next()) { - val tmpSymbolCount = tmpSymbolCountRS.getInt("symbol_count") - - // So we can display progress - val nbPages = Math.max(tmpSymbolCount / readBatchSize, 1) - - val paginator = new QueryPaginator(tmpSymbolPaginatedStatement(connection), readBatchSize, tmpSymbolCount) - val metadataInsertStatement = MetadataStatement.makeStatement(connection) - - // Loop over pages - paginator.zipWithIndex foreach { - case (resultBatch, page) => - // Loop over rows in page - new ResultSetIterator(resultBatch).zipWithIndex foreach { - case (row, idx) => - insertsCounter += migrateRow(connection, metadataInsertStatement, row, idx) - // insertsCounter can actually be bigger than writeBatchSize as wdlValues are processed atomically, so this is a best effort - if (insertsCounter >= writeBatchSize) { - metadataInsertStatement.executeBatch() - connection.commit() - insertsCounter = 0 - } - } - - resultBatch.close() - - val progress = Math.min((page + 1) * 100 / nbPages, 100) - logger.info(s"[${getClass.getSimpleName}] $progress%") - } + """.stripMargin - if (insertsCounter != 0) { - metadataInsertStatement.executeBatch() - connection.commit() - } - } else { - throw new CustomChangeException("Could not find max value of symbol id for pagination") - } - } + override val migrateBatchQuery = MetadataStatement.InsertSql /** * Migrate a row to the metadata table */ - protected def migrateRow(connection: JdbcConnection, statement: PreparedStatement, row: ResultSet, idx: Int): Int = { + override def migrateBatchRow(row: ResultSet, statement: PreparedStatement): Int = { // Try to coerce the value to a WdlValue val value = for { wdlType <- Try(WdlType.fromWdlString(row.getString("WDL_TYPE"))) @@ -147,7 +61,7 @@ 
trait SymbolTableMigration extends CustomTaskChange { value match { case Success(wdlValue) => - processSymbol(statement, idx, workflowUuid, symbolName, symbolScope, symbolIndex, symbolAttempt, wdlValue) + processSymbol(statement, workflowUuid, symbolName, symbolScope, symbolIndex, symbolAttempt, wdlValue) case Failure(f) => logger.error( s"""Could not parse symbol of type ${row.getString("WDL_TYPE")} @@ -157,7 +71,6 @@ trait SymbolTableMigration extends CustomTaskChange { } def processSymbol(statement: PreparedStatement, - idx: Int, workflowUuid: String, symbolName: String, symbolScope: String, @@ -165,12 +78,6 @@ trait SymbolTableMigration extends CustomTaskChange { symbolAttempt: Option[Int], wdlValue: WdlValue): Int - override def setUp(): Unit = () - - override def validate(database: Database): ValidationErrors = new ValidationErrors - - override def setFileOpener(resourceAccessor: ResourceAccessor): Unit = {} - /** * Add all necessary statements to the batch for the provided WdlValue. */ diff --git a/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/WorkflowOutputSymbolTableMigration.scala b/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/WorkflowOutputSymbolTableMigration.scala index f188fc0cd4e..269d58a2491 100644 --- a/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/WorkflowOutputSymbolTableMigration.scala +++ b/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/WorkflowOutputSymbolTableMigration.scala @@ -7,7 +7,6 @@ import wdl4s.values._ class WorkflowOutputSymbolTableMigration extends SymbolTableMigration { override def processSymbol(statement: PreparedStatement, - rowIndex: Int, workflowUuid: String, symbolName: String, symbolScope: String, diff --git a/database/migration/src/main/scala/cromwell/database/migration/restart/table/RenameWorkflowOptionKeysMigration.scala b/database/migration/src/main/scala/cromwell/database/migration/restart/table/RenameWorkflowOptionKeysMigration.scala index 3852750ce8f..c0610ae368b 100644 --- a/database/migration/src/main/scala/cromwell/database/migration/restart/table/RenameWorkflowOptionKeysMigration.scala +++ b/database/migration/src/main/scala/cromwell/database/migration/restart/table/RenameWorkflowOptionKeysMigration.scala @@ -1,19 +1,11 @@ package cromwell.database.migration.restart.table +import cromwell.database.migration.workflowoptions.WorkflowOptionsRenaming._ import cromwell.database.migration.restart.table.RenameWorkflowOptionKeysMigration._ import liquibase.database.jvm.JdbcConnection import spray.json._ - object RenameWorkflowOptionKeysMigration { - private val RenamedOptionKeys = Map( - "defaultRuntimeOptions" -> "default_runtime_attributes", - "workflowFailureMode" -> "workflow_failure_mode", - "workflow_log_dir" -> "final_workflow_log_dir", - "outputs_path" -> "final_workflow_outputs_dir", - "call_logs_dir" -> "final_call_logs_dir" - ) - private val QueryWorkflowStore = " SELECT WORKFLOW_STORE_ID, WORKFLOW_OPTIONS FROM WORKFLOW_STORE " private val UpdateWorkflowStore = " UPDATE WORKFLOW_STORE SET WORKFLOW_OPTIONS = ? WHERE WORKFLOW_STORE_ID = ? 
" @@ -25,14 +17,6 @@ class RenameWorkflowOptionKeysMigration extends AbstractRestartMigration { override protected def description: String = "Workflow option renaming" override protected def doMigration(connection: JdbcConnection): Unit = { - - def renameOptionKeys(field: JsField): JsField = { - field match { - case (oldName, value) if RenamedOptionKeys.contains(oldName) => RenamedOptionKeys(oldName) -> value - case noop => noop - } - } - val query = connection.createStatement() lazy val insert = connection.prepareStatement(UpdateWorkflowStore) query.execute(QueryWorkflowStore) diff --git a/database/migration/src/main/scala/cromwell/database/migration/workflowoptions/ClearMetadataEntryWorkflowOptions.scala b/database/migration/src/main/scala/cromwell/database/migration/workflowoptions/ClearMetadataEntryWorkflowOptions.scala new file mode 100644 index 00000000000..4d9263491f4 --- /dev/null +++ b/database/migration/src/main/scala/cromwell/database/migration/workflowoptions/ClearMetadataEntryWorkflowOptions.scala @@ -0,0 +1,15 @@ +package cromwell.database.migration.workflowoptions + +import cromwell.core.WorkflowOptions + +/** + * Clear the values from encrypted keys in METADATA_ENTRY. + */ +class ClearMetadataEntryWorkflowOptions extends WorkflowOptionsChange { + override val tableName = "METADATA_ENTRY" + override val primaryKeyColumn = "METADATA_JOURNAL_ID" + override val workflowOptionsColumn = "METADATA_VALUE" + override val additionalReadBatchFilters = "AND METADATA_KEY = 'submittedFiles:options'" + + override def migrateWorkflowOptions(workflowOptions: WorkflowOptions) = workflowOptions.clearEncryptedValues +} diff --git a/database/migration/src/main/scala/cromwell/database/migration/workflowoptions/EncryptWorkflowStoreEntryWorkflowOptions.scala b/database/migration/src/main/scala/cromwell/database/migration/workflowoptions/EncryptWorkflowStoreEntryWorkflowOptions.scala new file mode 100644 index 00000000000..9c3cc74905d --- /dev/null +++ b/database/migration/src/main/scala/cromwell/database/migration/workflowoptions/EncryptWorkflowStoreEntryWorkflowOptions.scala @@ -0,0 +1,14 @@ +package cromwell.database.migration.workflowoptions + +import cromwell.core.WorkflowOptions + +/** + * Encrypt the values for encrypted keys in WORKFLOW_STORE_ENTRY. 
+ */ +class EncryptWorkflowStoreEntryWorkflowOptions extends WorkflowOptionsChange { + override val tableName = "WORKFLOW_STORE_ENTRY" + override val primaryKeyColumn = "WORKFLOW_STORE_ENTRY_ID" + override val workflowOptionsColumn = "WORKFLOW_OPTIONS" + + override def migrateWorkflowOptions(workflowOptions: WorkflowOptions) = workflowOptions.asPrettyJson +} diff --git a/database/migration/src/main/scala/cromwell/database/migration/workflowoptions/RenameWorkflowOptionsInMetadata.scala b/database/migration/src/main/scala/cromwell/database/migration/workflowoptions/RenameWorkflowOptionsInMetadata.scala new file mode 100644 index 00000000000..e6842b45951 --- /dev/null +++ b/database/migration/src/main/scala/cromwell/database/migration/workflowoptions/RenameWorkflowOptionsInMetadata.scala @@ -0,0 +1,38 @@ +package cromwell.database.migration.workflowoptions + +import java.sql.{PreparedStatement, ResultSet} + +import cromwell.database.migration.custom.BatchedTaskChange +import cromwell.database.migration.workflowoptions.WorkflowOptionsRenaming._ +import spray.json.{JsObject, _} + +class RenameWorkflowOptionsInMetadata extends BatchedTaskChange { + val tableName = "METADATA_ENTRY" + val primaryKeyColumn = "METADATA_JOURNAL_ID" + val workflowOptionsColumn = "METADATA_VALUE" + val additionalReadBatchFilters = "AND METADATA_KEY = 'submittedFiles:options'" + + override def readCountQuery = s"SELECT MAX($primaryKeyColumn) FROM $tableName;" + + override def readBatchQuery = + s"""|SELECT $primaryKeyColumn, $workflowOptionsColumn + | FROM $tableName + | WHERE $primaryKeyColumn >= ? AND $primaryKeyColumn < ? $additionalReadBatchFilters; + |""".stripMargin + + override def migrateBatchQuery = s"UPDATE $tableName SET $workflowOptionsColumn = ? WHERE $primaryKeyColumn = ?;" + + override def migrateBatchRow(readRow: ResultSet, migrateStatement: PreparedStatement): Int = { + val rowId = readRow.getInt(1) + + val migratedJson = readRow.getString(2).parseJson match { + case JsObject(fields) => JsObject(fields map renameOptionKeys) + case other => other + } + + migrateStatement.setString(1, migratedJson.prettyPrint) + migrateStatement.setInt(2, rowId) + migrateStatement.addBatch() + 1 + } +} diff --git a/database/migration/src/main/scala/cromwell/database/migration/workflowoptions/WorkflowOptionsChange.scala b/database/migration/src/main/scala/cromwell/database/migration/workflowoptions/WorkflowOptionsChange.scala new file mode 100644 index 00000000000..3c1b6b68f6b --- /dev/null +++ b/database/migration/src/main/scala/cromwell/database/migration/workflowoptions/WorkflowOptionsChange.scala @@ -0,0 +1,69 @@ +package cromwell.database.migration.workflowoptions + +import java.sql.{PreparedStatement, ResultSet} + +import cromwell.core.WorkflowOptions +import cromwell.database.migration.custom.BatchedTaskChange +import liquibase.database.Database + +import scala.util.{Failure, Success} + +/** + * Edits the workflow options stored in a table. + */ +trait WorkflowOptionsChange extends BatchedTaskChange { + /** @return name of the table */ + def tableName: String + + /** @return primary key of the table */ + def primaryKeyColumn: String + + /** @return column storing the workflow options */ + def workflowOptionsColumn: String + + /** @return any additional filters to add to the where clause, starting with "AND ..." */ + def additionalReadBatchFilters: String = "" + + /** + * Takes in the workflow options and returns the edited version as a json string. 
+ * + * @param workflowOptions workflow options object + * @return edited workflow object json + */ + def migrateWorkflowOptions(workflowOptions: WorkflowOptions): String + + override def execute(database: Database): Unit = { + val configPath = "workflow-options.encrypted-fields" + if (config.hasPath(configPath) && !config.getStringList(configPath).isEmpty) { + super.execute(database) + } + } + + override def readCountQuery = s"SELECT MAX($primaryKeyColumn) FROM $tableName;" + + override def readBatchQuery = + s"""|SELECT $primaryKeyColumn, $workflowOptionsColumn + | FROM $tableName + | WHERE $primaryKeyColumn >= ? AND $primaryKeyColumn < ? $additionalReadBatchFilters; + |""".stripMargin + + override def migrateBatchQuery = s"UPDATE $tableName SET $workflowOptionsColumn = ? WHERE $primaryKeyColumn = ?;" + + override def migrateBatchRow(readRow: ResultSet, migrateStatement: PreparedStatement): Int = { + val rowId = readRow.getInt(1) + val workflowOptionsJson = readRow.getString(2) + WorkflowOptions.fromJsonString(workflowOptionsJson) match { + case Success(workflowOptions) => + val migratedJson = migrateWorkflowOptions(workflowOptions) + migrateStatement.setString(1, migratedJson) + migrateStatement.setInt(2, rowId) + migrateStatement.addBatch() + 1 + case Failure(exception) => + logger.error( + s"Unable to process $tableName pk $rowId\njson:\n$workflowOptionsJson", exception) + 0 + } + } + +} diff --git a/database/migration/src/main/scala/cromwell/database/migration/workflowoptions/WorkflowOptionsRenaming.scala b/database/migration/src/main/scala/cromwell/database/migration/workflowoptions/WorkflowOptionsRenaming.scala new file mode 100644 index 00000000000..d148a570fdf --- /dev/null +++ b/database/migration/src/main/scala/cromwell/database/migration/workflowoptions/WorkflowOptionsRenaming.scala @@ -0,0 +1,21 @@ +package cromwell.database.migration.workflowoptions + +import spray.json._ + +object WorkflowOptionsRenaming { + + private val RenamedOptionKeys = Map( + "defaultRuntimeOptions" -> "default_runtime_attributes", + "workflowFailureMode" -> "workflow_failure_mode", + "workflow_log_dir" -> "final_workflow_log_dir", + "outputs_path" -> "final_workflow_outputs_dir", + "call_logs_dir" -> "final_call_logs_dir" + ) + + def renameOptionKeys(field: JsField): JsField = { + field match { + case (oldName, value) if RenamedOptionKeys.contains(oldName) => RenamedOptionKeys(oldName) -> value + case noop => noop + } + } +} diff --git a/database/sql/src/main/scala/cromwell/database/slick/CallCachingSlickDatabase.scala b/database/sql/src/main/scala/cromwell/database/slick/CallCachingSlickDatabase.scala index ce40e4e6a87..deb7b6d5e3d 100644 --- a/database/sql/src/main/scala/cromwell/database/slick/CallCachingSlickDatabase.scala +++ b/database/sql/src/main/scala/cromwell/database/slick/CallCachingSlickDatabase.scala @@ -5,6 +5,7 @@ import cromwell.database.sql._ import cromwell.database.sql.joins.CallCachingJoin import scala.concurrent.{ExecutionContext, Future} +import scala.language.postfixOps trait CallCachingSlickDatabase extends CallCachingSqlDatabase { this: SlickDatabase => diff --git a/database/sql/src/main/scala/cromwell/database/slick/SlickDatabase.scala b/database/sql/src/main/scala/cromwell/database/slick/SlickDatabase.scala index 80a4413fd17..173de0e89cc 100644 --- a/database/sql/src/main/scala/cromwell/database/slick/SlickDatabase.scala +++ b/database/sql/src/main/scala/cromwell/database/slick/SlickDatabase.scala @@ -58,7 +58,8 @@ class SlickDatabase(override val originalDatabaseConfig: 
Config) extends SqlData with JobKeyValueSlickDatabase with JobStoreSlickDatabase with CallCachingSlickDatabase - with SummaryStatusSlickDatabase { + with SummaryStatusSlickDatabase + with SubWorkflowStoreSlickDatabase { override val urlKey = SlickDatabase.urlKey(originalDatabaseConfig) private val slickConfig = DatabaseConfig.forConfig[JdbcProfile]("", databaseConfig) diff --git a/database/sql/src/main/scala/cromwell/database/slick/SubWorkflowStoreSlickDatabase.scala b/database/sql/src/main/scala/cromwell/database/slick/SubWorkflowStoreSlickDatabase.scala new file mode 100644 index 00000000000..05d216141d6 --- /dev/null +++ b/database/sql/src/main/scala/cromwell/database/slick/SubWorkflowStoreSlickDatabase.scala @@ -0,0 +1,67 @@ +package cromwell.database.slick + +import cats.instances.future._ +import cats.syntax.functor._ +import cromwell.database.sql.SubWorkflowStoreSqlDatabase +import cromwell.database.sql.tables.SubWorkflowStoreEntry + +import scala.concurrent.{ExecutionContext, Future} +import scala.language.postfixOps + +trait SubWorkflowStoreSlickDatabase extends SubWorkflowStoreSqlDatabase { + this: SlickDatabase => + + import dataAccess.driver.api._ + + def addSubWorkflowStoreEntry(rootWorkflowExecutionUuid: String, + parentWorkflowExecutionUuid: String, + callFullyQualifiedName: String, + jobIndex: Int, + jobAttempt: Int, + subWorkflowExecutionUuid: String)(implicit ec: ExecutionContext): Future[Unit] = { + val action = for { + workflowStoreEntry <- dataAccess.workflowStoreEntriesForWorkflowExecutionUuid(rootWorkflowExecutionUuid).result.headOption + _ <- workflowStoreEntry match { + case Some(rootWorkflow) => + dataAccess.subWorkflowStoreEntryIdsAutoInc += + SubWorkflowStoreEntry( + rootWorkflow.workflowStoreEntryId, + parentWorkflowExecutionUuid, + callFullyQualifiedName, + jobIndex, + jobAttempt, + subWorkflowExecutionUuid + ) + case None => DBIO.failed(new IllegalArgumentException(s"Could not find root workflow with UUID $rootWorkflowExecutionUuid")) + } + } yield () + + runTransaction(action) void + } + + override def querySubWorkflowStore(parentWorkflowExecutionUuid: String, callFqn: String, jobIndex: Int, jobAttempt: Int) + (implicit ec: ExecutionContext): Future[Option[SubWorkflowStoreEntry]] = { + val action = for { + subWorkflowStoreEntryOption <- dataAccess.subWorkflowStoreEntriesForJobKey( + (parentWorkflowExecutionUuid, callFqn, jobIndex, jobAttempt) + ).result.headOption + } yield subWorkflowStoreEntryOption + + runTransaction(action) + } + + override def removeSubWorkflowStoreEntries(rootWorkflowExecutionUuid: String) + (implicit ec: ExecutionContext): Future[Int] = { + val action = for { + workflowStoreEntry <- dataAccess.workflowStoreEntriesForWorkflowExecutionUuid(rootWorkflowExecutionUuid).result.headOption + deleted <- workflowStoreEntry match { + case Some(rootWorkflow) => + dataAccess.subWorkflowStoreEntriesForRootWorkflowId(rootWorkflow.workflowStoreEntryId.get).delete + case None => + DBIO.successful(0) + } + } yield deleted + + runTransaction(action) + } +} diff --git a/database/sql/src/main/scala/cromwell/database/slick/tables/DataAccessComponent.scala b/database/sql/src/main/scala/cromwell/database/slick/tables/DataAccessComponent.scala index cdcce5a7a57..b0c70abf351 100644 --- a/database/sql/src/main/scala/cromwell/database/slick/tables/DataAccessComponent.scala +++ b/database/sql/src/main/scala/cromwell/database/slick/tables/DataAccessComponent.scala @@ -14,7 +14,8 @@ class DataAccessComponent(val driver: JdbcProfile) with MetadataEntryComponent with 
SummaryStatusEntryComponent with WorkflowMetadataSummaryEntryComponent - with WorkflowStoreEntryComponent { + with WorkflowStoreEntryComponent + with SubWorkflowStoreEntryComponent { import driver.api._ @@ -29,5 +30,6 @@ class DataAccessComponent(val driver: JdbcProfile) metadataEntries.schema ++ summaryStatusEntries.schema ++ workflowMetadataSummaryEntries.schema ++ - workflowStoreEntries.schema + workflowStoreEntries.schema ++ + subWorkflowStoreEntries.schema } diff --git a/database/sql/src/main/scala/cromwell/database/slick/tables/SubWorkflowStoreEntryComponent.scala b/database/sql/src/main/scala/cromwell/database/slick/tables/SubWorkflowStoreEntryComponent.scala new file mode 100644 index 00000000000..848c60c4b1c --- /dev/null +++ b/database/sql/src/main/scala/cromwell/database/slick/tables/SubWorkflowStoreEntryComponent.scala @@ -0,0 +1,62 @@ +package cromwell.database.slick.tables + +import cromwell.database.sql.tables.SubWorkflowStoreEntry +import slick.model.ForeignKeyAction.Cascade + +trait SubWorkflowStoreEntryComponent { + + this: DriverComponent with WorkflowStoreEntryComponent => + + import driver.api._ + + class SubWorkflowStoreEntries(tag: Tag) extends Table[SubWorkflowStoreEntry](tag, "SUB_WORKFLOW_STORE_ENTRY") { + def subWorkflowStoreEntryId = column[Int]("SUB_WORKFLOW_STORE_ENTRY_ID", O.PrimaryKey, O.AutoInc) + + def rootWorkflowId = column[Int]("ROOT_WORKFLOW_ID") + + def parentWorkflowExecutionUuid = column[String]("PARENT_WORKFLOW_EXECUTION_UUID") + + def callFullyQualifiedName = column[String]("CALL_FULLY_QUALIFIED_NAME") + + def callIndex = column[Int]("CALL_INDEX") + + def callAttempt = column[Int]("CALL_ATTEMPT") + + def subWorkflowExecutionUuid = column[String]("SUB_WORKFLOW_EXECUTION_UUID") + + override def * = (rootWorkflowId.?, parentWorkflowExecutionUuid, callFullyQualifiedName, callIndex, callAttempt, subWorkflowExecutionUuid, subWorkflowStoreEntryId.?) 
<> (SubWorkflowStoreEntry.tupled, SubWorkflowStoreEntry.unapply) + + def ucSubWorkflowStoreEntryPweuCfqnJiJa = index("UC_SUB_WORKFLOW_STORE_ENTRY_PWEU_CFQN_CI_CA", + (parentWorkflowExecutionUuid, callFullyQualifiedName, callIndex, callAttempt), unique = true) + + def fkSubWorkflowStoreRootWorkflowStoreEntryId = foreignKey("FK_SUB_WORKFLOW_STORE_ROOT_WORKFLOW_ID_WORKFLOW_STORE_ENTRY_ID", + rootWorkflowId, workflowStoreEntries)(_.workflowStoreEntryId, onDelete = Cascade) + + def ixSubWorkflowStoreEntryPweu = index("IX_SUB_WORKFLOW_STORE_ENTRY_PWEU", parentWorkflowExecutionUuid, unique = false) + } + + protected val subWorkflowStoreEntries = TableQuery[SubWorkflowStoreEntries] + + val subWorkflowStoreEntryIdsAutoInc = subWorkflowStoreEntries returning subWorkflowStoreEntries.map(_.subWorkflowStoreEntryId) + + val subWorkflowStoreEntriesForRootWorkflowId = Compiled( + (rootWorkflowId: Rep[Int]) => for { + subWorkflowStoreEntry <- subWorkflowStoreEntries + if subWorkflowStoreEntry.rootWorkflowId === rootWorkflowId + } yield subWorkflowStoreEntry + ) + + /** + * Useful for finding the unique sub workflow entry for a given job key + */ + val subWorkflowStoreEntriesForJobKey = Compiled( + (parentWorkflowExecutionUuid: Rep[String], callFullyQualifiedName: Rep[String], jobIndex: Rep[Int], + jobAttempt: Rep[Int]) => + for { + subWorkflowStoreEntry <- subWorkflowStoreEntries + if subWorkflowStoreEntry.parentWorkflowExecutionUuid === parentWorkflowExecutionUuid && + subWorkflowStoreEntry.callFullyQualifiedName === callFullyQualifiedName && + subWorkflowStoreEntry.callIndex === jobIndex && subWorkflowStoreEntry.callAttempt === jobAttempt + } yield subWorkflowStoreEntry + ) +} diff --git a/database/sql/src/main/scala/cromwell/database/slick/tables/WorkflowStoreEntryComponent.scala b/database/sql/src/main/scala/cromwell/database/slick/tables/WorkflowStoreEntryComponent.scala index 4a248f803ac..dc896ca3dc4 100644 --- a/database/sql/src/main/scala/cromwell/database/slick/tables/WorkflowStoreEntryComponent.scala +++ b/database/sql/src/main/scala/cromwell/database/slick/tables/WorkflowStoreEntryComponent.scala @@ -1,6 +1,6 @@ package cromwell.database.slick.tables -import java.sql.{Clob, Timestamp} +import java.sql.{Blob, Clob, Timestamp} import cromwell.database.sql.tables.WorkflowStoreEntry @@ -25,8 +25,10 @@ trait WorkflowStoreEntryComponent { def submissionTime = column[Timestamp]("SUBMISSION_TIME") + def importsZipFile = column[Option[Blob]]("IMPORTS_ZIP") + override def * = (workflowExecutionUuid, workflowDefinition, workflowInputs, workflowOptions, workflowState, - submissionTime, workflowStoreEntryId.?) <> (WorkflowStoreEntry.tupled, WorkflowStoreEntry.unapply) + submissionTime, importsZipFile, workflowStoreEntryId.?) 
<> (WorkflowStoreEntry.tupled, WorkflowStoreEntry.unapply) def ucWorkflowStoreEntryWeu = index("UC_WORKFLOW_STORE_ENTRY_WEU", workflowExecutionUuid, unique = true) diff --git a/database/sql/src/main/scala/cromwell/database/sql/SqlDatabase.scala b/database/sql/src/main/scala/cromwell/database/sql/SqlDatabase.scala index c6c29479ba0..e1431de761c 100644 --- a/database/sql/src/main/scala/cromwell/database/sql/SqlDatabase.scala +++ b/database/sql/src/main/scala/cromwell/database/sql/SqlDatabase.scala @@ -10,7 +10,8 @@ trait SqlDatabase extends AutoCloseable with CallCachingSqlDatabase with JobStoreSqlDatabase with MetadataSqlDatabase - with WorkflowStoreSqlDatabase { + with WorkflowStoreSqlDatabase + with SubWorkflowStoreSqlDatabase { protected val urlKey: String protected val originalDatabaseConfig: Config diff --git a/database/sql/src/main/scala/cromwell/database/sql/SubWorkflowStoreSqlDatabase.scala b/database/sql/src/main/scala/cromwell/database/sql/SubWorkflowStoreSqlDatabase.scala new file mode 100644 index 00000000000..10707dc9031 --- /dev/null +++ b/database/sql/src/main/scala/cromwell/database/sql/SubWorkflowStoreSqlDatabase.scala @@ -0,0 +1,21 @@ +package cromwell.database.sql + +import cromwell.database.sql.tables.SubWorkflowStoreEntry + +import scala.concurrent.{ExecutionContext, Future} + +trait SubWorkflowStoreSqlDatabase { + this: SqlDatabase => + + def addSubWorkflowStoreEntry(rootWorkflowExecutionUuid: String, + parentWorkflowExecutionUuid: String, + callFullyQualifiedName: String, + jobIndex: Int, + jobAttempt: Int, + subWorkflowExecutionUuid: String)(implicit ec: ExecutionContext): Future[Unit] + + def querySubWorkflowStore(parentWorkflowExecutionUuid: String, callFqn: String, jobIndex: Int, jobAttempt: Int) + (implicit ec: ExecutionContext): Future[Option[SubWorkflowStoreEntry]] + + def removeSubWorkflowStoreEntries(parentWorkflowExecutionUuid: String)(implicit ec: ExecutionContext): Future[Int] +} diff --git a/database/sql/src/main/scala/cromwell/database/sql/tables/SubWorkflowStoreEntry.scala b/database/sql/src/main/scala/cromwell/database/sql/tables/SubWorkflowStoreEntry.scala new file mode 100644 index 00000000000..2e718179a9f --- /dev/null +++ b/database/sql/src/main/scala/cromwell/database/sql/tables/SubWorkflowStoreEntry.scala @@ -0,0 +1,12 @@ +package cromwell.database.sql.tables + +case class SubWorkflowStoreEntry +( + rootWorkflowId: Option[Int], + parentWorkflowExecutionUuid: String, + callFullyQualifiedName: String, + jobIndex: Int, + jobAttempt: Int, + subWorkflowExecutionUuid: String, + subWorkflowStoreEntryId: Option[Int] = None +) diff --git a/database/sql/src/main/scala/cromwell/database/sql/tables/WorkflowStoreEntry.scala b/database/sql/src/main/scala/cromwell/database/sql/tables/WorkflowStoreEntry.scala index 1154256b559..66cd800851e 100644 --- a/database/sql/src/main/scala/cromwell/database/sql/tables/WorkflowStoreEntry.scala +++ b/database/sql/src/main/scala/cromwell/database/sql/tables/WorkflowStoreEntry.scala @@ -1,6 +1,6 @@ package cromwell.database.sql.tables -import java.sql.{Clob, Timestamp} +import java.sql.{Blob, Clob, Timestamp} case class WorkflowStoreEntry ( @@ -10,5 +10,6 @@ case class WorkflowStoreEntry workflowOptions: Clob, workflowState: String, submissionTime: Timestamp, + importsZipFile: Option[Blob], workflowStoreEntryId: Option[Int] = None ) diff --git a/engine/src/main/resources/logback.xml b/engine/src/main/resources/logback.xml deleted file mode 100644 index fa27b5dde9b..00000000000 --- a/engine/src/main/resources/logback.xml +++ 
/dev/null
@@ -1,36 +0,0 @@
-[36 deleted lines of logback XML configuration; markup stripped, log pattern was "%date %X{sourceThread} %-5level - %msg%n"]
diff --git a/engine/src/main/resources/swagger/cromwell.yaml b/engine/src/main/resources/swagger/cromwell.yaml
index 7960bb93714..e2e825f8f98 100644
--- a/engine/src/main/resources/swagger/cromwell.yaml
+++ b/engine/src/main/resources/swagger/cromwell.yaml
@@ -93,6 +93,11 @@ paths:
         required: false
         type: file
         in: formData
+      - name: wdlDependencies
+        description: ZIP file containing WDL source files that are used to resolve import statements
+        required: false
+        type: file
+        in: formData
       tags:
         - Workflows
       responses:
@@ -134,6 +139,11 @@ paths:
         required: false
         type: file
         in: formData
+      - name: wdlDependencies
+        description: ZIP file containing WDL source files that are used to resolve import statements
+        required: false
+        type: file
+        in: formData
       tags:
         - Workflows
       responses:
@@ -386,6 +396,13 @@ paths:
         type: string
         collectionFormat: multi
         in: query
+      - name: expandSubWorkflows
+        description: >
+          When true, metadata for sub workflows will be fetched and inserted automatically in the metadata response.
+        required: false
+        type: boolean
+        default: false
+        in: query
       tags:
         - Workflows
       responses:
diff --git a/engine/src/main/resources/workflowTimings/workflowTimings.html b/engine/src/main/resources/workflowTimings/workflowTimings.html
index bd9de21d647..3e1df152c7a 100644
--- a/engine/src/main/resources/workflowTimings/workflowTimings.html
+++ b/engine/src/main/resources/workflowTimings/workflowTimings.html
@@ -1,40 +1,33 @@
- - -
+ + +
+      var parentWorkflow;
+      if (selectedRow) parentWorkflow = chartView.getValue(selectedRow, 0);
+
+      var indexOfParentWorkflow = expandedParentWorkflows.indexOf(parentWorkflow);
+
+      if (indexOfParentWorkflow != -1) {
+        // Remove the parent workflow from the list if it's in it
+        expandedParentWorkflows.splice(indexOfParentWorkflow, 1);
+      } else if (parentWorkflow && parentWorkflowNames.indexOf(parentWorkflow) != -1) {
+        // Add it if it's not
+        expandedParentWorkflows.push(parentWorkflow);
+      }
+
+      var rowsToDisplay = dt.getFilteredRows([filter]);
+      var view = new google.visualization.DataView(dt);
+      view.setRows(rowsToDisplay);
+      return view;
+    }
+
+    function hideAllSubWorkflows(dt) {
+      var view = new google.visualization.DataView(dt);
+      function filterFunction(cell, row, column, table) {
+        return table.getRowProperty(row, "ancestry").length != 0;
+      }
+
+      view.hideRows(dt.getFilteredRows([{column: 0, test: filterFunction}]));
+      return view;
+    }
+
+ + +
-
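
The files above introduce a sub-workflow store: each SUB_WORKFLOW_STORE_ENTRY row maps a job key (parent workflow execution UUID, call fully qualified name, call index, call attempt) to the execution UUID of the sub-workflow that job spawned, with a foreign key back to the root workflow's WORKFLOW_STORE_ENTRY row. The sketch below is only a minimal in-memory illustration of that keying, not the Slick-backed implementation in this patch; the object name SubWorkflowStoreSketch, the call name "wf.scatter_task" and the UUID strings are invented for the example, and the method names simply mirror the SubWorkflowStoreSqlDatabase trait.

object SubWorkflowStoreSketch extends App {
  // A job is keyed the same way as the unique index UC_SUB_WORKFLOW_STORE_ENTRY_PWEU_CFQN_CI_CA:
  // parent workflow execution UUID + call FQN + call index + call attempt.
  final case class JobKey(parentWorkflowExecutionUuid: String,
                          callFullyQualifiedName: String,
                          jobIndex: Int,
                          jobAttempt: Int)

  final case class Entry(rootWorkflowExecutionUuid: String, subWorkflowExecutionUuid: String)

  private var store = Map.empty[JobKey, Entry]

  // Registers the sub-workflow UUID assigned to one job, like addSubWorkflowStoreEntry.
  def addSubWorkflowStoreEntry(rootWorkflowExecutionUuid: String,
                               parentWorkflowExecutionUuid: String,
                               callFullyQualifiedName: String,
                               jobIndex: Int,
                               jobAttempt: Int,
                               subWorkflowExecutionUuid: String): Unit =
    store += JobKey(parentWorkflowExecutionUuid, callFullyQualifiedName, jobIndex, jobAttempt) ->
      Entry(rootWorkflowExecutionUuid, subWorkflowExecutionUuid)

  // Finds the unique sub-workflow entry for a given job key, like querySubWorkflowStore.
  def querySubWorkflowStore(parentWorkflowExecutionUuid: String,
                            callFqn: String,
                            jobIndex: Int,
                            jobAttempt: Int): Option[Entry] =
    store.get(JobKey(parentWorkflowExecutionUuid, callFqn, jobIndex, jobAttempt))

  // Drops every entry registered under a parent workflow, like removeSubWorkflowStoreEntries.
  def removeSubWorkflowStoreEntries(parentWorkflowExecutionUuid: String): Int = {
    val toRemove = store.keySet.filter(_.parentWorkflowExecutionUuid == parentWorkflowExecutionUuid)
    store --= toRemove
    toRemove.size
  }

  // Register the sub-workflow spawned by shard 0, attempt 1 of a call, look it up, then clean up.
  addSubWorkflowStoreEntry("root-uuid", "root-uuid", "wf.scatter_task", 0, 1, "sub-uuid-1")
  println(querySubWorkflowStore("root-uuid", "wf.scatter_task", 0, 1)) // Some(Entry(root-uuid,sub-uuid-1))
  println(removeSubWorkflowStoreEntries("root-uuid"))                  // 1
}

The unique (parent, call FQN, index, attempt) index suggests the intent: when the same job is revisited, querySubWorkflowStore can hand back the previously assigned sub-workflow UUID instead of creating a fresh one, while the ON DELETE CASCADE foreign key above and the WorkflowComplete message sent from WorkflowActor later in this patch remove the entries once the root workflow is done.
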
diff --git a/engine/src/main/scala/cromwell/engine/EngineFilesystems.scala b/engine/src/main/scala/cromwell/engine/EngineFilesystems.scala index ab9cbceac34..a738984dd96 100644 --- a/engine/src/main/scala/cromwell/engine/EngineFilesystems.scala +++ b/engine/src/main/scala/cromwell/engine/EngineFilesystems.scala @@ -1,18 +1,40 @@ package cromwell.engine -import java.nio.file.{FileSystem, FileSystems} - +import akka.actor.ActorSystem import cats.data.Validated.{Invalid, Valid} +import com.google.api.client.http.HttpResponseException import com.typesafe.config.ConfigFactory import cromwell.core.WorkflowOptions -import cromwell.engine.backend.EnhancedWorkflowOptions._ -import cromwell.filesystems.gcs.{GcsFileSystem, GcsFileSystemProvider, GoogleConfiguration} +import cromwell.core.path.{CustomRetryParams, DefaultPathBuilder, PathBuilder} +import cromwell.core.retry.SimpleExponentialBackoff +import cromwell.filesystems.gcs.{GoogleConfiguration, RetryableGcsPathBuilderFactory} import lenthall.exception.MessageAggregation import net.ceedubs.ficus.Ficus._ -import scala.concurrent.ExecutionContext +import scala.concurrent.duration._ +import scala.language.postfixOps + +case class EngineFilesystems(actorSystem: ActorSystem) { -object EngineFilesystems { + private def isFatalGcsException(t: Throwable): Boolean = t match { + case e: HttpResponseException if e.getStatusCode == 403 => true + case e: HttpResponseException if e.getStatusCode == 400 && e.getContent.contains("INVALID_ARGUMENT") => true + case _ => false + } + + private def isTransientGcsException(t: Throwable): Boolean = t match { + // Quota exceeded + case e: HttpResponseException if e.getStatusCode == 429 => true + case _ => false + } + + private val GcsRetryParams = CustomRetryParams( + timeout = Duration.Inf, + maxRetries = Option(3), + backoff = SimpleExponentialBackoff(1 seconds, 3 seconds, 1.5D), + isTransient = isTransientGcsException, + isFatal = isFatalGcsException + ) private val config = ConfigFactory.load private val googleConf: GoogleConfiguration = GoogleConfiguration(config) @@ -26,14 +48,11 @@ object EngineFilesystems { } } - def filesystemsForWorkflow(workflowOptions: WorkflowOptions)(implicit ec: ExecutionContext): List[FileSystem] = { - def gcsFileSystem: Option[GcsFileSystem] = { - googleAuthMode map { mode => - val storage = mode.buildStorage(workflowOptions.toGoogleAuthOptions, googleConf.applicationName) - GcsFileSystem(GcsFileSystemProvider(storage)) - } - } + private val gcsPathBuilderFactory = googleAuthMode map { mode => + RetryableGcsPathBuilderFactory(mode, customRetryParams = GcsRetryParams) + } - List(gcsFileSystem, Option(FileSystems.getDefault)).flatten + def pathBuildersForWorkflow(workflowOptions: WorkflowOptions): List[PathBuilder] = { + List(gcsPathBuilderFactory map { _.withOptions(workflowOptions)(actorSystem) }, Option(DefaultPathBuilder)).flatten } } diff --git a/engine/src/main/scala/cromwell/engine/EngineWorkflowDescriptor.scala b/engine/src/main/scala/cromwell/engine/EngineWorkflowDescriptor.scala index c493b41a6a9..d8aa2a44dd2 100644 --- a/engine/src/main/scala/cromwell/engine/EngineWorkflowDescriptor.scala +++ b/engine/src/main/scala/cromwell/engine/EngineWorkflowDescriptor.scala @@ -1,20 +1,28 @@ package cromwell.engine -import java.nio.file.FileSystem - import cromwell.backend.BackendWorkflowDescriptor import cromwell.core.WorkflowOptions.WorkflowOption import cromwell.core.callcaching.CallCachingMode +import cromwell.core.path.PathBuilder import wdl4s._ -final case class 
EngineWorkflowDescriptor(backendDescriptor: BackendWorkflowDescriptor, +final case class EngineWorkflowDescriptor(namespace: WdlNamespaceWithWorkflow, + backendDescriptor: BackendWorkflowDescriptor, workflowInputs: WorkflowCoercedInputs, - backendAssignments: Map[Call, String], + backendAssignments: Map[TaskCall, String], failureMode: WorkflowFailureMode, - engineFilesystems: List[FileSystem], - callCachingMode: CallCachingMode) { - def id = backendDescriptor.id - def namespace = backendDescriptor.workflowNamespace - def name = namespace.workflow.unqualifiedName + pathBuilders: List[PathBuilder], + callCachingMode: CallCachingMode, + parentWorkflow: Option[EngineWorkflowDescriptor] = None) { + + val rootWorkflow: EngineWorkflowDescriptor = parentWorkflow match { + case Some(parent) => parent.rootWorkflow + case None => this + } + + val id = backendDescriptor.id + lazy val workflow = backendDescriptor.workflow + lazy val name = workflow.unqualifiedName + val inputs = backendDescriptor.inputs def getWorkflowOption(key: WorkflowOption) = backendDescriptor.getWorkflowOption(key) } diff --git a/engine/src/main/scala/cromwell/engine/WdlFunctions.scala b/engine/src/main/scala/cromwell/engine/WdlFunctions.scala index 3cc8ee1cacd..9fe346c50d9 100644 --- a/engine/src/main/scala/cromwell/engine/WdlFunctions.scala +++ b/engine/src/main/scala/cromwell/engine/WdlFunctions.scala @@ -1,17 +1,13 @@ package cromwell.engine -import java.nio.file.FileSystem - -import cromwell.backend.wdl.{PureFunctions, ReadLikeFunctions} -import wdl4s.expression.WdlStandardLibraryFunctions +import cromwell.backend.wdl.ReadLikeFunctions +import wdl4s.expression.PureStandardLibraryFunctionsLike +import cromwell.core.path.PathBuilder import wdl4s.values.{WdlFile, WdlValue} import scala.util.{Failure, Try} -class WdlFunctions(val fileSystems: List[FileSystem]) extends WdlStandardLibraryFunctions with ReadLikeFunctions with PureFunctions { - /** - * Ordered list of filesystems to be used to execute WDL functions needing IO. 
- */ +class WdlFunctions(val pathBuilders: List[PathBuilder]) extends PureStandardLibraryFunctionsLike with ReadLikeFunctions { private def fail(name: String) = Failure(new NotImplementedError(s"$name() not supported at the workflow level yet")) override def write_json(params: Seq[Try[WdlValue]]): Try[WdlFile] = fail("write_json") diff --git a/engine/src/main/scala/cromwell/engine/backend/EnhancedWorkflowOptions.scala b/engine/src/main/scala/cromwell/engine/backend/EnhancedWorkflowOptions.scala deleted file mode 100644 index e2043cb65bc..00000000000 --- a/engine/src/main/scala/cromwell/engine/backend/EnhancedWorkflowOptions.scala +++ /dev/null @@ -1,16 +0,0 @@ -package cromwell.engine.backend - -import cromwell.core.WorkflowOptions -import cromwell.filesystems.gcs.GoogleAuthMode -import cromwell.filesystems.gcs.GoogleAuthMode.GoogleAuthOptions - -import scala.util.Try - -object EnhancedWorkflowOptions { - - implicit class GoogleAuthWorkflowOptions(val workflowOptions: WorkflowOptions) extends AnyVal { - def toGoogleAuthOptions: GoogleAuthMode.GoogleAuthOptions = new GoogleAuthOptions { - override def get(key: String): Try[String] = workflowOptions.get(key) - } - } -} diff --git a/engine/src/main/scala/cromwell/engine/workflow/SingleWorkflowRunnerActor.scala b/engine/src/main/scala/cromwell/engine/workflow/SingleWorkflowRunnerActor.scala index 8abb0874cab..8a72bc414cc 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/SingleWorkflowRunnerActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/SingleWorkflowRunnerActor.scala @@ -9,13 +9,15 @@ import better.files._ import cats.instances.try_._ import cats.syntax.functor._ import cromwell.core.retry.SimpleExponentialBackoff -import cromwell.core.{ExecutionStore => _, _} +import cromwell.core._ import cromwell.engine.workflow.SingleWorkflowRunnerActor._ import cromwell.engine.workflow.WorkflowManagerActor.RetrieveNewWorkflows -import cromwell.engine.workflow.workflowstore.WorkflowStoreActor +import cromwell.engine.workflow.workflowstore.{InMemoryWorkflowStore, WorkflowStoreActor} import cromwell.engine.workflow.workflowstore.WorkflowStoreActor.SubmitWorkflow +import cromwell.jobstore.EmptyJobStoreActor import cromwell.server.CromwellRootActor import cromwell.services.metadata.MetadataService.{GetSingleWorkflowMetadataAction, GetStatus, WorkflowOutputs} +import cromwell.subworkflowstore.EmptySubWorkflowStoreActor import cromwell.webservice.PerRequest.RequestComplete import cromwell.webservice.metadata.MetadataBuilderActor import spray.http.StatusCodes @@ -26,160 +28,151 @@ import scala.concurrent.duration._ import scala.language.postfixOps import scala.util.{Failure, Try} -object SingleWorkflowRunnerActor { - def props(source: WorkflowSourceFiles, metadataOutputFile: Option[Path]): Props = { - Props(new SingleWorkflowRunnerActor(source, metadataOutputFile)) - } - - sealed trait RunnerMessage - // The message to actually run the workflow is made explicit so the non-actor Main can `ask` this actor to do the - // running and collect a result. 
- case object RunWorkflow extends RunnerMessage - private case object IssuePollRequest extends RunnerMessage - private case object IssueReply extends RunnerMessage - - sealed trait RunnerState - case object NotStarted extends RunnerState - case object RunningWorkflow extends RunnerState - case object RequestingOutputs extends RunnerState - case object RequestingMetadata extends RunnerState - case object Done extends RunnerState - - final case class RunnerData(replyTo: Option[ActorRef] = None, - terminalState: Option[WorkflowState] = None, - id: Option[WorkflowId] = None, - failures: Seq[Throwable] = Seq.empty) { - - def addFailure(message: String): RunnerData = addFailure(new RuntimeException(message)) - - def addFailure(e: Throwable): RunnerData = this.copy(failures = e +: failures) - } - - implicit class EnhancedJsObject(val jsObject: JsObject) extends AnyVal { - def state: WorkflowState = WorkflowState.fromString(jsObject.fields("status").asInstanceOf[JsString].value) - } - - private val Tag = "SingleWorkflowRunnerActor" -} - /** * Designed explicitly for the use case of the 'run' functionality in Main. This Actor will start a workflow, - * print out the outputs when complete and then shut down the actor system. Note that multiple aspects of this - * are sub-optimal for future use cases where one might want a single workflow being run. + * print out the outputs when complete and reply with a result. */ -class SingleWorkflowRunnerActor(source: WorkflowSourceFiles, metadataOutputPath: Option[Path]) - extends CromwellRootActor with LoggingFSM[RunnerState, RunnerData] { +class SingleWorkflowRunnerActor(source: WorkflowSourceFilesCollection, metadataOutputPath: Option[Path]) + extends CromwellRootActor with LoggingFSM[RunnerState, SwraData] { + + override val serverMode = false import SingleWorkflowRunnerActor._ private val backoff = SimpleExponentialBackoff(1 second, 1 minute, 1.2) - startWith(NotStarted, RunnerData()) - - private def requestMetadata: State = { - val metadataBuilder = context.actorOf(MetadataBuilderActor.props(serviceRegistryActor), s"MetadataRequest-Workflow-${stateData.id.get}") - metadataBuilder ! GetSingleWorkflowMetadataAction(stateData.id.get, None, None) - goto (RequestingMetadata) - } - - private def schedulePollRequest(): Unit = { - // -Ywarn-value-discard should stash Cancellable to cancel - context.system.scheduler.scheduleOnce(backoff.backoffMillis.millis, self, IssuePollRequest) - () - } - - private def requestStatus(): Unit = { - // This requests status via the metadata service rather than instituting an FSM watch on the underlying workflow actor. - // Cromwell's eventual consistency means it isn't safe to use an FSM transition to a terminal state as the signal for - // when outputs or metadata have stabilized. - val metadataBuilder = context.actorOf(MetadataBuilderActor.props(serviceRegistryActor), s"StatusRequest-Workflow-${stateData.id.get}-request-${UUID.randomUUID()}") - metadataBuilder ! GetStatus(stateData.id.get) - } + override val abortJobsOnTerminate = true + override lazy val workflowStore = new InMemoryWorkflowStore() + override lazy val jobStoreActor = context.actorOf(EmptyJobStoreActor.props) + override lazy val subWorkflowStoreActor = context.actorOf(EmptySubWorkflowStoreActor.props) - private def issueReply: State = { - self ! 
IssueReply - goto (Done) - } + startWith(NotStarted, EmptySwraData) when (NotStarted) { - case Event(RunWorkflow, data) => + case Event(RunWorkflow, EmptySwraData) => log.info(s"$Tag: Submitting workflow") workflowStoreActor ! SubmitWorkflow(source) - goto (RunningWorkflow) using data.copy(replyTo = Option(sender())) + goto(SubmittedWorkflow) using SubmittedSwraData(sender()) } - when (RunningWorkflow) { - case Event(WorkflowStoreActor.WorkflowSubmittedToStore(id), data) => + when (SubmittedWorkflow) { + case Event(WorkflowStoreActor.WorkflowSubmittedToStore(id), SubmittedSwraData(replyTo)) => log.info(s"$Tag: Workflow submitted UUID($id)") // Since we only have a single workflow, force the WorkflowManagerActor's hand in case the polling rate is long workflowManagerActor ! RetrieveNewWorkflows schedulePollRequest() - stay() using data.copy(id = Option(id)) - case Event(IssuePollRequest, data) => - data.id match { - case None => schedulePollRequest() - case _ => requestStatus() - } + goto(RunningWorkflow) using RunningSwraData(replyTo, id) + } + + when (RunningWorkflow) { + case Event(IssuePollRequest, RunningSwraData(_, id)) => + requestStatus(id) stay() - case Event(RequestComplete((StatusCodes.OK, jsObject: JsObject)), data) if !jsObject.state.isTerminal => + case Event(RequestComplete((StatusCodes.OK, jsObject: JsObject)), RunningSwraData(_, _)) if !jsObject.state.isTerminal => schedulePollRequest() stay() - case Event(RequestComplete((StatusCodes.OK, jsObject: JsObject)), data) if jsObject.state == WorkflowSucceeded => + case Event(RequestComplete((StatusCodes.OK, jsObject: JsObject)), RunningSwraData(replyTo, id)) if jsObject.state == WorkflowSucceeded => val metadataBuilder = context.actorOf(MetadataBuilderActor.props(serviceRegistryActor), - s"CompleteRequest-Workflow-${stateData.id.get}-request-${UUID.randomUUID()}") - metadataBuilder ! WorkflowOutputs(data.id.get) - goto(RequestingOutputs) using data.copy(terminalState = Option(WorkflowSucceeded)) - case Event(RequestComplete((StatusCodes.OK, jsObject: JsObject)), data) if jsObject.state == WorkflowFailed => - val updatedData = data.copy(terminalState = Option(WorkflowFailed)).addFailure(s"Workflow ${data.id.get} transitioned to state Failed") - // If there's an output path specified then request metadata, otherwise issue a reply to the original sender. - val nextState = if (metadataOutputPath.isDefined) requestMetadata else issueReply - nextState using updatedData + s"CompleteRequest-Workflow-$id-request-${UUID.randomUUID()}") + metadataBuilder ! 
WorkflowOutputs(id) + log.info(s"$Tag workflow finished with status '$WorkflowSucceeded'.") + goto(RequestingOutputs) using SucceededSwraData(replyTo, id) + case Event(RequestComplete((StatusCodes.OK, jsObject: JsObject)), RunningSwraData(replyTo, id)) if jsObject.state == WorkflowFailed => + log.info(s"$Tag workflow finished with status '$WorkflowFailed'.") + requestMetadataOrIssueReply(FailedSwraData(replyTo, id, new RuntimeException(s"Workflow $id transitioned to state $WorkflowFailed"))) + case Event(RequestComplete((StatusCodes.OK, jsObject: JsObject)), RunningSwraData(replyTo, id)) if jsObject.state == WorkflowAborted => + log.info(s"$Tag workflow finished with status '$WorkflowAborted'.") + requestMetadataOrIssueReply(AbortedSwraData(replyTo, id)) } when (RequestingOutputs) { - case Event(RequestComplete((StatusCodes.OK, outputs: JsObject)), _) => + case Event(RequestComplete((StatusCodes.OK, outputs: JsObject)), data: TerminalSwraData) => outputOutputs(outputs) - if (metadataOutputPath.isDefined) requestMetadata else issueReply + requestMetadataOrIssueReply(data) } when (RequestingMetadata) { - case Event(RequestComplete((StatusCodes.OK, metadata: JsObject)), _) => + case Event(RequestComplete((StatusCodes.OK, metadata: JsObject)), data: TerminalSwraData) => outputMetadata(metadata) - issueReply - } - - when (Done) { - case Event(IssueReply, data) => - data.terminalState foreach { state => log.info(s"$Tag workflow finished with status '$state'.") } - data.failures foreach { e => log.error(e, e.getMessage) } - - val message: Any = data.terminalState collect { case WorkflowSucceeded => () } getOrElse Status.Failure(data.failures.head) - data.replyTo foreach { _ ! message } - stay() + issueReply(data) } onTransition { case NotStarted -> RunningWorkflow => schedulePollRequest() } - private def failAndFinish(e: Throwable): State = { - log.error(e, s"$Tag received Failure message: ${e.getMessage}") - issueReply using stateData.addFailure(e) - } - whenUnhandled { // Handle failures for all failure responses generically. - case Event(r: WorkflowStoreActor.WorkflowAbortFailed, data) => failAndFinish(r.reason) - case Event(Failure(e), data) => failAndFinish(e) - case Event(Status.Failure(e), data) => failAndFinish(e) - case Event(RequestComplete((_, snap)), _) => failAndFinish(new RuntimeException(s"Unexpected API completion message: $snap")) + case Event(r: WorkflowStoreActor.WorkflowAbortFailed, data) => failAndFinish(r.reason, data) + case Event(Failure(e), data) => failAndFinish(e, data) + case Event(Status.Failure(e), data) => failAndFinish(e, data) + case Event(RequestComplete((_, snap)), data) => failAndFinish(new RuntimeException(s"Unexpected API completion message: $snap"), data) case Event((CurrentState(_, _) | Transition(_, _, _)), _) => // ignore uninteresting current state and transition messages stay() - case Event(m, _) => - log.warning(s"$Tag: received unexpected message: $m") + case Event(m, d) => + log.warning(s"$Tag: received unexpected message: $m in state ${d.getClass.getSimpleName}") stay() } + private def requestMetadataOrIssueReply(newData: TerminalSwraData) = if (metadataOutputPath.isDefined) requestMetadata(newData) else issueReply(newData) + + private def requestMetadata(newData: TerminalSwraData): State = { + val metadataBuilder = context.actorOf(MetadataBuilderActor.props(serviceRegistryActor), s"MetadataRequest-Workflow-${newData.id}") + metadataBuilder ! 
GetSingleWorkflowMetadataAction(newData.id, None, None, expandSubWorkflows = true) + goto (RequestingMetadata) using newData + } + + private def schedulePollRequest(): Unit = { + // -Ywarn-value-discard should stash Cancellable to cancel + context.system.scheduler.scheduleOnce(backoff.backoffMillis.millis, self, IssuePollRequest) + () + } + + private def requestStatus(id: WorkflowId): Unit = { + // This requests status via the metadata service rather than instituting an FSM watch on the underlying workflow actor. + // Cromwell's eventual consistency means it isn't safe to use an FSM transition to a terminal state as the signal for + // when outputs or metadata have stabilized. + val metadataBuilder = context.actorOf(MetadataBuilderActor.props(serviceRegistryActor), s"StatusRequest-Workflow-$id-request-${UUID.randomUUID()}") + metadataBuilder ! GetStatus(id) + } + + private def issueSuccessReply(replyTo: ActorRef): State = { + replyTo.tell(msg = (), sender = self) // Because replyTo ! () is the parameterless call replyTo.!() + context.stop(self) + stay() + } + + private def issueFailureReply(replyTo: ActorRef, e: Throwable): State = { + replyTo ! Status.Failure(e) + context.stop(self) + stay() + } + + private def issueReply(data: TerminalSwraData) = { + data match { + case s: SucceededSwraData => issueSuccessReply(s.replyTo) + case f: FailedSwraData => issueFailureReply(f.replyTo, f.failure) + case a: AbortedSwraData => issueSuccessReply(a.replyTo) + + } + } + + private def failAndFinish(e: Throwable, data: SwraData): State = { + log.error(e, s"$Tag received Failure message: ${e.getMessage}") + data match { + case EmptySwraData => + log.error(e, "Cannot issue response. Need a 'replyTo' address to issue the exception response") + context.stop(self) + stay() + case SubmittedSwraData(replyTo) => + issueFailureReply(replyTo, e) + case RunningSwraData(replyTo, _) => + issueFailureReply(replyTo, e) + case c: TerminalSwraData => + issueFailureReply(c.replyTo, e) + } + } + /** * Outputs the outputs to stdout, and then requests the metadata. */ @@ -199,3 +192,44 @@ class SingleWorkflowRunnerActor(source: WorkflowSourceFiles, metadataOutputPath: } void } } + +object SingleWorkflowRunnerActor { + def props(source: WorkflowSourceFilesCollection, metadataOutputFile: Option[Path]): Props = { + Props(new SingleWorkflowRunnerActor(source, metadataOutputFile)) + } + + sealed trait RunnerMessage + // The message to actually run the workflow is made explicit so the non-actor Main can `ask` this actor to do the + // running and collect a result. 
+ case object RunWorkflow extends RunnerMessage + private case object IssuePollRequest extends RunnerMessage + + sealed trait RunnerState + case object NotStarted extends RunnerState + case object SubmittedWorkflow extends RunnerState + case object RunningWorkflow extends RunnerState + case object RequestingOutputs extends RunnerState + case object RequestingMetadata extends RunnerState + + sealed trait SwraData + case object EmptySwraData extends SwraData + final case class SubmittedSwraData(replyTo: ActorRef) extends SwraData + final case class RunningSwraData(replyTo: ActorRef, id: WorkflowId) extends SwraData + + sealed trait TerminalSwraData extends SwraData { def replyTo: ActorRef; def terminalState: WorkflowState; def id: WorkflowId } + final case class SucceededSwraData(replyTo: ActorRef, + id: WorkflowId) extends TerminalSwraData { override val terminalState = WorkflowSucceeded } + + final case class FailedSwraData(replyTo: ActorRef, + id: WorkflowId, + failure: Throwable) extends TerminalSwraData { override val terminalState = WorkflowFailed } + + final case class AbortedSwraData(replyTo: ActorRef, + id: WorkflowId) extends TerminalSwraData { override val terminalState = WorkflowAborted } + + implicit class EnhancedJsObject(val jsObject: JsObject) extends AnyVal { + def state: WorkflowState = WorkflowState.fromString(jsObject.fields("status").asInstanceOf[JsString].value) + } + + private val Tag = "SingleWorkflowRunnerActor" +} diff --git a/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala b/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala index 1035872d360..6d2a2ff40a6 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala @@ -1,15 +1,14 @@ package cromwell.engine.workflow -import java.time.OffsetDateTime - import akka.actor.SupervisorStrategy.Escalate import akka.actor._ import com.typesafe.config.Config -import cromwell.backend.AllBackendInitializationData +import cromwell.backend._ import cromwell.core.Dispatcher.EngineDispatcher import cromwell.core.WorkflowOptions.FinalWorkflowLogDir import cromwell.core._ import cromwell.core.logging.{WorkflowLogger, WorkflowLogging} +import cromwell.core.path.PathFactory import cromwell.engine._ import cromwell.engine.backend.BackendSingletonCollection import cromwell.engine.workflow.WorkflowActor._ @@ -17,13 +16,12 @@ import cromwell.engine.workflow.lifecycle.MaterializeWorkflowDescriptorActor.{Ma import cromwell.engine.workflow.lifecycle.WorkflowFinalizationActor.{StartFinalizationCommand, WorkflowFinalizationFailedResponse, WorkflowFinalizationSucceededResponse} import cromwell.engine.workflow.lifecycle.WorkflowInitializationActor.{StartInitializationCommand, WorkflowInitializationFailedResponse, WorkflowInitializationSucceededResponse} import cromwell.engine.workflow.lifecycle._ -import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor +import cromwell.engine.workflow.lifecycle.execution.{WorkflowExecutionActor, WorkflowMetadataHelper} import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor._ import cromwell.services.metadata.MetadataService._ -import cromwell.services.metadata.{MetadataEvent, MetadataKey, MetadataValue} +import cromwell.subworkflowstore.SubWorkflowStoreActor.WorkflowComplete import cromwell.webservice.EngineStatsActor - -import scala.util.Random +import wdl4s.{LocallyQualifiedName => _} object WorkflowActor { @@ -135,16 +133,18 @@ object 
WorkflowActor { def props(workflowId: WorkflowId, startMode: StartMode, - wdlSource: WorkflowSourceFiles, + wdlSource: WorkflowSourceFilesCollection, conf: Config, serviceRegistryActor: ActorRef, workflowLogCopyRouter: ActorRef, jobStoreActor: ActorRef, + subWorkflowStoreActor: ActorRef, callCacheReadActor: ActorRef, jobTokenDispenserActor: ActorRef, - backendSingletonCollection: BackendSingletonCollection): Props = { + backendSingletonCollection: BackendSingletonCollection, + serverMode: Boolean): Props = { Props(new WorkflowActor(workflowId, startMode, wdlSource, conf, serviceRegistryActor, workflowLogCopyRouter, - jobStoreActor, callCacheReadActor, jobTokenDispenserActor, backendSingletonCollection)).withDispatcher(EngineDispatcher) + jobStoreActor, subWorkflowStoreActor, callCacheReadActor, jobTokenDispenserActor, backendSingletonCollection, serverMode)).withDispatcher(EngineDispatcher) } } @@ -153,30 +153,32 @@ object WorkflowActor { */ class WorkflowActor(val workflowId: WorkflowId, startMode: StartMode, - workflowSources: WorkflowSourceFiles, + workflowSources: WorkflowSourceFilesCollection, conf: Config, - serviceRegistryActor: ActorRef, + override val serviceRegistryActor: ActorRef, workflowLogCopyRouter: ActorRef, jobStoreActor: ActorRef, + subWorkflowStoreActor: ActorRef, callCacheReadActor: ActorRef, jobTokenDispenserActor: ActorRef, - backendSingletonCollection: BackendSingletonCollection) - extends LoggingFSM[WorkflowActorState, WorkflowActorData] with WorkflowLogging with PathFactory { + backendSingletonCollection: BackendSingletonCollection, + serverMode: Boolean) + extends LoggingFSM[WorkflowActorState, WorkflowActorData] with WorkflowLogging with WorkflowMetadataHelper { implicit val ec = context.dispatcher + override val workflowIdForLogging = workflowId startWith(WorkflowUnstartedState, WorkflowActorData.empty) - pushCurrentStateToMetadataService(WorkflowUnstartedState.workflowState) - + pushCurrentStateToMetadataService(workflowId, WorkflowUnstartedState.workflowState) + override def supervisorStrategy: SupervisorStrategy = OneForOneStrategy() { case _ => Escalate } when(WorkflowUnstartedState) { case Event(StartWorkflowCommand, _) => - val actor = context.actorOf(MaterializeWorkflowDescriptorActor.props(serviceRegistryActor, workflowId), + val actor = context.actorOf(MaterializeWorkflowDescriptorActor.props(serviceRegistryActor, workflowId, importLocalFilesystem = !serverMode), "MaterializeWorkflowDescriptorActor") - val startEvent = MetadataEvent(MetadataKey(workflowId, None, WorkflowMetadataKeys.StartTime), MetadataValue(OffsetDateTime.now.toString)) - serviceRegistryActor ! PutMetadataAction(startEvent) + pushWorkflowStart(workflowId) actor ! 
MaterializeWorkflowDescriptorCommand(workflowSources, conf) goto(MaterializingWorkflowDescriptorState) using stateData.copy(currentLifecycleStateActor = Option(actor)) @@ -203,10 +205,11 @@ class WorkflowActor(val workflowId: WorkflowId, case RestartExistingWorkflow => true } - val executionActor = context.actorOf(WorkflowExecutionActor.props(workflowId, + val executionActor = context.actorOf(WorkflowExecutionActor.props( workflowDescriptor, serviceRegistryActor, jobStoreActor, + subWorkflowStoreActor, callCacheReadActor, jobTokenDispenserActor, backendSingletonCollection, @@ -217,16 +220,16 @@ class WorkflowActor(val workflowId: WorkflowId, goto(ExecutingWorkflowState) using data.copy(currentLifecycleStateActor = Option(executionActor), initializationData = initializationData) case Event(WorkflowInitializationFailedResponse(reason), data @ WorkflowActorData(_, Some(workflowDescriptor), _, _)) => - finalizeWorkflow(data, workflowDescriptor, ExecutionStore.empty, OutputStore.empty, Option(reason.toList)) + finalizeWorkflow(data, workflowDescriptor, Map.empty, Map.empty, Option(reason.toList)) } when(ExecutingWorkflowState) { - case Event(WorkflowExecutionSucceededResponse(executionStore, outputStore), + case Event(WorkflowExecutionSucceededResponse(jobKeys, outputs), data @ WorkflowActorData(_, Some(workflowDescriptor), _, _)) => - finalizeWorkflow(data, workflowDescriptor, executionStore, outputStore, None) - case Event(WorkflowExecutionFailedResponse(executionStore, outputStore, failures), + finalizeWorkflow(data, workflowDescriptor, jobKeys, outputs, None) + case Event(WorkflowExecutionFailedResponse(jobKeys, failures), data @ WorkflowActorData(_, Some(workflowDescriptor), _, _)) => - finalizeWorkflow(data, workflowDescriptor, executionStore, outputStore, Option(failures.toList)) + finalizeWorkflow(data, workflowDescriptor, jobKeys, Map.empty, Option(List(failures))) case Event(msg @ EngineStatsActor.JobCountQuery, data) => data.currentLifecycleStateActor match { case Some(a) => a forward msg @@ -245,7 +248,7 @@ class WorkflowActor(val workflowId: WorkflowId, when(WorkflowAbortingState) { case Event(x: EngineLifecycleStateCompleteResponse, data @ WorkflowActorData(_, Some(workflowDescriptor), _, _)) => - finalizeWorkflow(data, workflowDescriptor, ExecutionStore.empty, OutputStore.empty, failures = None) + finalizeWorkflow(data, workflowDescriptor, Map.empty, Map.empty, failures = None) case _ => stay() } @@ -278,22 +281,19 @@ class WorkflowActor(val workflowId: WorkflowId, // Only publish "External" state to metadata service // workflowState maps a state to an "external" state (e.g all states extending WorkflowActorRunningState map to WorkflowRunning) if (fromState.workflowState != toState.workflowState) { - pushCurrentStateToMetadataService(toState.workflowState) + pushCurrentStateToMetadataService(workflowId, toState.workflowState) } } onTransition { case (oldState, terminalState: WorkflowActorTerminalState) => workflowLogger.debug(s"transition from {} to {}. Stopping self.", arg1 = oldState, arg2 = terminalState) - // Add the end time of the workflow in the MetadataService - val now = OffsetDateTime.now - val metadataEventMsg = MetadataEvent(MetadataKey(workflowId, None, WorkflowMetadataKeys.EndTime), MetadataValue(now)) - serviceRegistryActor ! PutMetadataAction(metadataEventMsg) + pushWorkflowEnd(workflowId) + subWorkflowStoreActor ! 
WorkflowComplete(workflowId) terminalState match { case WorkflowFailedState => val failures = nextStateData.lastStateReached.failures.getOrElse(List.empty) - val failureEvents = failures flatMap { r => throwableToMetadataEvents(MetadataKey(workflowId, None, s"${WorkflowMetadataKeys.Failures}[${Random.nextInt(Int.MaxValue)}]"), r) } - serviceRegistryActor ! PutMetadataAction(failureEvents) + pushWorkflowFailures(workflowId, failures) context.parent ! WorkflowFailedResponse(workflowId, nextStateData.lastStateReached.state, failures) case _ => // The WMA is watching state transitions and needs no further info } @@ -303,7 +303,7 @@ class WorkflowActor(val workflowId: WorkflowId, stateData.workflowDescriptor foreach { wd => wd.getWorkflowOption(FinalWorkflowLogDir) match { case Some(destinationDir) => - workflowLogCopyRouter ! CopyWorkflowLogsActor.Copy(wd.id, buildPath(destinationDir, wd.engineFilesystems)) + workflowLogCopyRouter ! CopyWorkflowLogsActor.Copy(wd.id, PathFactory.buildPath(destinationDir, wd.pathBuilders)) case None if WorkflowLogger.isTemporary => workflowLogger.deleteLogFile() case _ => } @@ -323,24 +323,17 @@ class WorkflowActor(val workflowId: WorkflowId, goto(finalState) using data.copy(currentLifecycleStateActor = None) } - private[workflow] def makeFinalizationActor(workflowDescriptor: EngineWorkflowDescriptor, executionStore: ExecutionStore, outputStore: OutputStore) = { - context.actorOf(WorkflowFinalizationActor.props(workflowId, workflowDescriptor, executionStore, outputStore, stateData.initializationData), name = s"WorkflowFinalizationActor") + private[workflow] def makeFinalizationActor(workflowDescriptor: EngineWorkflowDescriptor, jobExecutionMap: JobExecutionMap, workflowOutputs: CallOutputs) = { + context.actorOf(WorkflowFinalizationActor.props(workflowId, workflowDescriptor, jobExecutionMap, workflowOutputs, stateData.initializationData), name = s"WorkflowFinalizationActor") } /** * Run finalization actor and transition to FinalizingWorkflowState. */ private def finalizeWorkflow(data: WorkflowActorData, workflowDescriptor: EngineWorkflowDescriptor, - executionStore: ExecutionStore, outputStore: OutputStore, + jobExecutionMap: JobExecutionMap, workflowOutputs: CallOutputs, failures: Option[List[Throwable]]) = { - val finalizationActor = makeFinalizationActor(workflowDescriptor, executionStore, outputStore) + val finalizationActor = makeFinalizationActor(workflowDescriptor, jobExecutionMap, workflowOutputs) finalizationActor ! StartFinalizationCommand goto(FinalizingWorkflowState) using data.copy(lastStateReached = StateCheckpoint(stateName, failures)) } - - // Update the current State of the Workflow (corresponding to the FSM state) in the Metadata service - private def pushCurrentStateToMetadataService(workflowState: WorkflowState): Unit = { - val metadataEventMsg = MetadataEvent(MetadataKey(workflowId, None, WorkflowMetadataKeys.Status), - MetadataValue(workflowState)) - serviceRegistryActor ! 
PutMetadataAction(metadataEventMsg) - } } diff --git a/engine/src/main/scala/cromwell/engine/workflow/WorkflowManagerActor.scala b/engine/src/main/scala/cromwell/engine/workflow/WorkflowManagerActor.scala index 08bade65454..a19772cd83b 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/WorkflowManagerActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/WorkflowManagerActor.scala @@ -1,6 +1,5 @@ package cromwell.engine.workflow - import akka.actor.FSM.{CurrentState, SubscribeTransitionCallBack, Transition} import akka.actor._ import akka.event.Logging @@ -16,8 +15,10 @@ import cromwell.jobstore.JobStoreActor.{JobStoreWriteFailure, JobStoreWriteSucce import cromwell.services.metadata.MetadataService._ import cromwell.webservice.EngineStatsActor import net.ceedubs.ficus.Ficus._ +import org.apache.commons.lang3.exception.ExceptionUtils + import scala.concurrent.duration._ -import scala.concurrent.{Await, Promise} +import scala.sys.ShutdownHookThread object WorkflowManagerActor { val DefaultMaxWorkflowsToRun = 5000 @@ -42,12 +43,16 @@ object WorkflowManagerActor { serviceRegistryActor: ActorRef, workflowLogCopyRouter: ActorRef, jobStoreActor: ActorRef, + subWorkflowStoreActor: ActorRef, callCacheReadActor: ActorRef, jobTokenDispenserActor: ActorRef, - backendSingletonCollection: BackendSingletonCollection): Props = { - Props(new WorkflowManagerActor( - workflowStore, serviceRegistryActor, workflowLogCopyRouter, jobStoreActor, callCacheReadActor, jobTokenDispenserActor, backendSingletonCollection) - ).withDispatcher(EngineDispatcher) + backendSingletonCollection: BackendSingletonCollection, + abortJobsOnTerminate: Boolean, + serverMode: Boolean): Props = { + val params = WorkflowManagerActorParams(ConfigFactory.load, workflowStore, serviceRegistryActor, + workflowLogCopyRouter, jobStoreActor, subWorkflowStoreActor, callCacheReadActor, jobTokenDispenserActor, backendSingletonCollection, + abortJobsOnTerminate, serverMode) + Props(new WorkflowManagerActor(params)).withDispatcher(EngineDispatcher) } /** @@ -78,24 +83,22 @@ object WorkflowManagerActor { } } -class WorkflowManagerActor(config: Config, - val workflowStore: ActorRef, - val serviceRegistryActor: ActorRef, - val workflowLogCopyRouter: ActorRef, - val jobStoreActor: ActorRef, - val callCacheReadActor: ActorRef, - val jobTokenDispenserActor: ActorRef, - val backendSingletonCollection: BackendSingletonCollection) +case class WorkflowManagerActorParams(config: Config, + workflowStore: ActorRef, + serviceRegistryActor: ActorRef, + workflowLogCopyRouter: ActorRef, + jobStoreActor: ActorRef, + subWorkflowStoreActor: ActorRef, + callCacheReadActor: ActorRef, + jobTokenDispenserActor: ActorRef, + backendSingletonCollection: BackendSingletonCollection, + abortJobsOnTerminate: Boolean, + serverMode: Boolean) + +class WorkflowManagerActor(params: WorkflowManagerActorParams) extends LoggingFSM[WorkflowManagerState, WorkflowManagerData] { - def this(workflowStore: ActorRef, - serviceRegistryActor: ActorRef, - workflowLogCopyRouter: ActorRef, - jobStoreActor: ActorRef, - callCacheReadActor: ActorRef, - jobTokenDispenserActor: ActorRef, - backendSingletonCollection: BackendSingletonCollection) = this( - ConfigFactory.load, workflowStore, serviceRegistryActor, workflowLogCopyRouter, jobStoreActor, callCacheReadActor, jobTokenDispenserActor, backendSingletonCollection) + private val config = params.config private val maxWorkflowsRunning = 
config.getConfig("system").as[Option[Int]]("max-concurrent-workflows").getOrElse(DefaultMaxWorkflowsToRun) private val maxWorkflowsToLaunch = config.getConfig("system").as[Option[Int]]("max-workflow-launch-count").getOrElse(DefaultMaxWorkflowsToLaunch) @@ -104,9 +107,8 @@ class WorkflowManagerActor(config: Config, private val logger = Logging(context.system, this) private val tag = self.path.name - private val donePromise = Promise[Unit]() - private var abortingWorkflowToReplyTo = Map.empty[WorkflowId, ActorRef] + private var shutdownHookThreadOption: Option[ShutdownHookThread] = None override def preStart(): Unit = { addShutdownHook() @@ -114,18 +116,38 @@ class WorkflowManagerActor(config: Config, self ! RetrieveNewWorkflows } + override def postStop() = { + // If the actor is stopping, especially during error tests, then there's nothing to wait for later at JVM shutdown. + tryRemoveShutdownHook() + super.postStop() + } + private def addShutdownHook() = { - // Only abort jobs on SIGINT if the config explicitly sets system.abortJobsOnTerminate = true. + // Only abort jobs on SIGINT if the config explicitly sets system.abort-jobs-on-terminate = true. val abortJobsOnTerminate = - config.getConfig("system").as[Option[Boolean]]("abort-jobs-on-terminate").getOrElse(false) + config.getConfig("system").as[Option[Boolean]]("abort-jobs-on-terminate").getOrElse(params.abortJobsOnTerminate) if (abortJobsOnTerminate) { - sys.addShutdownHook { - logger.info(s"$tag: Received shutdown signal. Aborting all running workflows...") + val shutdownHookThread = sys.addShutdownHook { + logger.info(s"$tag: Received shutdown signal.") self ! AbortAllWorkflowsCommand - Await.result(donePromise.future, Duration.Inf) + while (stateData != null && stateData.workflows.nonEmpty) { + log.info(s"Waiting for ${stateData.workflows.size} workflows to abort...") + Thread.sleep(1000) + } } + shutdownHookThreadOption = Option(shutdownHookThread) + } + } + + private def tryRemoveShutdownHook() = { + try { + shutdownHookThreadOption.foreach(_.remove()) + } catch { + case _: IllegalStateException => /* ignore, we're probably shutting down */ + case exception: Exception => log.error(exception, "Error while removing shutdown hook: {}", exception.getMessage) } + shutdownHookThreadOption = None } startWith(Running, WorkflowManagerData(workflows = Map.empty)) @@ -140,7 +162,7 @@ class WorkflowManagerActor(config: Config, Determine the number of available workflow slots and request the smaller of that number of maxWorkflowsToLaunch. */ val maxNewWorkflows = maxWorkflowsToLaunch min (maxWorkflowsRunning - stateData.workflows.size) - workflowStore ! WorkflowStoreActor.FetchRunnableWorkflows(maxNewWorkflows) + params.workflowStore ! 
WorkflowStoreActor.FetchRunnableWorkflows(maxNewWorkflows) stay() case Event(WorkflowStoreActor.NoNewWorkflowsToStart, stateData) => log.debug("WorkflowStore provided no new workflows to start") @@ -178,7 +200,7 @@ class WorkflowManagerActor(config: Config, Responses from services */ case Event(WorkflowFailedResponse(workflowId, inState, reasons), data) => - log.error(s"$tag Workflow $workflowId failed (during $inState): ${reasons.mkString("\n")}") + log.error(s"$tag Workflow $workflowId failed (during $inState): ${expandFailureReasons(reasons)}") stay() /* Watched transitions @@ -187,13 +209,13 @@ class WorkflowManagerActor(config: Config, log.info(s"$tag ${workflowActor.path.name} is in a terminal state: $toState") // This silently fails if idFromActor is None, but data.without call right below will as well data.idFromActor(workflowActor) foreach { workflowId => - jobStoreActor ! RegisterWorkflowCompleted(workflowId) + params.jobStoreActor ! RegisterWorkflowCompleted(workflowId) if (toState.workflowState == WorkflowAborted) { val replyTo = abortingWorkflowToReplyTo(workflowId) replyTo ! WorkflowStoreActor.WorkflowAborted(workflowId) abortingWorkflowToReplyTo -= workflowId } else { - workflowStore ! WorkflowStoreActor.RemoveWorkflow(workflowId) + params.workflowStore ! WorkflowStoreActor.RemoveWorkflow(workflowId) } } stay using data.without(workflowActor) @@ -242,8 +264,7 @@ class WorkflowManagerActor(config: Config, onTransition { case _ -> Done => - logger.info(s"$tag All workflows finished. Stopping self.") - donePromise.trySuccess(()) + logger.info(s"$tag All workflows finished") () case fromState -> toState => logger.debug(s"$tag transitioning from $fromState to $toState") @@ -264,8 +285,9 @@ class WorkflowManagerActor(config: Config, StartNewWorkflow } - val wfProps = WorkflowActor.props(workflowId, startMode, workflow.sources, config, serviceRegistryActor, - workflowLogCopyRouter, jobStoreActor, callCacheReadActor, jobTokenDispenserActor, backendSingletonCollection) + val wfProps = WorkflowActor.props(workflowId, startMode, workflow.sources, config, params.serviceRegistryActor, + params.workflowLogCopyRouter, params.jobStoreActor, params.subWorkflowStoreActor, params.callCacheReadActor, params.jobTokenDispenserActor, + params.backendSingletonCollection, params.serverMode) val wfActor = context.actorOf(wfProps, name = s"WorkflowActor-$workflowId") wfActor ! SubscribeTransitionCallBack(self) @@ -277,4 +299,10 @@ class WorkflowManagerActor(config: Config, private def scheduleNextNewWorkflowPoll() = { context.system.scheduler.scheduleOnce(newWorkflowPollRate, self, RetrieveNewWorkflows)(context.dispatcher) } + + private def expandFailureReasons(reasons: Seq[Throwable]) = { + reasons map { reason => + reason.getMessage + "\n" + ExceptionUtils.getStackTrace(reason) + } mkString "\n" + } } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/CopyWorkflowLogsActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/CopyWorkflowLogsActor.scala index aa25fdfb869..55ef429beac 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/CopyWorkflowLogsActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/CopyWorkflowLogsActor.scala @@ -27,8 +27,7 @@ object CopyWorkflowLogsActor { // Which could be used for other copying work (outputs, call logs..) 
class CopyWorkflowLogsActor(serviceRegistryActor: ActorRef) extends Actor - with ActorLogging - with PathFactory { + with ActorLogging { def copyAndClean(src: Path, dest: Path) = { File(dest).parent.createDirectories() diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/CopyWorkflowOutputsActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/CopyWorkflowOutputsActor.scala index a5028e5af91..1dbf8b27069 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/CopyWorkflowOutputsActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/CopyWorkflowOutputsActor.scala @@ -5,33 +5,33 @@ import java.nio.file.Path import akka.actor.Props import cromwell.backend.BackendWorkflowFinalizationActor.{FinalizationResponse, FinalizationSuccess} import cromwell.backend.{AllBackendInitializationData, BackendConfigurationDescriptor, BackendInitializationData, BackendLifecycleActorFactory} -import cromwell.core._ import cromwell.core.Dispatcher.IoDispatcher import cromwell.core.WorkflowOptions._ +import cromwell.core._ +import cromwell.core.path.{PathCopier, PathFactory} import cromwell.engine.EngineWorkflowDescriptor import cromwell.engine.backend.{BackendConfiguration, CromwellBackends} -import wdl4s.ReportableSymbol -import wdl4s.values.WdlSingleFile +import wdl4s.values.{WdlArray, WdlMap, WdlSingleFile, WdlValue} import scala.concurrent.{ExecutionContext, Future} object CopyWorkflowOutputsActor { - def props(workflowId: WorkflowId, workflowDescriptor: EngineWorkflowDescriptor, outputStore: OutputStore, + def props(workflowId: WorkflowId, workflowDescriptor: EngineWorkflowDescriptor, workflowOutputs: CallOutputs, initializationData: AllBackendInitializationData) = Props( - new CopyWorkflowOutputsActor(workflowId, workflowDescriptor, outputStore, initializationData) + new CopyWorkflowOutputsActor(workflowId, workflowDescriptor, workflowOutputs, initializationData) ).withDispatcher(IoDispatcher) } -class CopyWorkflowOutputsActor(workflowId: WorkflowId, val workflowDescriptor: EngineWorkflowDescriptor, outputStore: OutputStore, +class CopyWorkflowOutputsActor(workflowId: WorkflowId, val workflowDescriptor: EngineWorkflowDescriptor, workflowOutputs: CallOutputs, initializationData: AllBackendInitializationData) extends EngineWorkflowFinalizationActor with PathFactory { - private def copyWorkflowOutputs(workflowOutputsFilePath: String): Unit = { - val workflowOutputsPath = buildPath(workflowOutputsFilePath, workflowDescriptor.engineFilesystems) + override val pathBuilders = workflowDescriptor.pathBuilders - val reportableOutputs = workflowDescriptor.backendDescriptor.workflowNamespace.workflow.outputs + private def copyWorkflowOutputs(workflowOutputsFilePath: String): Unit = { + val workflowOutputsPath = buildPath(workflowOutputsFilePath) - val outputFilePaths = getOutputFilePaths(reportableOutputs) + val outputFilePaths = getOutputFilePaths outputFilePaths foreach { case (workflowRootPath, srcPath) => @@ -40,23 +40,23 @@ class CopyWorkflowOutputsActor(workflowId: WorkflowId, val workflowDescriptor: E } } - private def getOutputFilePaths(reportableOutputs: Seq[ReportableSymbol]): Seq[(Path, Path)] = { + private def findFiles(values: Seq[WdlValue]): Seq[WdlSingleFile] = { + values flatMap { + case file: WdlSingleFile => Seq(file) + case array: WdlArray => findFiles(array.value) + case map: WdlMap => findFiles(map.value.values.toSeq) + case _ => Seq.empty + } + } + + private def getOutputFilePaths: Seq[(Path, Path)] = { for { - reportableOutput <- 
reportableOutputs // NOTE: Without .toSeq, outputs in arrays only yield the last output - (backend, calls) <- workflowDescriptor.backendAssignments.groupBy(_._2).mapValues(_.keys.toSeq).toSeq + backend <- workflowDescriptor.backendAssignments.values.toSeq config <- BackendConfiguration.backendConfigurationDescriptor(backend).toOption.toSeq rootPath <- getBackendRootPath(backend, config).toSeq - call <- calls - // NOTE: Without .toSeq, outputs in arrays only yield the last output - (outputCallKey, outputEntries) <- outputStore.store.toSeq - // Only get paths for the original scatter call, not the indexed entries - if outputCallKey.call == call && outputCallKey.index.isEmpty - outputEntry <- outputEntries - if reportableOutput.fullyQualifiedName == s"${call.fullyQualifiedName}.${outputEntry.name}" - wdlValue <- outputEntry.wdlValue.toSeq - collected = wdlValue collectAsSeq { case f: WdlSingleFile => f } - wdlFile <- collected + outputFiles = findFiles(workflowOutputs.values.map(_.wdlValue).toSeq) + wdlFile <- outputFiles wdlPath = rootPath.getFileSystem.getPath(wdlFile.value) } yield (rootPath, wdlPath) } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/MaterializeWorkflowDescriptorActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/MaterializeWorkflowDescriptorActor.scala index 1a58b849ac9..74a08a8feee 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/MaterializeWorkflowDescriptorActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/MaterializeWorkflowDescriptorActor.scala @@ -1,8 +1,9 @@ package cromwell.engine.workflow.lifecycle -import java.nio.file.FileSystem +import java.nio.file.Files import akka.actor.{ActorRef, FSM, LoggingFSM, Props} +import better.files.File import cats.data.Validated._ import cats.instances.list._ import cats.syntax.cartesian._ @@ -14,12 +15,14 @@ import com.typesafe.scalalogging.LazyLogging import cromwell.backend.BackendWorkflowDescriptor import cromwell.core.Dispatcher.EngineDispatcher import cromwell.core.WorkflowOptions.{ReadFromCache, WorkflowOption, WriteToCache} +import cromwell.core._ import cromwell.core.callcaching._ import cromwell.core.logging.WorkflowLogging +import cromwell.core.path.PathBuilder import cromwell.engine._ import cromwell.engine.backend.CromwellBackends import cromwell.engine.workflow.lifecycle.MaterializeWorkflowDescriptorActor.{MaterializeWorkflowDescriptorActorData, MaterializeWorkflowDescriptorActorState} -import cromwell.services.metadata.MetadataService._ +import cromwell.services.metadata.MetadataService.{PutMetadataAction, _} import cromwell.services.metadata.{MetadataEvent, MetadataKey, MetadataValue} import cromwell.core.ErrorOr._ import net.ceedubs.ficus.Ficus._ @@ -41,15 +44,15 @@ object MaterializeWorkflowDescriptorActor { // exception if not initialized yet. 
def cromwellBackends = CromwellBackends.instance.get - def props(serviceRegistryActor: ActorRef, workflowId: WorkflowId, cromwellBackends: => CromwellBackends = cromwellBackends): Props = { - Props(new MaterializeWorkflowDescriptorActor(serviceRegistryActor, workflowId, cromwellBackends)).withDispatcher(EngineDispatcher) + def props(serviceRegistryActor: ActorRef, workflowId: WorkflowId, cromwellBackends: => CromwellBackends = cromwellBackends, importLocalFilesystem: Boolean): Props = { + Props(new MaterializeWorkflowDescriptorActor(serviceRegistryActor, workflowId, cromwellBackends, importLocalFilesystem)).withDispatcher(EngineDispatcher) } /* Commands */ sealed trait MaterializeWorkflowDescriptorActorMessage - case class MaterializeWorkflowDescriptorCommand(workflowSourceFiles: WorkflowSourceFiles, + case class MaterializeWorkflowDescriptorCommand(workflowSourceFiles: WorkflowSourceFilesCollection, conf: Config) extends MaterializeWorkflowDescriptorActorMessage case object MaterializeWorkflowDescriptorAbortCommand @@ -90,15 +93,17 @@ object MaterializeWorkflowDescriptorActor { } val enabled = conf.as[Option[Boolean]]("call-caching.enabled").getOrElse(false) + val invalidateBadCacheResults = conf.as[Option[Boolean]]("call-caching.invalidate-bad-cache-results").getOrElse(true) + val callCachingOptions = CallCachingOptions(invalidateBadCacheResults) if (enabled) { val readFromCache = readOptionalOption(ReadFromCache) val writeToCache = readOptionalOption(WriteToCache) (readFromCache |@| writeToCache) map { case (false, false) => CallCachingOff - case (true, false) => CallCachingActivity(ReadCache) - case (false, true) => CallCachingActivity(WriteCache) - case (true, true) => CallCachingActivity(ReadAndWriteCache) + case (true, false) => CallCachingActivity(ReadCache, callCachingOptions) + case (false, true) => CallCachingActivity(WriteCache, callCachingOptions) + case (true, true) => CallCachingActivity(ReadAndWriteCache, callCachingOptions) } } else { @@ -107,7 +112,10 @@ object MaterializeWorkflowDescriptorActor { } } -class MaterializeWorkflowDescriptorActor(serviceRegistryActor: ActorRef, val workflowId: WorkflowId, cromwellBackends: => CromwellBackends) extends LoggingFSM[MaterializeWorkflowDescriptorActorState, MaterializeWorkflowDescriptorActorData] with LazyLogging with WorkflowLogging { +class MaterializeWorkflowDescriptorActor(serviceRegistryActor: ActorRef, + val workflowIdForLogging: WorkflowId, + cromwellBackends: => CromwellBackends, + importLocalFilesystem: Boolean) extends LoggingFSM[MaterializeWorkflowDescriptorActorState, MaterializeWorkflowDescriptorActorData] with LazyLogging with WorkflowLogging { import MaterializeWorkflowDescriptorActor._ @@ -119,7 +127,7 @@ class MaterializeWorkflowDescriptorActor(serviceRegistryActor: ActorRef, val wor when(ReadyToMaterializeState) { case Event(MaterializeWorkflowDescriptorCommand(workflowSourceFiles, conf), _) => - buildWorkflowDescriptor(workflowId, workflowSourceFiles, conf) match { + buildWorkflowDescriptor(workflowIdForLogging, workflowSourceFiles, conf) match { case Valid(descriptor) => sender() ! 
MaterializeWorkflowDescriptorSuccessResponse(descriptor) goto(MaterializationSuccessfulState) @@ -157,57 +165,57 @@ class MaterializeWorkflowDescriptorActor(serviceRegistryActor: ActorRef, val wor } private def buildWorkflowDescriptor(id: WorkflowId, - sourceFiles: WorkflowSourceFiles, + sourceFiles: WorkflowSourceFilesCollection, conf: Config): ErrorOr[EngineWorkflowDescriptor] = { - val namespaceValidation = validateNamespace(sourceFiles.wdlSource) + val namespaceValidation = validateNamespace(sourceFiles) val workflowOptionsValidation = validateWorkflowOptions(sourceFiles.workflowOptionsJson) (namespaceValidation |@| workflowOptionsValidation) map { (_, _) } flatMap { case (namespace, workflowOptions) => pushWfNameMetadataService(namespace.workflow.unqualifiedName) - val engineFileSystems = EngineFilesystems.filesystemsForWorkflow(workflowOptions)(iOExecutionContext) - buildWorkflowDescriptor(id, sourceFiles, namespace, workflowOptions, conf, engineFileSystems) + val pathBuilders = EngineFilesystems(context.system).pathBuildersForWorkflow(workflowOptions) + buildWorkflowDescriptor(id, sourceFiles, namespace, workflowOptions, conf, pathBuilders) } } private def pushWfNameMetadataService(name: String): Unit = { // Workflow name: - val nameEvent = MetadataEvent(MetadataKey(workflowId, None, WorkflowMetadataKeys.Name), MetadataValue(name)) + val nameEvent = MetadataEvent(MetadataKey(workflowIdForLogging, None, WorkflowMetadataKeys.Name), MetadataValue(name)) serviceRegistryActor ! PutMetadataAction(nameEvent) } private def buildWorkflowDescriptor(id: WorkflowId, - sourceFiles: WorkflowSourceFiles, - namespace: NamespaceWithWorkflow, + sourceFiles: WorkflowSourceFilesCollection, + namespace: WdlNamespaceWithWorkflow, workflowOptions: WorkflowOptions, conf: Config, - engineFilesystems: List[FileSystem]): ErrorOr[EngineWorkflowDescriptor] = { + pathBuilders: List[PathBuilder]): ErrorOr[EngineWorkflowDescriptor] = { val defaultBackendName = conf.as[Option[String]]("backend.default") val rawInputsValidation = validateRawInputs(sourceFiles.inputsJson) val failureModeValidation = validateWorkflowFailureMode(workflowOptions, conf) - val backendAssignmentsValidation = validateBackendAssignments(namespace.workflow.calls, workflowOptions, defaultBackendName) + val backendAssignmentsValidation = validateBackendAssignments(namespace.taskCalls, workflowOptions, defaultBackendName) val callCachingModeValidation = validateCallCachingMode(workflowOptions, conf) (rawInputsValidation |@| failureModeValidation |@| backendAssignmentsValidation |@| callCachingModeValidation ) map { (_, _, _, _) } flatMap { case (rawInputs, failureMode, backendAssignments, callCachingMode) => - buildWorkflowDescriptor(id, namespace, rawInputs, backendAssignments, workflowOptions, failureMode, engineFilesystems, callCachingMode) + buildWorkflowDescriptor(id, namespace, rawInputs, backendAssignments, workflowOptions, failureMode, pathBuilders, callCachingMode) } } private def buildWorkflowDescriptor(id: WorkflowId, - namespace: NamespaceWithWorkflow, + namespace: WdlNamespaceWithWorkflow, rawInputs: Map[String, JsValue], - backendAssignments: Map[Call, String], + backendAssignments: Map[TaskCall, String], workflowOptions: WorkflowOptions, failureMode: WorkflowFailureMode, - engineFileSystems: List[FileSystem], + pathBuilders: List[PathBuilder], callCachingMode: CallCachingMode): ErrorOr[EngineWorkflowDescriptor] = { def checkTypes(inputs: Map[FullyQualifiedName, WdlValue]): ErrorOr[Map[FullyQualifiedName, WdlValue]] = { - val 
allDeclarations = namespace.workflow.scopedDeclarations ++ namespace.workflow.calls.flatMap(_.scopedDeclarations) + val allDeclarations = namespace.workflow.declarations ++ namespace.workflow.calls.flatMap(_.declarations) val list: List[ErrorOr[(FullyQualifiedName, WdlValue)]] = inputs.map({ case (k, v) => allDeclarations.find(_.fullyQualifiedName == k) match { case Some(decl) if decl.wdlType.coerceRawValue(v).isFailure => @@ -223,27 +231,27 @@ class MaterializeWorkflowDescriptorActor(serviceRegistryActor: ActorRef, val wor for { coercedInputs <- validateCoercedInputs(rawInputs, namespace) _ = pushWfInputsToMetadataService(coercedInputs) - declarations <- validateDeclarations(namespace, workflowOptions, coercedInputs, engineFileSystems) - declarationsAndInputs <- checkTypes(declarations ++ coercedInputs) - backendDescriptor = BackendWorkflowDescriptor(id, namespace, declarationsAndInputs, workflowOptions) - } yield EngineWorkflowDescriptor(backendDescriptor, coercedInputs, backendAssignments, failureMode, engineFileSystems, callCachingMode) + evaluatedWorkflowsDeclarations <- validateDeclarations(namespace, workflowOptions, coercedInputs, pathBuilders) + declarationsAndInputs <- checkTypes(evaluatedWorkflowsDeclarations ++ coercedInputs) + backendDescriptor = BackendWorkflowDescriptor(id, namespace.workflow, declarationsAndInputs, workflowOptions) + } yield EngineWorkflowDescriptor(namespace, backendDescriptor, coercedInputs, backendAssignments, failureMode, pathBuilders, callCachingMode) } private def pushWfInputsToMetadataService(workflowInputs: WorkflowCoercedInputs): Unit = { // Inputs val inputEvents = workflowInputs match { case empty if empty.isEmpty => - List(MetadataEvent.empty(MetadataKey(workflowId, None,WorkflowMetadataKeys.Inputs))) + List(MetadataEvent.empty(MetadataKey(workflowIdForLogging, None,WorkflowMetadataKeys.Inputs))) case inputs => inputs flatMap { case (inputName, wdlValue) => - wdlValueToMetadataEvents(MetadataKey(workflowId, None, s"${WorkflowMetadataKeys.Inputs}:$inputName"), wdlValue) + wdlValueToMetadataEvents(MetadataKey(workflowIdForLogging, None, s"${WorkflowMetadataKeys.Inputs}:$inputName"), wdlValue) } } serviceRegistryActor ! 
PutMetadataAction(inputEvents) } - private def validateBackendAssignments(calls: Seq[Call], workflowOptions: WorkflowOptions, defaultBackendName: Option[String]): ErrorOr[Map[Call, String]] = { + private def validateBackendAssignments(calls: Set[TaskCall], workflowOptions: WorkflowOptions, defaultBackendName: Option[String]): ErrorOr[Map[TaskCall, String]] = { val callToBackendMap = Try { calls map { call => val backendPriorities = Seq( @@ -272,7 +280,7 @@ class MaterializeWorkflowDescriptorActor(serviceRegistryActor: ActorRef, val wor /** * Map a call to a backend name depending on the runtime attribute key */ - private def assignBackendUsingRuntimeAttrs(call: Call): Option[String] = { + private def assignBackendUsingRuntimeAttrs(call: TaskCall): Option[String] = { val runtimeAttributesMap = call.task.runtimeAttributes.attrs runtimeAttributesMap.get(RuntimeBackendKey) map { wdlExpr => evaluateBackendNameExpression(call.fullyQualifiedName, wdlExpr) } } @@ -287,19 +295,87 @@ class MaterializeWorkflowDescriptorActor(serviceRegistryActor: ActorRef, val wor } } - private def validateDeclarations(namespace: NamespaceWithWorkflow, + private def validateDeclarations(namespace: WdlNamespaceWithWorkflow, options: WorkflowOptions, coercedInputs: WorkflowCoercedInputs, - engineFileSystems: List[FileSystem]): ErrorOr[WorkflowCoercedInputs] = { - namespace.staticWorkflowDeclarationsRecursive(coercedInputs, new WdlFunctions(engineFileSystems)) match { + pathBuilders: List[PathBuilder]): ErrorOr[WorkflowCoercedInputs] = { + namespace.staticDeclarationsRecursive(coercedInputs, new WdlFunctions(pathBuilders)) match { case Success(d) => d.validNel case Failure(e) => s"Workflow has invalid declarations: ${e.getMessage}".invalidNel } } - private def validateNamespace(source: WdlSource): ErrorOr[NamespaceWithWorkflow] = { + private def validateImportsDirectory(zipContents: Array[Byte]): ErrorOr[File] = { + + def makeZipFile(contents: Array[Byte]): Try[File] = Try { + val dependenciesPath = Files.createTempFile("", ".zip") + Files.write(dependenciesPath, contents) + } + + def unZipFile(f: File) = Try { + val unzippedFile = f.unzip() + val unzippedFileContents = unzippedFile.toJava.listFiles().head + + if (unzippedFileContents.isDirectory) File(unzippedFileContents.getPath) + else unzippedFile + } + + val importsFile = for { + zipFile <- makeZipFile(zipContents) + unzipped <- unZipFile(zipFile) + _ <- Try(zipFile.delete(swallowIOExceptions = true)) + } yield unzipped + + importsFile match { + case Success(unzippedDirectory: File) => unzippedDirectory.validNel + case Failure(t) => t.getMessage.invalidNel + } + } + + private def validateNamespaceWithImports(w: WorkflowSourceFilesWithDependenciesZip): ErrorOr[WdlNamespaceWithWorkflow] = { + def getMetadatae(importsDir: File, prefix: String = ""): Seq[(String, File)] = { + importsDir.children.toSeq flatMap { + case f: File if f.isDirectory => getMetadatae(f, prefix + f.name + "/") + case f: File if f.name.endsWith(".wdl") => Seq((prefix + f.name, f)) + case _ => Seq.empty + } + } + + def writeMetadatae(importsDir: File) = { + import scala.collection.JavaConverters._ + + val wfImportEvents = getMetadatae(importsDir) map { case (name: String, f: File) => + val contents = Files.readAllLines(f.path).asScala.mkString(System.lineSeparator()) + MetadataEvent(MetadataKey(workflowIdForLogging, None, WorkflowMetadataKeys.SubmissionSection, WorkflowMetadataKeys.SubmissionSection_Imports, name), MetadataValue(contents)) + } + serviceRegistryActor ! 
PutMetadataAction(wfImportEvents) + } + + validateImportsDirectory(w.importsZip) flatMap { importsDir => + writeMetadatae(importsDir) + val importResolvers: Seq[ImportResolver] = if (importLocalFilesystem) { + List(WdlNamespace.directoryResolver(importsDir), WdlNamespace.fileResolver) + } else { + List(WdlNamespace.directoryResolver(importsDir)) + } + val results = WdlNamespaceWithWorkflow.load(w.wdlSource, importResolvers) + importsDir.delete(swallowIOExceptions = true) + results.validNel + } + } + + private def validateNamespace(source: WorkflowSourceFilesCollection): ErrorOr[WdlNamespaceWithWorkflow] = { try { - NamespaceWithWorkflow.load(source).validNel + source match { + case w: WorkflowSourceFilesWithDependenciesZip => validateNamespaceWithImports(w) + case w: WorkflowSourceFilesWithoutImports => + val importResolvers: Seq[ImportResolver] = if (importLocalFilesystem) { + List(WdlNamespace.fileResolver) + } else { + List.empty + } + WdlNamespaceWithWorkflow.load(w.wdlSource, importResolvers).validNel + } } catch { case e: Exception => s"Unable to load namespace from workflow: ${e.getMessage}".invalidNel } @@ -314,7 +390,7 @@ class MaterializeWorkflowDescriptorActor(serviceRegistryActor: ActorRef, val wor } private def validateCoercedInputs(rawInputs: Map[String, JsValue], - namespace: NamespaceWithWorkflow): ErrorOr[WorkflowCoercedInputs] = { + namespace: WdlNamespaceWithWorkflow): ErrorOr[WorkflowCoercedInputs] = { namespace.coerceRawInputs(rawInputs) match { case Success(r) => r.validNel case Failure(e: ExceptionWithErrors) => Invalid(e.errors) diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowFinalizationActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowFinalizationActor.scala index 9614696e2e0..5c4b777a50b 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowFinalizationActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowFinalizationActor.scala @@ -1,14 +1,15 @@ package cromwell.engine.workflow.lifecycle import akka.actor.{FSM, Props} -import cromwell.backend.AllBackendInitializationData import cromwell.backend.BackendWorkflowFinalizationActor.{FinalizationFailed, FinalizationSuccess, Finalize} +import cromwell.backend._ import cromwell.core.Dispatcher.EngineDispatcher -import cromwell.core.{ExecutionStore, OutputStore, WorkflowId} +import cromwell.core.{CallOutputs, WorkflowId} import cromwell.engine.EngineWorkflowDescriptor import cromwell.engine.backend.CromwellBackends import cromwell.engine.workflow.lifecycle.WorkflowFinalizationActor._ import cromwell.engine.workflow.lifecycle.WorkflowLifecycleActor._ +import wdl4s.TaskCall import scala.util.{Failure, Success, Try} @@ -37,14 +38,14 @@ object WorkflowFinalizationActor { case object WorkflowFinalizationSucceededResponse extends WorkflowLifecycleSuccessResponse final case class WorkflowFinalizationFailedResponse(reasons: Seq[Throwable]) extends WorkflowLifecycleFailureResponse - def props(workflowId: WorkflowId, workflowDescriptor: EngineWorkflowDescriptor, executionStore: ExecutionStore, - outputStore: OutputStore, initializationData: AllBackendInitializationData): Props = { - Props(new WorkflowFinalizationActor(workflowId, workflowDescriptor, executionStore, outputStore, initializationData)).withDispatcher(EngineDispatcher) + def props(workflowId: WorkflowId, workflowDescriptor: EngineWorkflowDescriptor, jobExecutionMap: JobExecutionMap, + workflowOutputs: CallOutputs, initializationData: 
AllBackendInitializationData): Props = { + Props(new WorkflowFinalizationActor(workflowId, workflowDescriptor, jobExecutionMap, workflowOutputs, initializationData)).withDispatcher(EngineDispatcher) } } -case class WorkflowFinalizationActor(workflowId: WorkflowId, workflowDescriptor: EngineWorkflowDescriptor, - executionStore: ExecutionStore, outputStore: OutputStore, initializationData: AllBackendInitializationData) +case class WorkflowFinalizationActor(workflowIdForLogging: WorkflowId, workflowDescriptor: EngineWorkflowDescriptor, + jobExecutionMap: JobExecutionMap, workflowOutputs: CallOutputs, initializationData: AllBackendInitializationData) extends WorkflowLifecycleActor[WorkflowFinalizationActorState] { val tag = self.path.name @@ -62,16 +63,16 @@ case class WorkflowFinalizationActor(workflowId: WorkflowId, workflowDescriptor: case Event(StartFinalizationCommand, _) => val backendFinalizationActors = Try { for { - (backend, calls) <- workflowDescriptor.backendAssignments.groupBy(_._2).mapValues(_.keys.toSeq) + (backend, calls) <- workflowDescriptor.backendAssignments.groupBy(_._2).mapValues(_.keySet) props <- CromwellBackends.backendLifecycleFactoryActorByName(backend).map( - _.workflowFinalizationActorProps(workflowDescriptor.backendDescriptor, calls, executionStore, outputStore, initializationData.get(backend)) + _.workflowFinalizationActorProps(workflowDescriptor.backendDescriptor, calls, filterJobExecutionsForBackend(calls), workflowOutputs, initializationData.get(backend)) ).get actor = context.actorOf(props, backend) } yield actor } val engineFinalizationActor = Try { - context.actorOf(CopyWorkflowOutputsActor.props(workflowId, workflowDescriptor, outputStore, initializationData), + context.actorOf(CopyWorkflowOutputsActor.props(workflowIdForLogging, workflowDescriptor, workflowOutputs, initializationData), "CopyWorkflowOutputsActor") } @@ -95,6 +96,15 @@ case class WorkflowFinalizationActor(workflowId: WorkflowId, workflowDescriptor: goto(WorkflowFinalizationFailedState) } } + + // Only send to each backend the jobs that it executed + private def filterJobExecutionsForBackend(calls: Set[TaskCall]): JobExecutionMap = { + jobExecutionMap map { + case (wd, executedKeys) => wd -> (executedKeys filter { jobKey => calls.contains(jobKey.call) }) + } filter { + case (wd, keys) => keys.nonEmpty + } + } when(FinalizationInProgressState) { case Event(FinalizationSuccess, stateData) => checkForDoneAndTransition(stateData.withSuccess(sender)) diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowInitializationActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowInitializationActor.scala index 2fd5d75aa10..14e8a31a49f 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowInitializationActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowInitializationActor.scala @@ -50,7 +50,7 @@ object WorkflowInitializationActor { case class BackendActorAndBackend(actor: ActorRef, backend: String) } -case class WorkflowInitializationActor(workflowId: WorkflowId, +case class WorkflowInitializationActor(workflowIdForLogging: WorkflowId, workflowDescriptor: EngineWorkflowDescriptor, serviceRegistryActor: ActorRef) extends AbortableWorkflowLifecycleActor[WorkflowInitializationActorState] { @@ -78,7 +78,7 @@ case class WorkflowInitializationActor(workflowId: WorkflowId, case Event(StartInitializationCommand, _) => val backendInitializationActors = Try { for { - (backend, calls) <- 
workflowDescriptor.backendAssignments.groupBy(_._2).mapValues(_.keys.toSeq) + (backend, calls) <- workflowDescriptor.backendAssignments.groupBy(_._2).mapValues(_.keySet) props <- CromwellBackends.backendLifecycleFactoryActorByName(backend).map(factory => factory.workflowInitializationActorProps(workflowDescriptor.backendDescriptor, calls, serviceRegistryActor) ).get diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/CallMetadataHelper.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/CallMetadataHelper.scala new file mode 100644 index 00000000000..80748f6dda7 --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/CallMetadataHelper.scala @@ -0,0 +1,135 @@ +package cromwell.engine.workflow.lifecycle.execution + +import java.time.OffsetDateTime + +import akka.actor.ActorRef +import cromwell.backend.BackendJobDescriptorKey +import cromwell.core.ExecutionStatus._ +import cromwell.core._ +import cromwell.services.metadata.MetadataService._ +import cromwell.services.metadata._ +import wdl4s._ +import wdl4s.values.WdlValue + +import scala.util.Random + +trait CallMetadataHelper { + + def workflowIdForCallMetadata: WorkflowId + def serviceRegistryActor: ActorRef + + def pushNewCallMetadata(callKey: CallKey, backendName: Option[String]) = { + val startEvents = List( + Option(MetadataEvent(metadataKeyForCall(callKey, CallMetadataKeys.Start), MetadataValue(OffsetDateTime.now))), + backendName map { name => MetadataEvent(metadataKeyForCall(callKey, CallMetadataKeys.Backend), MetadataValue(name)) } + ).flatten + + serviceRegistryActor ! PutMetadataAction(startEvents) + } + + def pushQueuedCallMetadata(diffs: Seq[WorkflowExecutionDiff]) = { + val startingEvents = for { + diff <- diffs + (jobKey, executionState) <- diff.executionStoreChanges if jobKey.isInstanceOf[BackendJobDescriptorKey] && executionState == ExecutionStatus.QueuedInCromwell + } yield MetadataEvent(metadataKeyForCall(jobKey, CallMetadataKeys.ExecutionStatus), MetadataValue(ExecutionStatus.QueuedInCromwell)) + serviceRegistryActor ! PutMetadataAction(startingEvents) + } + + def pushStartingCallMetadata(callKey: CallKey) = { + val statusChange = MetadataEvent(metadataKeyForCall(callKey, CallMetadataKeys.ExecutionStatus), MetadataValue(ExecutionStatus.Starting)) + serviceRegistryActor ! PutMetadataAction(statusChange) + } + + def pushRunningCallMetadata(key: CallKey, evaluatedInputs: EvaluatedTaskInputs) = { + val inputEvents = evaluatedInputs match { + case empty if empty.isEmpty => + List(MetadataEvent.empty(metadataKeyForCall(key, s"${CallMetadataKeys.Inputs}"))) + case inputs => + inputs flatMap { + case (inputName, inputValue) => + wdlValueToMetadataEvents(metadataKeyForCall(key, s"${CallMetadataKeys.Inputs}:${inputName.unqualifiedName}"), inputValue) + } + } + + val runningEvent = List(MetadataEvent(metadataKeyForCall(key, CallMetadataKeys.ExecutionStatus), MetadataValue(ExecutionStatus.Running))) + + serviceRegistryActor ! PutMetadataAction(runningEvent ++ inputEvents) + } + + def pushWorkflowOutputMetadata(outputs: Map[LocallyQualifiedName, WdlValue]) = { + val events = outputs match { + case empty if empty.isEmpty => List(MetadataEvent.empty(MetadataKey(workflowIdForCallMetadata, None, WorkflowMetadataKeys.Outputs))) + case _ => outputs flatMap { + case (outputName, outputValue) => + wdlValueToMetadataEvents(MetadataKey(workflowIdForCallMetadata, None, s"${WorkflowMetadataKeys.Outputs}:$outputName"), outputValue) + } + } + + serviceRegistryActor ! 
PutMetadataAction(events) + } + + def pushSuccessfulCallMetadata(jobKey: JobKey, returnCode: Option[Int], outputs: CallOutputs) = { + val completionEvents = completedCallMetadataEvents(jobKey, ExecutionStatus.Done, returnCode) + + val outputEvents = outputs match { + case empty if empty.isEmpty => + List(MetadataEvent.empty(metadataKeyForCall(jobKey, s"${CallMetadataKeys.Outputs}"))) + case _ => + outputs flatMap { case (lqn, outputValue) => wdlValueToMetadataEvents(metadataKeyForCall(jobKey, s"${CallMetadataKeys.Outputs}:$lqn"), outputValue.wdlValue) } + } + + serviceRegistryActor ! PutMetadataAction(completionEvents ++ outputEvents) + } + + def pushFailedCallMetadata(jobKey: JobKey, returnCode: Option[Int], failure: Throwable, retryableFailure: Boolean) = { + val failedState = if (retryableFailure) ExecutionStatus.Preempted else ExecutionStatus.Failed + val completionEvents = completedCallMetadataEvents(jobKey, failedState, returnCode) + val retryableFailureEvent = MetadataEvent(metadataKeyForCall(jobKey, CallMetadataKeys.RetryableFailure), MetadataValue(retryableFailure)) + val failureEvents = throwableToMetadataEvents(metadataKeyForCall(jobKey, s"${CallMetadataKeys.Failures}[$randomNumberString]"), failure).+:(retryableFailureEvent) + + serviceRegistryActor ! PutMetadataAction(completionEvents ++ failureEvents) + } + + def pushExecutionEventsToMetadataService(jobKey: JobKey, eventList: Seq[ExecutionEvent]) = { + def metadataEvent(k: String, value: Any) = { + val metadataValue = MetadataValue(value) + val metadataKey = metadataKeyForCall(jobKey, k) + MetadataEvent(metadataKey, metadataValue) + } + + eventList.headOption foreach { firstEvent => + // The final event is only used as the book-end for the final pairing so the name is never actually used... + val offset = firstEvent.offsetDateTime.getOffset + val now = OffsetDateTime.now.withOffsetSameInstant(offset) + val lastEvent = ExecutionEvent("!!Bring Back the Monarchy!!", now) + val tailedEventList = eventList :+ lastEvent + val events = tailedEventList.sliding(2).zipWithIndex flatMap { + case (Seq(eventCurrent, eventNext), index) => + val eventKey = s"executionEvents[$index]" + List( + metadataEvent(s"$eventKey:description", eventCurrent.name), + metadataEvent(s"$eventKey:startTime", eventCurrent.offsetDateTime), + metadataEvent(s"$eventKey:endTime", eventNext.offsetDateTime) + ) + } + + serviceRegistryActor ! 
PutMetadataAction(events.toIterable) + } + } + + private def completedCallMetadataEvents(jobKey: JobKey, executionStatus: ExecutionStatus, returnCode: Option[Int]) = { + val returnCodeEvent = returnCode map { rc => + List(MetadataEvent(metadataKeyForCall(jobKey, CallMetadataKeys.ReturnCode), MetadataValue(rc))) + } + + List( + MetadataEvent(metadataKeyForCall(jobKey, CallMetadataKeys.ExecutionStatus), MetadataValue(executionStatus)), + MetadataEvent(metadataKeyForCall(jobKey, CallMetadataKeys.End), MetadataValue(OffsetDateTime.now)) + ) ++ returnCodeEvent.getOrElse(List.empty) + } + + private def metadataKeyForCall(jobKey: JobKey, myKey: String) = MetadataKey(workflowIdForCallMetadata, Option(MetadataJobKey(jobKey.scope.fullyQualifiedName, jobKey.index, jobKey.attempt)), myKey) + + private def randomNumberString: String = Random.nextInt.toString.stripPrefix("-") + +} diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/EngineJobExecutionActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/EngineJobExecutionActor.scala index 25c1852dc5e..afa48474d78 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/EngineJobExecutionActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/EngineJobExecutionActor.scala @@ -1,7 +1,5 @@ package cromwell.engine.workflow.lifecycle.execution -import java.time.OffsetDateTime - import akka.actor.{ActorRef, ActorRefFactory, LoggingFSM, Props} import akka.routing.RoundRobinPool import cats.data.NonEmptyList @@ -14,8 +12,8 @@ import cromwell.core._ import cromwell.core.callcaching._ import cromwell.core.logging.WorkflowLogging import cromwell.core.simpleton.WdlValueSimpleton +import cromwell.engine.workflow.lifecycle.execution.CallPreparationActor.{BackendJobPreparationSucceeded, CallPreparationFailed} import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor._ -import cromwell.engine.workflow.lifecycle.execution.JobPreparationActor.{BackendJobPreparationFailed, BackendJobPreparationSucceeded} import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor.{CacheHit, CacheMiss, CallCacheHashes, HashError} import cromwell.engine.workflow.lifecycle.execution.callcaching.FetchCachedResultsActor.{CachedOutputLookupFailed, CachedOutputLookupSucceeded} import cromwell.engine.workflow.lifecycle.execution.callcaching._ @@ -23,8 +21,6 @@ import cromwell.engine.workflow.tokens.JobExecutionTokenDispenserActor.{JobExecu import cromwell.jobstore.JobStoreActor._ import cromwell.jobstore.{Pending => _, _} import cromwell.services.SingletonServicesStore -import cromwell.services.metadata.MetadataService.PutMetadataAction -import cromwell.services.metadata.{MetadataEvent, MetadataJobKey, MetadataKey, MetadataValue} import wdl4s.TaskOutput import scala.concurrent.ExecutionContext @@ -36,17 +32,18 @@ class EngineJobExecutionActor(replyTo: ActorRef, factory: BackendLifecycleActorFactory, initializationData: Option[BackendInitializationData], restarting: Boolean, - serviceRegistryActor: ActorRef, + val serviceRegistryActor: ActorRef, jobStoreActor: ActorRef, callCacheReadActor: ActorRef, jobTokenDispenserActor: ActorRef, backendSingletonActor: Option[ActorRef], backendName: String, - callCachingMode: CallCachingMode) extends LoggingFSM[EngineJobExecutionActorState, EJEAData] with WorkflowLogging { + callCachingMode: CallCachingMode) extends LoggingFSM[EngineJobExecutionActorState, EJEAData] with WorkflowLogging with CallMetadataHelper { - 
override val workflowId = executionData.workflowDescriptor.id + override val workflowIdForLogging = executionData.workflowDescriptor.id + override val workflowIdForCallMetadata = executionData.workflowDescriptor.id - val jobTag = s"${workflowId.shortString}:${jobDescriptorKey.call.fullyQualifiedName}:${jobDescriptorKey.index.fromIndex}:${jobDescriptorKey.attempt}" + val jobTag = s"${workflowIdForLogging.shortString}:${jobDescriptorKey.call.fullyQualifiedName}:${jobDescriptorKey.index.fromIndex}:${jobDescriptorKey.attempt}" val tag = s"EJEA_$jobTag" // There's no need to check for a cache hit again if we got preempted, or if there's no result copying actor defined @@ -78,8 +75,9 @@ class EngineJobExecutionActor(replyTo: ActorRef, when(RequestingExecutionToken) { case Event(JobExecutionTokenDispensed(jobExecutionToken), NoData) => executionToken = Option(jobExecutionToken) + replyTo ! JobStarting(jobDescriptorKey) if (restarting) { - val jobStoreKey = jobDescriptorKey.toJobStoreKey(workflowId) + val jobStoreKey = jobDescriptorKey.toJobStoreKey(workflowIdForLogging) jobStoreActor ! QueryJobCompletion(jobStoreKey, jobDescriptorKey.call.task.outputs) goto(CheckingJobStore) } else { @@ -96,9 +94,9 @@ class EngineJobExecutionActor(replyTo: ActorRef, prepareJob() case Event(JobComplete(jobResult), NoData) => val response = jobResult match { - case JobResultSuccess(returnCode, jobOutputs) => SucceededResponse(jobDescriptorKey, returnCode, jobOutputs, None, Seq.empty) - case JobResultFailure(returnCode, reason, false) => FailedNonRetryableResponse(jobDescriptorKey, reason, returnCode) - case JobResultFailure(returnCode, reason, true) => FailedRetryableResponse(jobDescriptorKey, reason, returnCode) + case JobResultSuccess(returnCode, jobOutputs) => JobSucceededResponse(jobDescriptorKey, returnCode, jobOutputs, None, Seq.empty) + case JobResultFailure(returnCode, reason, false) => JobFailedNonRetryableResponse(jobDescriptorKey, reason, returnCode) + case JobResultFailure(returnCode, reason, true) => JobFailedRetryableResponse(jobDescriptorKey, reason, returnCode) } respondAndStop(response) case Event(f: JobStoreReadFailure, NoData) => @@ -120,8 +118,8 @@ class EngineJobExecutionActor(replyTo: ActorRef, runJob(updatedData) case CallCachingOff => runJob(updatedData) } - case Event(response: BackendJobPreparationFailed, NoData) => - forwardAndStop(response) + case Event(CallPreparationFailed(jobKey: BackendJobDescriptorKey, throwable), NoData) => + respondAndStop(JobFailedNonRetryableResponse(jobKey, throwable, None)) } private val callCachingReadResultMetadataKey = "Call caching read result" @@ -156,19 +154,16 @@ class EngineJobExecutionActor(replyTo: ActorRef, when(BackendIsCopyingCachedOutputs) { // Backend copying response: - case Event(response: SucceededResponse, data @ ResponsePendingData(_, _, Some(Success(hashes)), _)) => + case Event(response: JobSucceededResponse, data @ ResponsePendingData(_, _, Some(Success(hashes)), _)) => saveCacheResults(hashes, data.withSuccessResponse(response)) - case Event(response: SucceededResponse, data @ ResponsePendingData(_, _, None, _)) if effectiveCallCachingMode.writeToCache => + case Event(response: JobSucceededResponse, data @ ResponsePendingData(_, _, None, _)) if effectiveCallCachingMode.writeToCache => // Wait for the CallCacheHashes stay using data.withSuccessResponse(response) - case Event(response: SucceededResponse, data: ResponsePendingData) => // bad hashes or cache write off + case Event(response: JobSucceededResponse, data: ResponsePendingData) 
=> // bad hashes or cache write off saveJobCompletionToJobStore(data.withSuccessResponse(response)) case Event(response: BackendJobExecutionResponse, data @ ResponsePendingData(_, _, _, Some(cacheHit))) => response match { - case f: BackendJobFailedResponse => - invalidateCacheHit(cacheHit.cacheResultIds.head) - log.error(f.throwable, "Failed copying cache results for job {}, invalidating cache entry.", jobDescriptorKey) - goto(InvalidatingCacheEntry) + case f: BackendJobFailedResponse => invalidateCacheHitAndTransition(cacheHit.cacheResultIds.head, data, f.throwable) case _ => runJob(data) } @@ -216,10 +211,10 @@ class EngineJobExecutionActor(replyTo: ActorRef, disableCallCaching(t) stay using data.copy(hashes = Option(Failure(t))) - case Event(response: SucceededResponse, data @ ResponsePendingData(_, _, Some(Success(hashes)), _)) if effectiveCallCachingMode.writeToCache => + case Event(response: JobSucceededResponse, data @ ResponsePendingData(_, _, Some(Success(hashes)), _)) if effectiveCallCachingMode.writeToCache => eventList ++= response.executionEvents saveCacheResults(hashes, data.withSuccessResponse(response)) - case Event(response: SucceededResponse, data @ ResponsePendingData(_, _, None, _)) if effectiveCallCachingMode.writeToCache => + case Event(response: JobSucceededResponse, data @ ResponsePendingData(_, _, None, _)) if effectiveCallCachingMode.writeToCache => log.debug(s"Got job result for {}, awaiting hashes", jobTag) stay using data.withSuccessResponse(response) case Event(response: BackendJobExecutionResponse, data: ResponsePendingData) => @@ -240,7 +235,7 @@ class EngineJobExecutionActor(replyTo: ActorRef, case Event(JobStoreWriteSuccess(_), data: ResponseData) => forwardAndStop(data.response) case Event(JobStoreWriteFailure(t), data: ResponseData) => - respondAndStop(FailedNonRetryableResponse(jobDescriptorKey, new Exception(s"JobStore write failure: ${t.getMessage}", t), None)) + respondAndStop(JobFailedNonRetryableResponse(jobDescriptorKey, new Exception(s"JobStore write failure: ${t.getMessage}", t), None)) } onTransition { @@ -266,7 +261,7 @@ class EngineJobExecutionActor(replyTo: ActorRef, private def forwardAndStop(response: Any): State = { replyTo forward response returnExecutionToken() - tellEventMetadata() + pushExecutionEventsToMetadataService(jobDescriptorKey, eventList) context stop self stay() } @@ -274,7 +269,7 @@ class EngineJobExecutionActor(replyTo: ActorRef, private def respondAndStop(response: Any): State = { replyTo ! response returnExecutionToken() - tellEventMetadata() + pushExecutionEventsToMetadataService(jobDescriptorKey, eventList) context stop self stay() } @@ -286,7 +281,7 @@ class EngineJobExecutionActor(replyTo: ActorRef, } private def disableCacheWrite(reason: Throwable) = { - log.error("{}: Disabling cache writing for this job.", jobTag) + log.error(reason, "{}: Disabling cache writing for this job.", jobTag) if (effectiveCallCachingMode.writeToCache) { effectiveCallCachingMode = effectiveCallCachingMode.withoutWrite writeCallCachingModeToMetadata() @@ -302,7 +297,7 @@ class EngineJobExecutionActor(replyTo: ActorRef, val jobPreparationActorName = s"BackendPreparationActor_for_$jobTag" val jobPrepProps = JobPreparationActor.props(executionData, jobDescriptorKey, factory, initializationData, serviceRegistryActor, backendSingletonActor) val jobPreparationActor = createJobPreparationActor(jobPrepProps, jobPreparationActorName) - jobPreparationActor ! JobPreparationActor.Start + jobPreparationActor ! 
CallPreparationActor.Start goto(PreparingJob) } @@ -335,13 +330,13 @@ class EngineJobExecutionActor(replyTo: ActorRef, val backendCacheHitCopyingActorProps = propsMaker(data.jobDescriptor, initializationData, serviceRegistryActor) val cacheHitCopyActor = context.actorOf(backendCacheHitCopyingActorProps, buildCacheHitCopyingActorName(data.jobDescriptor)) cacheHitCopyActor ! CopyOutputsCommand(wdlValueSimpletons, jobDetritusFiles, returnCode) - replyTo ! JobRunning(data.jobDescriptor, None) + replyTo ! JobRunning(data.jobDescriptor.key, data.jobDescriptor.inputDeclarations, None) goto(BackendIsCopyingCachedOutputs) case None => // This should be impossible with the FSM, but luckily, we CAN recover if some foolish future programmer makes this happen: val errorMessage = "Call caching copying should never have even been attempted with no copy actor props! (Programmer error!)" log.error(errorMessage) - self ! FailedNonRetryableResponse(data.jobDescriptor.key, new RuntimeException(errorMessage), None) + self ! JobFailedNonRetryableResponse(data.jobDescriptor.key, new RuntimeException(errorMessage), None) goto(BackendIsCopyingCachedOutputs) } } @@ -350,7 +345,7 @@ class EngineJobExecutionActor(replyTo: ActorRef, val backendJobExecutionActor = context.actorOf(data.bjeaProps, buildJobExecutionActorName(data.jobDescriptor)) val message = if (restarting) RecoverJobCommand else ExecuteJobCommand backendJobExecutionActor ! message - replyTo ! JobRunning(data.jobDescriptor, Option(backendJobExecutionActor)) + replyTo ! JobRunning(data.jobDescriptor.key, data.jobDescriptor.inputDeclarations, Option(backendJobExecutionActor)) goto(RunningJob) using data } @@ -371,19 +366,33 @@ class EngineJobExecutionActor(replyTo: ActorRef, } private def buildJobExecutionActorName(jobDescriptor: BackendJobDescriptor) = { - s"$workflowId-BackendJobExecutionActor-$jobTag" + s"$workflowIdForLogging-BackendJobExecutionActor-$jobTag" } private def buildCacheHitCopyingActorName(jobDescriptor: BackendJobDescriptor) = { - s"$workflowId-BackendCacheHitCopyingActor-$jobTag" + s"$workflowIdForLogging-BackendCacheHitCopyingActor-$jobTag" } - protected def createSaveCacheResultsActor(hashes: CallCacheHashes, success: SucceededResponse): Unit = { + protected def createSaveCacheResultsActor(hashes: CallCacheHashes, success: JobSucceededResponse): Unit = { val callCache = new CallCache(SingletonServicesStore.databaseInterface) - context.actorOf(CallCacheWriteActor.props(callCache, workflowId, hashes, success), s"CallCacheWriteActor-$tag") + context.actorOf(CallCacheWriteActor.props(callCache, workflowIdForLogging, hashes, success), s"CallCacheWriteActor-$tag") () } + private def invalidateCacheHitAndTransition(cacheId: CallCachingEntryId, data: ResponsePendingData, reason: Throwable) = { + val invalidationRequired = effectiveCallCachingMode match { + case CallCachingOff => throw new RuntimeException("Should not be calling invalidateCacheHit if call caching is off!") // Very unexpected. Fail out of this bad-state EJEA. 
+ case activity: CallCachingActivity => activity.options.invalidateBadCacheResults + } + if (invalidationRequired) { + log.error(reason, "Failed copying cache results for job {}, invalidating cache entry.", jobDescriptorKey) + invalidateCacheHit(cacheId) + goto(InvalidatingCacheEntry) + } else { + handleCacheInvalidatedResponse(CallCacheInvalidationUnnecessary, data) + } + } + protected def invalidateCacheHit(cacheId: CallCachingEntryId): Unit = { val callCache = new CallCache(SingletonServicesStore.databaseInterface) context.actorOf(CallCacheInvalidateActor.props(callCache, cacheId), s"CallCacheInvalidateActor${cacheId.id}-$tag") @@ -398,71 +407,37 @@ class EngineJobExecutionActor(replyTo: ActorRef, private def saveJobCompletionToJobStore(updatedData: ResponseData) = { updatedData.response match { - case SucceededResponse(jobKey: BackendJobDescriptorKey, returnCode: Option[Int], jobOutputs: JobOutputs, _, _) => saveSuccessfulJobResults(jobKey, returnCode, jobOutputs) + case JobSucceededResponse(jobKey: BackendJobDescriptorKey, returnCode: Option[Int], jobOutputs: CallOutputs, _, _) => saveSuccessfulJobResults(jobKey, returnCode, jobOutputs) case AbortedResponse(jobKey: BackendJobDescriptorKey) => log.debug("{}: Won't save aborted job response to JobStore", jobTag) forwardAndStop(updatedData.response) - case FailedNonRetryableResponse(jobKey: BackendJobDescriptorKey, throwable: Throwable, returnCode: Option[Int]) => saveUnsuccessfulJobResults(jobKey, returnCode, throwable, retryable = false) - case FailedRetryableResponse(jobKey: BackendJobDescriptorKey, throwable: Throwable, returnCode: Option[Int]) => saveUnsuccessfulJobResults(jobKey, returnCode, throwable, retryable = true) + case JobFailedNonRetryableResponse(jobKey: BackendJobDescriptorKey, throwable: Throwable, returnCode: Option[Int]) => saveUnsuccessfulJobResults(jobKey, returnCode, throwable, retryable = false) + case JobFailedRetryableResponse(jobKey: BackendJobDescriptorKey, throwable: Throwable, returnCode: Option[Int]) => saveUnsuccessfulJobResults(jobKey, returnCode, throwable, retryable = true) } goto(UpdatingJobStore) using updatedData } - private def saveSuccessfulJobResults(jobKey: JobKey, returnCode: Option[Int], outputs: JobOutputs) = { - val jobStoreKey = jobKey.toJobStoreKey(workflowId) + private def saveSuccessfulJobResults(jobKey: JobKey, returnCode: Option[Int], outputs: CallOutputs) = { + val jobStoreKey = jobKey.toJobStoreKey(workflowIdForLogging) val jobStoreResult = JobResultSuccess(returnCode, outputs) jobStoreActor ! RegisterJobCompleted(jobStoreKey, jobStoreResult) } private def saveUnsuccessfulJobResults(jobKey: JobKey, returnCode: Option[Int], reason: Throwable, retryable: Boolean) = { - val jobStoreKey = jobKey.toJobStoreKey(workflowId) + val jobStoreKey = jobKey.toJobStoreKey(workflowIdForLogging) val jobStoreResult = JobResultFailure(returnCode, reason, retryable) jobStoreActor ! 
RegisterJobCompleted(jobStoreKey, jobStoreResult) } private def writeToMetadata(keyValues: Map[String, String]) = { import cromwell.services.metadata.MetadataService.implicits.MetadataAutoPutter - serviceRegistryActor.putMetadata(workflowId, Option(jobDescriptorKey), keyValues) + serviceRegistryActor.putMetadata(workflowIdForLogging, Option(jobDescriptorKey), keyValues) } private def addHashesAndStay(data: ResponsePendingData, hashes: CallCacheHashes): State = { val updatedData = data.copy(hashes = Option(Success(hashes))) stay using updatedData } - - /** - * Fire and forget events to the metadata service - */ - private def tellEventMetadata(): Unit = { - eventList.headOption foreach { firstEvent => - // The final event is only used as the book-end for the final pairing so the name is never actually used... - val offset = firstEvent.offsetDateTime.getOffset - val now = OffsetDateTime.now.withOffsetSameInstant(offset) - val lastEvent = ExecutionEvent("!!Bring Back the Monarchy!!", now) - val tailedEventList = eventList :+ lastEvent - val events = tailedEventList.sliding(2).zipWithIndex flatMap { - case (Seq(eventCurrent, eventNext), index) => - val eventKey = s"executionEvents[$index]" - List( - metadataEvent(s"$eventKey:description", eventCurrent.name), - metadataEvent(s"$eventKey:startTime", eventCurrent.offsetDateTime), - metadataEvent(s"$eventKey:endTime", eventNext.offsetDateTime) - ) - } - - serviceRegistryActor ! PutMetadataAction(events.toIterable) - } - } - - private def metadataEvent(key: String, value: Any) = { - val metadataValue = MetadataValue(value) - MetadataEvent(metadataKey(key), metadataValue) - } - - private lazy val metadataJobKey = { - MetadataJobKey(jobDescriptorKey.call.fullyQualifiedName, jobDescriptorKey.index, jobDescriptorKey.attempt) - } - private def metadataKey(key: String) = MetadataKey(workflowId, Option(metadataJobKey), key) } object EngineJobExecutionActor { @@ -484,8 +459,6 @@ object EngineJobExecutionActor { sealed trait EngineJobExecutionActorCommand case object Execute extends EngineJobExecutionActorCommand - final case class JobRunning(jobDescriptor: BackendJobDescriptor, backendJobExecutionActor: Option[ActorRef]) - def props(replyTo: ActorRef, jobDescriptorKey: BackendJobDescriptorKey, executionData: WorkflowExecutionActorData, @@ -526,10 +499,10 @@ object EngineJobExecutionActor { hashes: Option[Try[CallCacheHashes]] = None, cacheHit: Option[CacheHit] = None) extends EJEAData { - def withSuccessResponse(success: SucceededResponse) = SucceededResponseData(success, hashes) + def withSuccessResponse(success: JobSucceededResponse) = SucceededResponseData(success, hashes) def withResponse(response: BackendJobExecutionResponse) = response match { - case success: SucceededResponse => SucceededResponseData(success, hashes) + case success: JobSucceededResponse => SucceededResponseData(success, hashes) case failure => NotSucceededResponseData(failure, hashes) } @@ -546,7 +519,7 @@ object EngineJobExecutionActor { def hashes: Option[Try[CallCacheHashes]] } - private[execution] case class SucceededResponseData(successResponse: SucceededResponse, + private[execution] case class SucceededResponseData(successResponse: JobSucceededResponse, hashes: Option[Try[CallCacheHashes]] = None) extends ResponseData { override def response = successResponse } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/ExecutionStore.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/ExecutionStore.scala new file mode 100644 index 
00000000000..3b9d1af1f56 --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/ExecutionStore.scala @@ -0,0 +1,109 @@ +package cromwell.engine.workflow.lifecycle.execution + +import cromwell.backend.BackendJobDescriptorKey +import cromwell.core.ExecutionStatus._ +import cromwell.core.{CallKey, ExecutionStatus, JobKey} +import cromwell.engine.workflow.lifecycle.execution.ExecutionStore.ExecutionStoreEntry +import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor.{apply => _, _} +import wdl4s._ + + +object ExecutionStore { + def empty = ExecutionStore(Map.empty[JobKey, ExecutionStatus]) + type ExecutionStoreEntry = (JobKey, ExecutionStatus) + def apply(workflow: Workflow, workflowCoercedInputs: WorkflowCoercedInputs) = { + // Only add direct children to the store, the rest is dynamically created when necessary + val keys = workflow.children map { + case call: TaskCall => Option(BackendJobDescriptorKey(call, None, 1)) + case call: WorkflowCall => Option(SubWorkflowKey(call, None, 1)) + case scatter: Scatter => Option(ScatterKey(scatter)) + case declaration: Declaration => Option(DeclarationKey(declaration, None, workflowCoercedInputs)) + case _ => None // Ifs will need to be added here when supported + } + + new ExecutionStore(keys.flatten.map(_ -> NotStarted).toMap) + } +} + +case class ExecutionStore(store: Map[JobKey, ExecutionStatus]) { + def add(values: Map[JobKey, ExecutionStatus]) = this.copy(store = store ++ values) + + // Convert the store to a `List` before `collect`ing to sidestep expensive and pointless hashing of `Scope`s when + // assembling the result. + def runnableScopes = store.toList collect { case entry if isRunnable(entry) => entry._1 } + + def backendJobKeys = store.keys.toList collect { case k: BackendJobDescriptorKey => k } + + private def isRunnable(entry: ExecutionStoreEntry) = { + entry match { + case (key, ExecutionStatus.NotStarted) => arePrerequisitesDone(key) + case _ => false + } + } + + def findShardEntries(key: CollectorKey): List[ExecutionStoreEntry] = store.toList filter { + case (k: CallKey, v) => k.scope == key.scope && k.isShard + case (k: DeclarationKey, v) => k.scope == key.scope && k.isShard + case _ => false + } + + private def arePrerequisitesDone(key: JobKey): Boolean = { + val upstream = key.scope.upstream collect { + case n: Call => upstreamEntry(key, n) + case n: Scatter => upstreamEntry(key, n) + case n: Declaration => upstreamEntry(key, n) + } + + val downstream: List[(JobKey, ExecutionStatus)] = key match { + case collector: CollectorKey => findShardEntries(collector) + case _ => Nil + } + + /* + * We need to use an "exists" in this case because the execution store can contain a job attempt with the same + * fqn and index but a preempted status. We wouldn't want that preempted attempt to count against the completion + * of the scatter block. + */ + def isDone(e: JobKey): Boolean = store exists { + case (k, s) => k.scope.fullyQualifiedName == e.scope.fullyQualifiedName && k.index == e.index && s == ExecutionStatus.Done + } + + val dependencies = upstream.flatten ++ downstream + val dependenciesResolved = dependencies forall { case (k, _) => isDone(k) } + + /* + * We need to make sure that all prerequisiteScopes have been resolved to some entry before going forward. + * If a scope cannot be resolved it may be because it is in a scatter that has not been populated yet, + * therefore there is no entry in the executionStore for this scope. 
+ * If that's the case this prerequisiteScope has not been run yet, hence the (upstream forall {_.nonEmpty}) + */ + (upstream forall { _.nonEmpty }) && dependenciesResolved + } + + private def upstreamEntry(entry: JobKey, prerequisiteScope: Scope): Option[ExecutionStoreEntry] = { + prerequisiteScope.closestCommonAncestor(entry.scope) match { + /* + * If this entry refers to a Scope which has a common ancestor with prerequisiteScope + * and that common ancestor is a Scatter block, then find the shard with the same index + * as 'entry'. In other words, if you're in the same scatter block as your pre-requisite + * scope, then depend on the shard (with same index). + * + * NOTE: this algorithm was designed for ONE-LEVEL of scattering and probably does not + * work as-is for nested scatter blocks + */ + case Some(ancestor: Scatter) => + store find { + case (k, _) => k.scope == prerequisiteScope && k.index == entry.index + } + + /* + * Otherwise, simply refer to the collector entry. This means that 'entry' depends + * on every shard of the pre-requisite scope to finish. + */ + case _ => + store find { + case (k, _) => k.scope == prerequisiteScope && k.index.isEmpty + } + } + } +} diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/JobPreparationActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/JobPreparationActor.scala index 12c719994e5..46743962226 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/JobPreparationActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/JobPreparationActor.scala @@ -3,88 +3,61 @@ package cromwell.engine.workflow.lifecycle.execution import akka.actor.{Actor, ActorRef, Props} import cromwell.backend._ import cromwell.core.logging.WorkflowLogging -import cromwell.core.{ExecutionStore, JobKey, OutputStore} +import cromwell.core.{CallKey, JobKey, WorkflowId} import cromwell.engine.EngineWorkflowDescriptor -import cromwell.engine.workflow.lifecycle.execution.JobPreparationActor._ +import cromwell.engine.workflow.lifecycle.execution.CallPreparationActor._ +import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor.SubWorkflowKey import wdl4s._ import wdl4s.expression.WdlStandardLibraryFunctions -import wdl4s.util.TryUtil import wdl4s.values.WdlValue import scala.util.{Failure, Success, Try} -final case class JobPreparationActor(executionData: WorkflowExecutionActorData, - jobKey: BackendJobDescriptorKey, - factory: BackendLifecycleActorFactory, - initializationData: Option[BackendInitializationData], - serviceRegistryActor: ActorRef, - backendSingletonActor: Option[ActorRef]) - extends Actor with WdlLookup with WorkflowLogging { - - override lazy val workflowDescriptor: EngineWorkflowDescriptor = executionData.workflowDescriptor - override lazy val workflowId = workflowDescriptor.id - override lazy val executionStore: ExecutionStore = executionData.executionStore - override lazy val outputStore: OutputStore = executionData.outputStore - override lazy val expressionLanguageFunctions = factory.expressionLanguageFunctions( - workflowDescriptor.backendDescriptor, jobKey, initializationData) - +abstract class CallPreparationActor(val workflowDescriptor: EngineWorkflowDescriptor, + val outputStore: OutputStore, + callKey: CallKey) extends Actor with WorkflowLogging { + lazy val workflowIdForLogging = workflowDescriptor.id + def expressionLanguageFunctions: WdlStandardLibraryFunctions + def prepareExecutionActor(inputEvaluation: Map[Declaration, 
WdlValue]): CallPreparationActorResponse + override def receive = { case Start => - val response = resolveAndEvaluateInputs(jobKey, expressionLanguageFunctions) map { prepareJobExecutionActor } - context.parent ! (response recover { case f => BackendJobPreparationFailed(jobKey, f) }).get + val response = resolveAndEvaluateInputs() map { prepareExecutionActor } + context.parent ! (response recover { case f => CallPreparationFailed(callKey, f) }).get context stop self case unhandled => workflowLogger.warn(self.path.name + " received an unhandled message: " + unhandled) } - // Split inputs map (= evaluated workflow declarations + coerced json inputs) into [init\.*].last - private lazy val splitInputs = workflowDescriptor.backendDescriptor.inputs map { case (fqn, v) => splitFqn(fqn) -> v } - - def resolveAndEvaluateInputs(jobKey: BackendJobDescriptorKey, - wdlFunctions: WdlStandardLibraryFunctions): Try[Map[LocallyQualifiedName, WdlValue]] = { - import RuntimeAttributeDefinition.buildMapBasedLookup + def resolveAndEvaluateInputs(): Try[Map[Declaration, WdlValue]] = { Try { - val call = jobKey.call - lazy val callInputsFromFile = unqualifiedInputsFromInputFile(call) - lazy val workflowScopedLookup = hierarchicalLookup(jobKey.call, jobKey.index) _ - - // Try to resolve, evaluate and coerce declarations in order - val inputEvaluationAttempt = call.task.declarations.foldLeft(Map.empty[LocallyQualifiedName, Try[WdlValue]])((inputs, declaration) => { - val name = declaration.name - - // Try to resolve the declaration, and upon success evaluate the expression - // If the declaration is resolved but can't be evaluated this will throw an evaluation exception - // If it can't be resolved it's ignored and won't appear in the final input map - val evaluated: Option[Try[WdlValue]] = declaration.expression match { - // Static expression in the declaration - case Some(expr) => Option(expr.evaluate(buildMapBasedLookup(inputs), wdlFunctions)) - // Expression found in the input mappings - case None if call.inputMappings.contains(name) => Option(call.inputMappings(name).evaluate(workflowScopedLookup, wdlFunctions)) - // Expression found in the input file - case None if callInputsFromFile.contains(name) => Option(Success(callInputsFromFile(name))) - // Expression can't be found - case _ => None - } - - // Leave out unresolved declarations - evaluated match { - case Some(value) => - val coercedValue = value flatMap declaration.wdlType.coerceRawValue - inputs + ((name, coercedValue)) - case None => inputs - } - }) - - TryUtil.sequenceMap(inputEvaluationAttempt, s"Input evaluation for Call ${call.fullyQualifiedName} failed") - }.flatten + val call = callKey.scope + val scatterMap = callKey.index flatMap { i => + // Will need update for nested scatters + call.upstream collectFirst { case s: Scatter => Map(s -> i) } + } getOrElse Map.empty[Scatter, Int] + + call.evaluateTaskInputs( + workflowDescriptor.backendDescriptor.inputs, + expressionLanguageFunctions, + outputStore.fetchNodeOutputEntries, + scatterMap + ) + } } +} - // Unqualified call inputs for a specific call, from the input json - private def unqualifiedInputsFromInputFile(call: Call): Map[LocallyQualifiedName, WdlValue] = splitInputs collect { - case((root, inputName), v) if root == call.fullyQualifiedName => inputName -> v - } +final case class JobPreparationActor(executionData: WorkflowExecutionActorData, + jobKey: BackendJobDescriptorKey, + factory: BackendLifecycleActorFactory, + initializationData: Option[BackendInitializationData], + 
serviceRegistryActor: ActorRef, + backendSingletonActor: Option[ActorRef]) + extends CallPreparationActor(executionData.workflowDescriptor, executionData.outputStore, jobKey) { - private def prepareJobExecutionActor(inputEvaluation: Map[LocallyQualifiedName, WdlValue]): JobPreparationActorResponse = { + override lazy val expressionLanguageFunctions = factory.expressionLanguageFunctions(workflowDescriptor.backendDescriptor, jobKey, initializationData) + + override def prepareExecutionActor(inputEvaluation: Map[Declaration, WdlValue]): CallPreparationActorResponse = { import RuntimeAttributeDefinition.{addDefaultsToAttributes, evaluateRuntimeAttributes} val curriedAddDefaultsToAttributes = addDefaultsToAttributes(factory.runtimeAttributeDefinitions(initializationData), workflowDescriptor.backendDescriptor.workflowOptions) _ @@ -95,19 +68,45 @@ final case class JobPreparationActor(executionData: WorkflowExecutionActorData, jobDescriptor = BackendJobDescriptor(workflowDescriptor.backendDescriptor, jobKey, attributesWithDefault, inputEvaluation) } yield BackendJobPreparationSucceeded(jobDescriptor, factory.jobExecutionActorProps(jobDescriptor, initializationData, serviceRegistryActor, backendSingletonActor))) match { case Success(s) => s - case Failure(f) => BackendJobPreparationFailed(jobKey, f) + case Failure(f) => CallPreparationFailed(jobKey, f) } } } -object JobPreparationActor { - sealed trait JobPreparationActorCommands - case object Start extends JobPreparationActorCommands +final case class SubWorkflowPreparationActor(executionData: WorkflowExecutionActorData, + key: SubWorkflowKey, + subWorkflowId: WorkflowId) + extends CallPreparationActor(executionData.workflowDescriptor, executionData.outputStore, key) { + + override lazy val expressionLanguageFunctions = executionData.expressionLanguageFunctions + + override def prepareExecutionActor(inputEvaluation: Map[Declaration, WdlValue]): CallPreparationActorResponse = { + val oldBackendDescriptor = workflowDescriptor.backendDescriptor + + val newBackendDescriptor = oldBackendDescriptor.copy( + id = subWorkflowId, + workflow = key.scope.calledWorkflow, + inputs = workflowDescriptor.inputs ++ (inputEvaluation map { case (k, v) => k.fullyQualifiedName -> v }), + breadCrumbs = oldBackendDescriptor.breadCrumbs :+ BackendJobBreadCrumb(workflowDescriptor.workflow, workflowDescriptor.id, key) + ) + val engineDescriptor = workflowDescriptor.copy(backendDescriptor = newBackendDescriptor, parentWorkflow = Option(workflowDescriptor)) + SubWorkflowPreparationSucceeded(engineDescriptor, inputEvaluation) + } +} - sealed trait JobPreparationActorResponse - case class BackendJobPreparationSucceeded(jobDescriptor: BackendJobDescriptor, bjeaProps: Props) extends JobPreparationActorResponse - case class BackendJobPreparationFailed(jobKey: JobKey, throwable: Throwable) extends JobPreparationActorResponse +object CallPreparationActor { + sealed trait CallPreparationActorCommands + case object Start extends CallPreparationActorCommands + + sealed trait CallPreparationActorResponse + + case class BackendJobPreparationSucceeded(jobDescriptor: BackendJobDescriptor, bjeaProps: Props) extends CallPreparationActorResponse + case class SubWorkflowPreparationSucceeded(workflowDescriptor: EngineWorkflowDescriptor, inputs: EvaluatedTaskInputs) extends CallPreparationActorResponse + case class JobCallPreparationFailed(jobKey: JobKey, throwable: Throwable) extends CallPreparationActorResponse + case class CallPreparationFailed(jobKey: JobKey, throwable: Throwable) extends 
CallPreparationActorResponse +} +object JobPreparationActor { def props(executionData: WorkflowExecutionActorData, jobKey: BackendJobDescriptorKey, factory: BackendLifecycleActorFactory, @@ -119,3 +118,13 @@ object JobPreparationActor { Props(new JobPreparationActor(executionData, jobKey, factory, initializationData, serviceRegistryActor, backendSingletonActor)) } } + +object SubWorkflowPreparationActor { + def props(executionData: WorkflowExecutionActorData, + key: SubWorkflowKey, + subWorkflowId: WorkflowId) = { + // Note that JobPreparationActor doesn't run on the engine dispatcher as it mostly executes backend-side code + // (WDL expression evaluation using Backend's expressionLanguageFunctions) + Props(new SubWorkflowPreparationActor(executionData, key, subWorkflowId)) + } +} diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/OutputStore.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/OutputStore.scala new file mode 100644 index 00000000000..02c0bc113e0 --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/OutputStore.scala @@ -0,0 +1,98 @@ +package cromwell.engine.workflow.lifecycle.execution + +import cromwell.core.ExecutionIndex._ +import cromwell.core._ +import cromwell.engine.workflow.lifecycle.execution.OutputStore.{OutputCallKey, OutputEntry} +import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor.CollectorKey +import wdl4s.types.{WdlArrayType, WdlType} +import wdl4s.util.TryUtil +import wdl4s.values.{WdlArray, WdlCallOutputsObject, WdlValue} +import wdl4s.{Call, Declaration, GraphNode, Scope} + +import scala.language.postfixOps +import scala.util.{Failure, Success, Try} + +object OutputStore { + case class OutputEntry(name: String, wdlType: WdlType, wdlValue: Option[WdlValue]) + case class OutputCallKey(call: Scope with GraphNode, index: ExecutionIndex) + def empty = OutputStore(Map.empty) +} + +case class OutputStore(store: Map[OutputCallKey, List[OutputEntry]]) { + def add(values: Map[OutputCallKey, List[OutputEntry]]) = this.copy(store = store ++ values) + + def fetchNodeOutputEntries(node: GraphNode, index: ExecutionIndex): Try[WdlValue] = { + def outputEntriesToMap(outputs: List[OutputEntry]): Map[String, Try[WdlValue]] = { + outputs map { output => + output.wdlValue match { + case Some(wdlValue) => output.name -> Success(wdlValue) + case None => output.name -> Failure(new RuntimeException(s"Could not retrieve output ${output.name} value")) + } + } toMap + } + + def callOutputs(call: Call, outputs: List[OutputEntry]) = { + TryUtil.sequenceMap(outputEntriesToMap(outputs), s"Output fetching for call ${node.unqualifiedName}") map { outputsMap => + WdlCallOutputsObject(call, outputsMap) + } + } + + def declarationOutputs(declaration: Declaration, outputs: List[OutputEntry]) = { + outputs match { + case OutputEntry(name, _, Some(value)) :: Nil => Success(value) + case _ => Failure(new RuntimeException(s"Could not find value for declaration ${declaration.fullyQualifiedName}")) + } + } + + store.get(OutputCallKey(node, index)) match { + case Some(outputs) => + node match { + case call: Call => callOutputs(call, outputs) + case declaration: Declaration => declarationOutputs(declaration, outputs) + case other => Failure(new RuntimeException(s"Only Calls and Declarations are allowed in the OutputStore, found ${other.getClass.getSimpleName}")) + } + case None => Failure(new RuntimeException(s"Could not find scope ${node.unqualifiedName}")) + } + } + + def collectCall(call: 
Call, sortedShards: Seq[JobKey]) = Try { + val shardsOutputs = sortedShards map { e => + fetchNodeOutputEntries(call, e.index) map { + case callOutputs: WdlCallOutputsObject => callOutputs.outputs + case _ => throw new RuntimeException("Call outputs should be a WdlCallOutputsObject") + } getOrElse(throw new RuntimeException(s"Could not retrieve output for shard ${e.scope} #${e.index}")) + } + + call.outputs map { taskOutput => + val wdlValues = shardsOutputs.map( + _.getOrElse(taskOutput.unqualifiedName, throw new RuntimeException(s"Could not retrieve output ${taskOutput.unqualifiedName}"))) + val arrayOfValues = new WdlArray(WdlArrayType(taskOutput.wdlType), wdlValues) + taskOutput.unqualifiedName -> JobOutput(arrayOfValues) + } toMap + } + + def collectDeclaration(declaration: Declaration, sortedShards: Seq[JobKey]) = Try { + val shardsOutputs = sortedShards map { e => + fetchNodeOutputEntries(declaration, e.index) getOrElse { + throw new RuntimeException(s"Could not retrieve output for shard ${e.scope} #${e.index}") + } + } + + Map(declaration.unqualifiedName -> JobOutput(WdlArray(WdlArrayType(declaration.wdlType), shardsOutputs))) + } + + /** + * Try to generate output for a collector call, by collecting outputs for all of its shards. + * It's fail-fast on shard output retrieval + */ + def generateCollectorOutput(collector: CollectorKey, + shards: Iterable[JobKey]): Try[CallOutputs] = { + lazy val sortedShards = shards.toSeq sortBy { _.index.fromIndex } + + collector.scope match { + case call: Call => collectCall(call, sortedShards) + case declaration: Declaration => collectDeclaration(declaration, sortedShards) + case other => Failure(new RuntimeException(s"Cannot retrieve outputs for ${other.fullyQualifiedName}")) + } + } +} diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/SubWorkflowExecutionActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/SubWorkflowExecutionActor.scala new file mode 100644 index 00000000000..ffac776106f --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/SubWorkflowExecutionActor.scala @@ -0,0 +1,275 @@ +package cromwell.engine.workflow.lifecycle.execution + +import akka.actor.SupervisorStrategy.Escalate +import akka.actor.{ActorRef, FSM, LoggingFSM, OneForOneStrategy, Props, SupervisorStrategy} +import cromwell.backend.{AllBackendInitializationData, BackendLifecycleActorFactory, BackendWorkflowDescriptor} +import cromwell.core._ +import cromwell.core.logging.JobLogging +import cromwell.engine.EngineWorkflowDescriptor +import cromwell.engine.backend.{BackendConfiguration, BackendSingletonCollection} +import cromwell.engine.workflow.lifecycle.execution.CallPreparationActor.{CallPreparationFailed, Start, SubWorkflowPreparationSucceeded} +import cromwell.engine.workflow.lifecycle.execution.SubWorkflowExecutionActor._ +import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor._ +import cromwell.services.metadata.MetadataService._ +import cromwell.services.metadata._ +import cromwell.subworkflowstore.SubWorkflowStoreActor._ +import wdl4s.EvaluatedTaskInputs + +class SubWorkflowExecutionActor(key: SubWorkflowKey, + data: WorkflowExecutionActorData, + factories: Map[String, BackendLifecycleActorFactory], + override val serviceRegistryActor: ActorRef, + jobStoreActor: ActorRef, + subWorkflowStoreActor: ActorRef, + callCacheReadActor: ActorRef, + jobTokenDispenserActor: ActorRef, + backendSingletonCollection: BackendSingletonCollection, + initializationData: 
AllBackendInitializationData, + restarting: Boolean) extends LoggingFSM[SubWorkflowExecutionActorState, SubWorkflowExecutionActorData] with JobLogging with WorkflowMetadataHelper with CallMetadataHelper { + + override def supervisorStrategy: SupervisorStrategy = OneForOneStrategy() { case _ => Escalate } + + private val parentWorkflow = data.workflowDescriptor + override val workflowId = parentWorkflow.id + override val workflowIdForCallMetadata = parentWorkflow.id + override def jobTag: String = key.tag + + startWith(SubWorkflowPendingState, SubWorkflowExecutionActorData.empty) + + private var eventList: Seq[ExecutionEvent] = Seq(ExecutionEvent(stateName.toString)) + + when(SubWorkflowPendingState) { + case Event(Execute, _) => + if (restarting) { + subWorkflowStoreActor ! QuerySubWorkflow(parentWorkflow.id, key) + goto(SubWorkflowCheckingStoreState) + } else { + prepareSubWorkflow(createSubWorkflowId()) + } + } + + when(SubWorkflowCheckingStoreState) { + case Event(SubWorkflowFound(entry), _) => + prepareSubWorkflow(WorkflowId.fromString(entry.subWorkflowExecutionUuid)) + case Event(_: SubWorkflowNotFound, _) => + prepareSubWorkflow(createSubWorkflowId()) + case Event(SubWorkflowStoreFailure(command, reason), _) => + jobLogger.error(reason, s"SubWorkflowStore failure for command $command, starting sub workflow with fresh ID.") + prepareSubWorkflow(createSubWorkflowId()) + } + + when(SubWorkflowPreparingState) { + case Event(SubWorkflowPreparationSucceeded(subWorkflowEngineDescriptor, inputs), _) => + startSubWorkflow(subWorkflowEngineDescriptor, inputs) + case Event(failure: CallPreparationFailed, _) => + context.parent ! SubWorkflowFailedResponse(key, Map.empty, failure.throwable) + context stop self + stay() + } + + when(SubWorkflowRunningState) { + case Event(WorkflowExecutionSucceededResponse(executedJobKeys, outputs), _) => + context.parent ! SubWorkflowSucceededResponse(key, executedJobKeys, outputs) + goto(SubWorkflowSucceededState) + case Event(WorkflowExecutionFailedResponse(executedJobKeys, reason), _) => + context.parent ! SubWorkflowFailedResponse(key, executedJobKeys, reason) + goto(SubWorkflowFailedState) + case Event(WorkflowExecutionAbortedResponse(executedJobKeys), _) => + context.parent ! 
SubWorkflowAbortedResponse(key, executedJobKeys) + goto(SubWorkflowAbortedState) + } + + when(SubWorkflowSucceededState) { FSM.NullFunction } + when(SubWorkflowFailedState) { FSM.NullFunction } + when(SubWorkflowAbortedState) { FSM.NullFunction } + + whenUnhandled { + case Event(SubWorkflowStoreRegisterSuccess(command), _) => + // Nothing to do here + stay() + case Event(SubWorkflowStoreCompleteSuccess(command), _) => + // Nothing to do here + stay() + case Event(SubWorkflowStoreFailure(command, reason), _) => + jobLogger.error(reason, s"SubWorkflowStore failure for command $command") + stay() + case Event(MetadataPutFailed(action, error), _) => + jobLogger.warn(s"Put failed for Metadata action $action", error) + stay() + case Event(MetadataPutAcknowledgement(_), _) => stay() + } + + onTransition { + case (fromState, toState) => + stateData.subWorkflowId foreach { id => pushCurrentStateToMetadataService(id, toState.workflowState) } + } + + onTransition { + case (fromState, subWorkflowTerminalState: SubWorkflowTerminalState) => + stateData.subWorkflowId match { + case Some(id) => + pushWorkflowEnd(id) + pushExecutionEventsToMetadataService(key, eventList) + case None => jobLogger.error("Sub workflow completed without a Sub Workflow UUID.") + } + context stop self + } + + onTransition { + case fromState -> toState => eventList :+= ExecutionEvent(toState.toString) + } + + private def startSubWorkflow(subWorkflowEngineDescriptor: EngineWorkflowDescriptor, inputs: EvaluatedTaskInputs) = { + val subWorkflowActor = createSubWorkflowActor(subWorkflowEngineDescriptor) + + subWorkflowActor ! WorkflowExecutionActor.ExecuteWorkflowCommand + context.parent ! JobRunning(key, inputs, Option(subWorkflowActor)) + pushWorkflowRunningMetadata(subWorkflowEngineDescriptor.backendDescriptor, inputs) + + goto(SubWorkflowRunningState) + } + + private def prepareSubWorkflow(subWorkflowId: WorkflowId) = { + createSubWorkflowPreparationActor(subWorkflowId) ! Start + context.parent ! 
JobStarting(key) + pushCurrentStateToMetadataService(subWorkflowId, WorkflowRunning) + pushWorkflowStart(subWorkflowId) + goto(SubWorkflowPreparingState) using SubWorkflowExecutionActorData(Option(subWorkflowId)) + } + + def createSubWorkflowPreparationActor(subWorkflowId: WorkflowId) = { + context.actorOf( + SubWorkflowPreparationActor.props(data, key, subWorkflowId), + s"$subWorkflowId-SubWorkflowPreparationActor-${key.tag}" + ) + } + + def createSubWorkflowActor(subWorkflowEngineDescriptor: EngineWorkflowDescriptor) = { + context.actorOf( + WorkflowExecutionActor.props( + subWorkflowEngineDescriptor, + serviceRegistryActor, + jobStoreActor, + subWorkflowStoreActor, + callCacheReadActor, + jobTokenDispenserActor, + backendSingletonCollection, + initializationData, + restarting + ), + s"${subWorkflowEngineDescriptor.id}-SubWorkflowActor-${key.tag}" + ) + } + + private def pushWorkflowRunningMetadata(subWorkflowDescriptor: BackendWorkflowDescriptor, workflowInputs: EvaluatedTaskInputs) = { + val subWorkflowId = subWorkflowDescriptor.id + val parentWorkflowMetadataKey = MetadataKey(parentWorkflow.id, Option(MetadataJobKey(key.scope.fullyQualifiedName, key.index, key.attempt)), CallMetadataKeys.SubWorkflowId) + + val events = List( + MetadataEvent(parentWorkflowMetadataKey, MetadataValue(subWorkflowId)), + MetadataEvent(MetadataKey(subWorkflowId, None, WorkflowMetadataKeys.Name), MetadataValue(key.scope.callable.unqualifiedName)), + MetadataEvent(MetadataKey(subWorkflowId, None, WorkflowMetadataKeys.ParentWorkflowId), MetadataValue(parentWorkflow.id)) + ) + + val inputEvents = workflowInputs match { + case empty if empty.isEmpty => + List(MetadataEvent.empty(MetadataKey(subWorkflowId, None,WorkflowMetadataKeys.Inputs))) + case inputs => + inputs flatMap { case (inputName, wdlValue) => + wdlValueToMetadataEvents(MetadataKey(subWorkflowId, None, s"${WorkflowMetadataKeys.Inputs}:${inputName.unqualifiedName}"), wdlValue) + } + } + + val workflowRootEvents = buildWorkflowRootMetadataEvents(subWorkflowDescriptor) + + serviceRegistryActor ! PutMetadataAction(events ++ inputEvents ++ workflowRootEvents) + } + + private def buildWorkflowRootMetadataEvents(subWorkflowDescriptor: BackendWorkflowDescriptor) = { + val subWorkflowId = subWorkflowDescriptor.id + + factories flatMap { + case (backendName, factory) => + BackendConfiguration.backendConfigurationDescriptor(backendName).toOption map { config => + backendName -> factory.getWorkflowExecutionRootPath(subWorkflowDescriptor, config.backendConfig, initializationData.get(backendName)) + } + } map { + case (backend, wfRoot) => + MetadataEvent(MetadataKey(subWorkflowId, None, s"${WorkflowMetadataKeys.WorkflowRoot}[$backend]"), MetadataValue(wfRoot.toAbsolutePath)) + } + } + + private def createSubWorkflowId() = { + val subWorkflowId = WorkflowId.randomId() + // Register ID to the sub workflow store + subWorkflowStoreActor ! 
RegisterSubWorkflow(parentWorkflow.rootWorkflow.id, parentWorkflow.id, key, subWorkflowId) + subWorkflowId + } +} + +object SubWorkflowExecutionActor { + sealed trait SubWorkflowExecutionActorState { + def workflowState: WorkflowState + } + sealed trait SubWorkflowTerminalState extends SubWorkflowExecutionActorState + + case object SubWorkflowPendingState extends SubWorkflowExecutionActorState { + override val workflowState = WorkflowRunning + } + case object SubWorkflowCheckingStoreState extends SubWorkflowExecutionActorState { + override val workflowState = WorkflowRunning + } + case object SubWorkflowPreparingState extends SubWorkflowExecutionActorState { + override val workflowState = WorkflowRunning + } + case object SubWorkflowRunningState extends SubWorkflowExecutionActorState { + override val workflowState = WorkflowRunning + } + case object SubWorkflowAbortingState extends SubWorkflowExecutionActorState { + override val workflowState = WorkflowAborting + } + + case object SubWorkflowSucceededState extends SubWorkflowTerminalState { + override val workflowState = WorkflowSucceeded + } + case object SubWorkflowAbortedState extends SubWorkflowTerminalState { + override val workflowState = WorkflowAborted + } + case object SubWorkflowFailedState extends SubWorkflowTerminalState { + override val workflowState = WorkflowFailed + } + + object SubWorkflowExecutionActorData { + def empty = SubWorkflowExecutionActorData(None) + } + case class SubWorkflowExecutionActorData(subWorkflowId: Option[WorkflowId]) + + sealed trait EngineWorkflowExecutionActorCommand + case object Execute + + def props(key: SubWorkflowKey, + data: WorkflowExecutionActorData, + factories: Map[String, BackendLifecycleActorFactory], + serviceRegistryActor: ActorRef, + jobStoreActor: ActorRef, + subWorkflowStoreActor: ActorRef, + callCacheReadActor: ActorRef, + jobTokenDispenserActor: ActorRef, + backendSingletonCollection: BackendSingletonCollection, + initializationData: AllBackendInitializationData, + restarting: Boolean) = { + Props(new SubWorkflowExecutionActor( + key, + data, + factories, + serviceRegistryActor, + jobStoreActor, + subWorkflowStoreActor, + callCacheReadActor, + jobTokenDispenserActor, + backendSingletonCollection, + initializationData, + restarting) + ) + } +} \ No newline at end of file diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WdlLookup.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WdlLookup.scala deleted file mode 100644 index 8b2af57eac1..00000000000 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WdlLookup.scala +++ /dev/null @@ -1,106 +0,0 @@ -package cromwell.engine.workflow.lifecycle.execution - -import cromwell.core.{ExecutionIndex, ExecutionStore, OutputStore} -import cromwell.engine.EngineWorkflowDescriptor -import ExecutionIndex._ -import wdl4s._ -import wdl4s.expression.WdlStandardLibraryFunctions -import wdl4s.values.{WdlArray, WdlCallOutputsObject, WdlValue} - -import scala.language.postfixOps -import scala.util.{Failure, Success, Try} - -trait WdlLookup { - - def workflowDescriptor: EngineWorkflowDescriptor - def executionStore: ExecutionStore - def outputStore: OutputStore - def expressionLanguageFunctions: WdlStandardLibraryFunctions - - private lazy val splitInputs = workflowDescriptor.backendDescriptor.inputs map { - case (fqn, v) => splitFqn(fqn) -> v - } - - // Unqualified workflow level inputs - private lazy val unqualifiedWorkflowInputs: Map[LocallyQualifiedName, WdlValue] = 
splitInputs collect { - case((root, inputName), v) if root == workflowDescriptor.namespace.workflow.unqualifiedName => inputName -> v - } - - /** - * Lookup an identifier by - * first looking at the completed calls map - * and if not found traversing up the scope hierarchy from the scope from which the lookup originated. - */ - def hierarchicalLookup(scope: Scope, index: ExecutionIndex)(identifier: String): WdlValue = { - // First lookup calls - lookupCall(scope, index, identifier) recoverWith { - // Lookup in the same scope (currently no scope support this but say we have scatter declarations, or multiple scatter variables, or nested workflows..) - case _: VariableNotFoundException | _: WdlExpressionException => scopedLookup(scope, index, identifier) - } recover { - // Lookup parent if present - case _: VariableNotFoundException | _: WdlExpressionException => scope.parent match { - case Some(parent) => hierarchicalLookup(parent, index)(identifier) - case None => throw new VariableNotFoundException(s"Can't find $identifier") - } - } get - } - - private def scopedLookup(scope: Scope, index: ExecutionIndex, identifier: String): Try[WdlValue] = { - def scopedLookupFunction = scope match { - case scatter: Scatter if index.isDefined => lookupScatter(scatter, index.get) _ - case workflow: Workflow => lookupWorkflowDeclaration _ - case _ => (_: String) => Failure(new VariableNotFoundException(s"Can't find $identifier in scope $scope")) - } - - scopedLookupFunction(identifier) - } - - // In this case, the scopedLookup function is effectively equivalent to looking into unqualifiedWorkflowInputs for the value - // because the resolution / evaluation / coercion has already happened in the MaterializeWorkflowDescriptorActor - private def lookupWorkflowDeclaration(identifier: String) = { - unqualifiedWorkflowInputs.get(identifier) match { - case Some(value) => Success(value) - case None => Failure(new WdlExpressionException(s"Could not resolve variable $identifier as a workflow input")) - } - } - - private def lookupScatter(scatter: Scatter, index: Int)(identifier: String): Try[WdlValue] = { - if (identifier == scatter.item) { - // Scatters are not indexed yet (they can't be nested) - val scatterLookup = hierarchicalLookup(scatter, None) _ - scatter.collection.evaluate(scatterLookup, expressionLanguageFunctions) map { - case collection: WdlArray if collection.value.isDefinedAt(index) => collection.value(index) - case collection: WdlArray => throw new RuntimeException(s"Index $index out of bound in $collection for scatter ${scatter.fullyQualifiedName}") - case other => throw new RuntimeException(s"Scatter ${scatter.fullyQualifiedName} collection is not an array: $other") - } recover { - case e => throw new RuntimeException(s"Failed to evaluate collection for scatter ${scatter.fullyQualifiedName}", e) - } - } else { - Failure(new VariableNotFoundException(identifier)) - } - } - - private def lookupCall(scope: Scope, scopeIndex: ExecutionIndex, identifier: String): Try[WdlCallOutputsObject] = { - val calls = executionStore.store.keys.view map { _.scope } collect { case c: Call => c } - - calls find { _.unqualifiedName == identifier } match { - case Some(matchedCall) => - /** - * After matching the Call, this determines if the `key` depends on a single shard - * of a scatter'd job or if it depends on the whole thing. Right now, the heuristic - * is "If we're both in a scatter block together, then I depend on a shard. 
If not, - * I depend on the collected value" - * - * TODO: nested-scatter - this will likely not be sufficient for nested scatters - */ - val index: ExecutionIndex = matchedCall.closestCommonAncestor(scope) flatMap { - case s: Scatter => scopeIndex - case _ => None - } - - outputStore.fetchCallOutputEntries(matchedCall, index) - case None => Failure(new WdlExpressionException(s"Could not find a call with identifier '$identifier'")) - } - } - -} diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActor.scala index b7358296ba1..10f0bf37738 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActor.scala @@ -1,316 +1,76 @@ package cromwell.engine.workflow.lifecycle.execution -import java.time.OffsetDateTime - -import akka.actor.SupervisorStrategy.{Escalate, Stop} import akka.actor._ import cats.data.NonEmptyList import com.typesafe.config.ConfigFactory -import cromwell.backend.BackendJobExecutionActor.{AbortedResponse, FailedNonRetryableResponse, FailedRetryableResponse, SucceededResponse} +import cromwell.backend.BackendJobExecutionActor.{AbortedResponse, JobFailedNonRetryableResponse, JobFailedRetryableResponse, JobSucceededResponse} import cromwell.backend.BackendLifecycleActor.AbortJobCommand -import cromwell.backend.{AllBackendInitializationData, BackendJobDescriptor, BackendJobDescriptorKey} -import cromwell.core.Dispatcher.EngineDispatcher +import cromwell.backend.{AllBackendInitializationData, BackendJobDescriptorKey, JobExecutionMap} +import cromwell.core.Dispatcher._ import cromwell.core.ExecutionIndex._ import cromwell.core.ExecutionStatus._ -import cromwell.core.ExecutionStore.ExecutionStoreEntry -import cromwell.core.OutputStore.OutputEntry import cromwell.core.WorkflowOptions.WorkflowFailureMode import cromwell.core._ import cromwell.core.logging.WorkflowLogging import cromwell.engine.backend.{BackendSingletonCollection, CromwellBackends} -import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor.JobRunning -import cromwell.engine.workflow.lifecycle.execution.JobPreparationActor.BackendJobPreparationFailed -import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor.WorkflowExecutionActorState +import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor.{apply => _, _} import cromwell.engine.workflow.lifecycle.{EngineLifecycleActorAbortCommand, EngineLifecycleActorAbortedResponse} import cromwell.engine.{ContinueWhilePossible, EngineWorkflowDescriptor} -import cromwell.services.metadata.MetadataService._ -import cromwell.services.metadata._ +import cromwell.services.metadata.MetadataService.{MetadataPutAcknowledgement, MetadataPutFailed} +import cromwell.util.{StopAndLogSupervisor, TryUtil} import cromwell.webservice.EngineStatsActor import lenthall.exception.ThrowableAggregation import net.ceedubs.ficus.Ficus._ -import wdl4s.types.WdlArrayType -import wdl4s.util.TryUtil import wdl4s.values.{WdlArray, WdlValue} import wdl4s.{Scope, _} import scala.annotation.tailrec import scala.language.postfixOps -import scala.util.{Failure, Random, Success, Try} - -object WorkflowExecutionActor { - - /** - * States - */ - sealed trait WorkflowExecutionActorState { def terminal = false } - sealed trait WorkflowExecutionActorTerminalState extends 
WorkflowExecutionActorState { override val terminal = true } - - case object WorkflowExecutionPendingState extends WorkflowExecutionActorState - case object WorkflowExecutionInProgressState extends WorkflowExecutionActorState - case object WorkflowExecutionAbortingState extends WorkflowExecutionActorState - case object WorkflowExecutionSuccessfulState extends WorkflowExecutionActorTerminalState - case object WorkflowExecutionFailedState extends WorkflowExecutionActorTerminalState - case object WorkflowExecutionAbortedState extends WorkflowExecutionActorTerminalState - - /** - * Commands - */ - sealed trait WorkflowExecutionActorCommand - case object ExecuteWorkflowCommand extends WorkflowExecutionActorCommand - case object RestartExecutingWorkflowCommand extends WorkflowExecutionActorCommand - - /** - * Responses - */ - sealed trait WorkflowExecutionActorResponse { - def executionStore: ExecutionStore - - def outputStore: OutputStore - } - - case class WorkflowExecutionSucceededResponse(executionStore: ExecutionStore, outputStore: OutputStore) - extends WorkflowExecutionActorResponse { - override def toString = "WorkflowExecutionSucceededResponse" - } - - case class WorkflowExecutionAbortedResponse(executionStore: ExecutionStore, outputStore: OutputStore) - extends WorkflowExecutionActorResponse with EngineLifecycleActorAbortedResponse { - override def toString = "WorkflowExecutionAbortedResponse" - } - - final case class WorkflowExecutionFailedResponse(executionStore: ExecutionStore, outputStore: OutputStore, - reasons: Seq[Throwable]) extends WorkflowExecutionActorResponse { - override def toString = "WorkflowExecutionFailedResponse" - } - - /** - * Internal control flow messages - */ - private case class JobInitializationFailed(jobKey: JobKey, throwable: Throwable) - private case class ScatterCollectionFailedResponse(collectorKey: CollectorKey, throwable: Throwable) - private case class ScatterCollectionSucceededResponse(collectorKey: CollectorKey, outputs: JobOutputs) - - /** - * Internal ADTs - */ - case class ScatterKey(scope: Scatter) extends JobKey { - override val index = None // When scatters are nested, this might become Some(_) - override val attempt = 1 - override val tag = scope.unqualifiedName - - /** - * Creates a sub-ExecutionStore with Starting entries for each of the scoped children. - * - * @param count Number of ways to scatter the children. - * @return ExecutionStore of scattered children. 
- */ - def populate(count: Int): Map[JobKey, ExecutionStatus.Value] = { - val keys = this.scope.children flatMap { explode(_, count) } - keys map { _ -> ExecutionStatus.NotStarted } toMap - } - - private def explode(scope: Scope, count: Int): Seq[JobKey] = { - scope match { - case call: Call => - val shards = (0 until count) map { i => BackendJobDescriptorKey(call, Option(i), 1) } - shards :+ CollectorKey(call) - case scatter: Scatter => - throw new UnsupportedOperationException("Nested Scatters are not supported (yet).") - case e => - throw new UnsupportedOperationException(s"Scope ${e.getClass.getName} is not supported.") - } - } - } - - // Represents a scatter collection for a call in the execution store - case class CollectorKey(scope: Call) extends JobKey { - override val index = None - override val attempt = 1 - override val tag = s"Collector-${scope.unqualifiedName}" - } - - case class WorkflowExecutionException[T <: Throwable](exceptions: NonEmptyList[T]) extends ThrowableAggregation { - override val throwables = exceptions.toList - override val exceptionContext = s"WorkflowExecutionActor" - } - - def props(workflowId: WorkflowId, - workflowDescriptor: EngineWorkflowDescriptor, - serviceRegistryActor: ActorRef, - jobStoreActor: ActorRef, - callCacheReadActor: ActorRef, - jobTokenDispenserActor: ActorRef, - backendSingletonCollection: BackendSingletonCollection, - initializationData: AllBackendInitializationData, - restarting: Boolean): Props = { - Props(WorkflowExecutionActor(workflowId, workflowDescriptor, serviceRegistryActor, jobStoreActor, - callCacheReadActor, jobTokenDispenserActor, backendSingletonCollection, initializationData, restarting)).withDispatcher(EngineDispatcher) - } - - implicit class EnhancedExecutionStore(val executionStore: ExecutionStore) extends AnyVal { - // Convert the store to a `List` before `collect`ing to sidestep expensive and pointless hashing of `Scope`s when - // assembling the result. - def runnableScopes = executionStore.store.toList collect { case entry if isRunnable(entry) => entry._1 } - - private def isRunnable(entry: ExecutionStoreEntry) = { - entry match { - case (key, ExecutionStatus.NotStarted) => arePrerequisitesDone(key) - case _ => false - } - } - - def findShardEntries(key: CollectorKey): List[ExecutionStoreEntry] = executionStore.store.toList collect { - case (k: BackendJobDescriptorKey, v) if k.scope == key.scope && k.isShard => (k, v) - } - - private def arePrerequisitesDone(key: JobKey): Boolean = { - val upstream = key.scope.prerequisiteScopes.toList.map(s => upstreamEntries(key, s)) - val downstream = key match { - case collector: CollectorKey => findShardEntries(collector) - case _ => Nil - } - - val dependencies = upstream.flatten ++ downstream - val dependenciesResolved = dependencies forall { case (_, s) => s == ExecutionStatus.Done } - - /** - * We need to make sure that all prerequisiteScopes have been resolved to some entry before going forward. - * If a scope cannot be resolved it may be because it is in a scatter that has not been populated yet, - * therefore there is no entry in the executionStore for this scope. 
- * If that's the case this prerequisiteScope has not been run yet, hence the (upstream forall {_.nonEmpty}) - */ - (upstream forall { _.nonEmpty }) && dependenciesResolved - } - - private def upstreamEntries(entry: JobKey, prerequisiteScope: Scope): Seq[ExecutionStoreEntry] = { - prerequisiteScope.closestCommonAncestor(entry.scope) match { - /** - * If this entry refers to a Scope which has a common ancestor with prerequisiteScope - * and that common ancestor is a Scatter block, then find the shard with the same index - * as 'entry'. In other words, if you're in the same scatter block as your pre-requisite - * scope, then depend on the shard (with same index). - * - * NOTE: this algorithm was designed for ONE-LEVEL of scattering and probably does not - * work as-is for nested scatter blocks - */ - case Some(ancestor: Scatter) => - executionStore.store filter { - case (k, _) => k.scope == prerequisiteScope && k.index == entry.index - } toSeq - - /** - * Otherwise, simply refer to the entry the collector entry. This means that 'entry' depends - * on every shard of the pre-requisite scope to finish. - */ - case _ => - executionStore.store filter { - case (k, _) => k.scope == prerequisiteScope && k.index.isEmpty - } toSeq - } - } - } - - implicit class EnhancedOutputStore(val outputStore: OutputStore) extends AnyVal { - /** - * Try to generate output for a collector call, by collecting outputs for all of its shards. - * It's fail-fast on shard output retrieval - */ - def generateCollectorOutput(collector: CollectorKey, - shards: Iterable[BackendJobDescriptorKey]): Try[JobOutputs] = Try { - val shardsOutputs = shards.toSeq sortBy { _.index.fromIndex } map { e => - outputStore.fetchCallOutputEntries(e.scope, e.index) map { - _.outputs - } getOrElse(throw new RuntimeException(s"Could not retrieve output for shard ${e.scope} #${e.index}")) - } - collector.scope.task.outputs map { taskOutput => - val wdlValues = shardsOutputs.map( - _.getOrElse(taskOutput.name, throw new RuntimeException(s"Could not retrieve output ${taskOutput.name}"))) - val arrayOfValues = new WdlArray(WdlArrayType(taskOutput.wdlType), wdlValues) - taskOutput.name -> JobOutput(arrayOfValues) - } toMap - } - } -} - -final case class WorkflowExecutionActor(workflowId: WorkflowId, - workflowDescriptor: EngineWorkflowDescriptor, - serviceRegistryActor: ActorRef, - jobStoreActor: ActorRef, - callCacheReadActor: ActorRef, - jobTokenDispenserActor: ActorRef, - backendSingletonCollection: BackendSingletonCollection, - initializationData: AllBackendInitializationData, - restarting: Boolean) - extends LoggingFSM[WorkflowExecutionActorState, WorkflowExecutionActorData] with WorkflowLogging { - - import WorkflowExecutionActor._ - - override def supervisorStrategy = AllForOneStrategy() { - case ex: ActorInitializationException => - context.parent ! 
WorkflowExecutionFailedResponse(stateData.executionStore, stateData.outputStore, List(ex)) - context.stop(self) - Stop - case t => super.supervisorStrategy.decider.applyOrElse(t, (_: Any) => Escalate) - } - - val tag = s"WorkflowExecutionActor [UUID(${workflowId.shortString})]" - private lazy val DefaultMaxRetriesFallbackValue = 10 - +import scala.util.{Failure, Success, Try} + +case class WorkflowExecutionActor(workflowDescriptor: EngineWorkflowDescriptor, + serviceRegistryActor: ActorRef, + jobStoreActor: ActorRef, + subWorkflowStoreActor: ActorRef, + callCacheReadActor: ActorRef, + jobTokenDispenserActor: ActorRef, + backendSingletonCollection: BackendSingletonCollection, + initializationData: AllBackendInitializationData, + restarting: Boolean) + extends LoggingFSM[WorkflowExecutionActorState, WorkflowExecutionActorData] with WorkflowLogging with CallMetadataHelper with StopAndLogSupervisor { + implicit val ec = context.dispatcher + + override val workflowIdForLogging = workflowDescriptor.id + override val workflowIdForCallMetadata = workflowDescriptor.id - val MaxRetries = ConfigFactory.load().as[Option[Int]]("system.max-retries") match { + private val tag = s"WorkflowExecutionActor [UUID(${workflowDescriptor.id.shortString})]" + private val MaxRetries = ConfigFactory.load().as[Option[Int]]("system.max-retries") match { case Some(value) => value case None => workflowLogger.warn(s"Failed to load the max-retries value from the configuration. Defaulting back to a value of '$DefaultMaxRetriesFallbackValue'.") DefaultMaxRetriesFallbackValue } - - private val factories = TryUtil.sequenceMap(workflowDescriptor.backendAssignments.values.toSet[String] map { backendName => + + private val backendFactories = TryUtil.sequenceMap(workflowDescriptor.backendAssignments.values.toSet[String] map { backendName => backendName -> CromwellBackends.backendLifecycleFactoryActorByName(backendName) } toMap) recover { case e => throw new RuntimeException("Could not instantiate backend factories", e) } get - // Initialize the StateData with ExecutionStore (all calls as NotStarted) and SymbolStore startWith( WorkflowExecutionPendingState, WorkflowExecutionActorData( workflowDescriptor, - executionStore = buildInitialExecutionStore(), + executionStore = ExecutionStore(workflowDescriptor.backendDescriptor.workflow, workflowDescriptor.inputs), backendJobExecutionActors = Map.empty, + engineCallExecutionActors = Map.empty, + subWorkflowExecutionActors = Map.empty, + downstreamExecutionMap = Map.empty, outputStore = OutputStore.empty ) ) - private def buildInitialExecutionStore(): ExecutionStore = { - val workflow = workflowDescriptor.backendDescriptor.workflowNamespace.workflow - // Only add direct children to the store, the rest is dynamically created when necessary - val keys = workflow.children map { - case call: Call => BackendJobDescriptorKey(call, None, 1) - case scatter: Scatter => ScatterKey(scatter) - } - - ExecutionStore(keys.map(_ -> NotStarted).toMap) - } - - private def handleNonRetryableFailure(stateData: WorkflowExecutionActorData, failedJobKey: JobKey, reason: Throwable) = { - val mergedStateData = stateData.mergeExecutionDiff(WorkflowExecutionDiff(Map(failedJobKey -> ExecutionStatus.Failed))) - .removeBackendJobExecutionActor(failedJobKey) - - if (workflowDescriptor.getWorkflowOption(WorkflowFailureMode).contains(ContinueWhilePossible.toString)) { - mergedStateData.workflowCompletionStatus match { - case Some(completionStatus) if completionStatus == Failed => - context.parent ! 
WorkflowExecutionFailedResponse(stateData.executionStore, stateData.outputStore, List(reason)) - goto(WorkflowExecutionFailedState) using mergedStateData - case _ => - stay() using startRunnableScopes(mergedStateData) - } - } else { - context.parent ! WorkflowExecutionFailedResponse(stateData.executionStore, stateData.outputStore, List(reason)) - goto(WorkflowExecutionFailedState) using mergedStateData - } - } - when(WorkflowExecutionPendingState) { case Event(ExecuteWorkflowCommand, stateData) => val data = startRunnableScopes(stateData) @@ -318,32 +78,64 @@ final case class WorkflowExecutionActor(workflowId: WorkflowId, } when(WorkflowExecutionInProgressState) { - case Event(JobRunning(jobDescriptor, backendJobExecutionActor), stateData) => - pushRunningJobMetadata(jobDescriptor) + case Event(JobStarting(jobKey), stateData) => + pushStartingCallMetadata(jobKey) stay() using stateData - .addBackendJobExecutionActor(jobDescriptor.key, backendJobExecutionActor) - .mergeExecutionDiff(WorkflowExecutionDiff(Map(jobDescriptor.key -> ExecutionStatus.Running))) - case Event(BackendJobPreparationFailed(jobKey, throwable), stateData) => - pushFailedJobMetadata(jobKey, None, throwable, retryableFailure = false) - context.parent ! WorkflowExecutionFailedResponse(stateData.executionStore, stateData.outputStore, List(throwable)) - goto(WorkflowExecutionFailedState) using stateData.mergeExecutionDiff(WorkflowExecutionDiff(Map(jobKey -> ExecutionStatus.Failed))) - case Event(SucceededResponse(jobKey, returnCode, callOutputs, _, _), stateData) => - pushSuccessfulJobMetadata(jobKey, returnCode, callOutputs) - handleJobSuccessful(jobKey, callOutputs, stateData) - case Event(FailedNonRetryableResponse(jobKey, reason, returnCode), stateData) => - pushFailedJobMetadata(jobKey, returnCode, reason, retryableFailure = false) - handleNonRetryableFailure(stateData, jobKey, reason) - case Event(FailedRetryableResponse(jobKey, reason, returnCode), stateData) => - workflowLogger.warn(s"Job ${jobKey.tag} failed with a retryable failure: ${reason.getMessage}") - pushFailedJobMetadata(jobKey, None, reason, retryableFailure = true) - handleRetryableFailure(jobKey, reason, returnCode) - case Event(JobInitializationFailed(jobKey, reason), stateData) => - pushFailedJobMetadata(jobKey, None, reason, retryableFailure = false) - handleNonRetryableFailure(stateData, jobKey, reason) + .mergeExecutionDiff(WorkflowExecutionDiff(Map(jobKey -> ExecutionStatus.Starting))) + case Event(JobRunning(key, inputs, callExecutionActor), stateData) => + pushRunningCallMetadata(key, inputs) + stay() using stateData + .addCallExecutionActor(key, callExecutionActor) + .mergeExecutionDiff(WorkflowExecutionDiff(Map(key -> ExecutionStatus.Running))) + + //Success + // Job + case Event(JobSucceededResponse(jobKey, returnCode, callOutputs, _, _), stateData) => + pushSuccessfulCallMetadata(jobKey, returnCode, callOutputs) + handleCallSuccessful(jobKey, callOutputs, stateData, Map.empty) + // Sub Workflow + case Event(SubWorkflowSucceededResponse(jobKey, descendantJobKeys, callOutputs), stateData) => + pushSuccessfulCallMetadata(jobKey, None, callOutputs) + handleCallSuccessful(jobKey, callOutputs, stateData, descendantJobKeys) + // Scatter case Event(ScatterCollectionSucceededResponse(jobKey, callOutputs), stateData) => - handleJobSuccessful(jobKey, callOutputs, stateData) + handleCallSuccessful(jobKey, callOutputs, stateData, Map.empty) + // Declaration + case Event(DeclarationEvaluationSucceededResponse(jobKey, callOutputs), stateData) => + 
handleDeclarationEvaluationSuccessful(jobKey, callOutputs, stateData) + + // Failure + // Initialization + case Event(JobInitializationFailed(jobKey, reason), stateData) => + pushFailedCallMetadata(jobKey, None, reason, retryableFailure = false) + handleNonRetryableFailure(stateData, jobKey, reason, Map.empty) + // Job Non Retryable + case Event(JobFailedNonRetryableResponse(jobKey, reason, returnCode), stateData) => + pushFailedCallMetadata(jobKey, returnCode, reason, retryableFailure = false) + handleNonRetryableFailure(stateData, jobKey, reason, Map.empty) + // Job Retryable + case Event(JobFailedRetryableResponse(jobKey, reason, returnCode), stateData) => + pushFailedCallMetadata(jobKey, None, reason, retryableFailure = true) + handleRetryableFailure(jobKey, reason, returnCode) + // Sub Workflow - sub workflow failures are always non retryable + case Event(SubWorkflowFailedResponse(jobKey, descendantJobKeys, reason), stateData) => + pushFailedCallMetadata(jobKey, None, reason, retryableFailure = false) + handleNonRetryableFailure(stateData, jobKey, reason, descendantJobKeys) + case Event(DeclarationEvaluationFailedResponse(jobKey, reason), stateData) => + handleDeclarationEvaluationFailure(jobKey, reason, stateData) } + when(WorkflowExecutionAbortingState) { + case Event(AbortedResponse(jobKey), stateData) => + handleCallAborted(stateData, jobKey, Map.empty) + case Event(SubWorkflowAbortedResponse(jobKey, executedKeys), stateData) => + handleCallAborted(stateData, jobKey, executedKeys) + case Event(SubWorkflowSucceededResponse(subKey, executedKeys, _), stateData) => + handleCallAborted(stateData, subKey, executedKeys) + case Event(JobSucceededResponse(jobKey, returnCode, callOutputs, _, _), stateData) => + handleCallAborted(stateData, jobKey, Map.empty) + } + when(WorkflowExecutionSuccessfulState) { FSM.NullFunction } @@ -359,51 +151,63 @@ final case class WorkflowExecutionActor(workflowId: WorkflowId, */ private def alreadyFailedMopUp: StateFunction = { case Event(JobInitializationFailed(jobKey, reason), stateData) => - pushFailedJobMetadata(jobKey, None, reason, retryableFailure = false) + pushFailedCallMetadata(jobKey, None, reason, retryableFailure = false) stay - case Event(FailedNonRetryableResponse(jobKey, reason, returnCode), stateData) => - pushFailedJobMetadata(jobKey, returnCode, reason, retryableFailure = false) + case Event(JobFailedNonRetryableResponse(jobKey, reason, returnCode), stateData) => + pushFailedCallMetadata(jobKey, returnCode, reason, retryableFailure = false) stay - case Event(FailedRetryableResponse(jobKey, reason, returnCode), stateData) => - pushFailedJobMetadata(jobKey, returnCode, reason, retryableFailure = true) + case Event(JobFailedRetryableResponse(jobKey, reason, returnCode), stateData) => + pushFailedCallMetadata(jobKey, returnCode, reason, retryableFailure = true) stay - case Event(SucceededResponse(jobKey, returnCode, callOutputs, _, _), stateData) => - pushSuccessfulJobMetadata(jobKey, returnCode, callOutputs) + case Event(JobSucceededResponse(jobKey, returnCode, callOutputs, _, _), stateData) => + pushSuccessfulCallMetadata(jobKey, returnCode, callOutputs) stay } - when(WorkflowExecutionAbortingState) { - case Event(AbortedResponse(jobKey), stateData) => - workflowLogger.info(s"$tag job aborted: ${jobKey.tag}") - val newStateData = stateData.removeBackendJobExecutionActor(jobKey) - if (newStateData.backendJobExecutionActors.isEmpty) { - workflowLogger.info(s"$tag all jobs aborted") - goto(WorkflowExecutionAbortedState) - } else { - stay() using 
newStateData + + def handleTerminated(actorRef: ActorRef) = { + // Both of these Should Never Happen (tm), assuming the state data is set correctly on EJEA creation. + // If they do, it's a big programmer error and the workflow execution fails. + val jobKey = stateData.engineCallExecutionActors.getOrElse(actorRef, throw new RuntimeException("Programmer Error: An EJEA has terminated but was not assigned a jobKey")) + val jobStatus = stateData.executionStore.store.getOrElse(jobKey, throw new RuntimeException("Programmer Error: An EJEA representing a jobKey which this workflow is not running has sent up a terminated message.")) + + if (!jobStatus.isTerminal) { + val terminationException = getFailureCause(actorRef) match { + case Some(e) => new RuntimeException("Unexpected failure in EJEA.", e) + case None => new RuntimeException("Unexpected failure in EJEA (root cause not captured).") } + self ! JobFailedNonRetryableResponse(jobKey, terminationException, None) + } + + stay } whenUnhandled { + case Event(Terminated(actorRef), stateData) => handleTerminated(actorRef) using stateData.removeEngineJobExecutionActor(actorRef) case Event(MetadataPutFailed(action, error), _) => // Do something useful here?? - workflowLogger.warn(s"$tag Put failed for Metadata action $action : ${error.getMessage}") - stay + workflowLogger.warn(s"$tag Put failed for Metadata action $action", error) + stay() case Event(MetadataPutAcknowledgement(_), _) => stay() case Event(EngineLifecycleActorAbortCommand, stateData) => - if (stateData.backendJobExecutionActors.nonEmpty) { - log.info(s"$tag: Abort received. Aborting ${stateData.backendJobExecutionActors.size} EJEAs") - stateData.backendJobExecutionActors.values foreach {_ ! AbortJobCommand} + if (stateData.hasRunningActors) { + log.info(s"$tag: Abort received. " + + s"Aborting ${stateData.backendJobExecutionActors.size} Job Execution Actors" + + s"and ${stateData.subWorkflowExecutionActors.size} Sub Workflow Execution Actors" + ) + stateData.backendJobExecutionActors.values foreach { _ ! AbortJobCommand } + stateData.subWorkflowExecutionActors.values foreach { _ ! EngineLifecycleActorAbortCommand } goto(WorkflowExecutionAbortingState) } else { goto(WorkflowExecutionAbortedState) } case Event(EngineStatsActor.JobCountQuery, data) => sender ! EngineStatsActor.JobCount(data.backendJobExecutionActors.size) + data.subWorkflowExecutionActors.values foreach { _ forward EngineStatsActor.JobCountQuery } stay() case unhandledMessage => workflowLogger.warn(s"$tag received an unhandled message: ${unhandledMessage.event} in state: $stateName") - stay + stay() } onTransition { @@ -415,11 +219,61 @@ final case class WorkflowExecutionActor(workflowId: WorkflowId, } onTransition { - case _ -> WorkflowExecutionSuccessfulState => - pushWorkflowOutputMetadata(nextStateData) - context.parent ! WorkflowExecutionSucceededResponse(nextStateData.executionStore, nextStateData.outputStore) case _ -> WorkflowExecutionAbortedState => - context.parent ! WorkflowExecutionAbortedResponse(nextStateData.executionStore, nextStateData.outputStore) + context.parent ! 
WorkflowExecutionAbortedResponse(nextStateData.jobExecutionMap) + } + + private def handleNonRetryableFailure(stateData: WorkflowExecutionActorData, failedJobKey: JobKey, reason: Throwable, jobExecutionMap: JobExecutionMap) = { + val newData = stateData + .removeCallExecutionActor(failedJobKey) + .addExecutions(jobExecutionMap) + + handleExecutionFailure(failedJobKey, newData, reason, jobExecutionMap) + } + + private def handleDeclarationEvaluationFailure(declarationKey: DeclarationKey, reason: Throwable, stateData: WorkflowExecutionActorData) = { + handleExecutionFailure(declarationKey, stateData, reason, Map.empty) + } + + private def handleExecutionFailure(failedJobKey: JobKey, data: WorkflowExecutionActorData, reason: Throwable, jobExecutionMap: JobExecutionMap) = { + val newData = data.executionFailed(failedJobKey) + + if (workflowDescriptor.getWorkflowOption(WorkflowFailureMode).contains(ContinueWhilePossible.toString)) { + newData.workflowCompletionStatus match { + case Some(completionStatus) if completionStatus == Failed => + context.parent ! WorkflowExecutionFailedResponse(newData.jobExecutionMap, reason) + goto(WorkflowExecutionFailedState) using newData + case _ => + stay() using startRunnableScopes(newData) + } + } else { + context.parent ! WorkflowExecutionFailedResponse(newData.jobExecutionMap, reason) + goto(WorkflowExecutionFailedState) using newData + } + } + + private def handleWorkflowSuccessful(data: WorkflowExecutionActorData) = { + import cromwell.util.JsonFormatting.WdlValueJsonFormatter._ + import spray.json._ + + val (response, finalState) = workflowDescriptor.workflow.evaluateOutputs( + workflowDescriptor.inputs, + data.expressionLanguageFunctions, + data.outputStore.fetchNodeOutputEntries + ) map { workflowOutputs => + workflowLogger.info( + s"""Workflow ${workflowDescriptor.workflow.unqualifiedName} complete. Final Outputs: + |${workflowOutputs.toJson.prettyPrint}""".stripMargin + ) + pushWorkflowOutputMetadata(workflowOutputs) + (WorkflowExecutionSucceededResponse(data.jobExecutionMap, workflowOutputs mapValues JobOutput.apply), WorkflowExecutionSuccessfulState) + } recover { + case ex => + (WorkflowExecutionFailedResponse(data.jobExecutionMap, ex), WorkflowExecutionFailedState) + } get + + context.parent ! response + goto(finalState) using data } private def handleRetryableFailure(jobKey: BackendJobDescriptorKey, reason: Throwable, returnCode: Option[Int]) = { @@ -427,103 +281,51 @@ final case class WorkflowExecutionActor(workflowId: WorkflowId, if (jobKey.attempt <= MaxRetries) { val newJobKey = jobKey.copy(attempt = jobKey.attempt + 1) workflowLogger.info(s"Retrying job execution for ${newJobKey.tag}") - /** Currently, we update the status of the old key to Preempted, and add a new entry (with the #attempts incremented by 1) + /* Currently, we update the status of the old key to Preempted, and add a new entry (with the #attempts incremented by 1) * to the execution store with status as NotStarted. This allows startRunnableCalls to re-execute this job */ val executionDiff = WorkflowExecutionDiff(Map(jobKey -> ExecutionStatus.Preempted, newJobKey -> ExecutionStatus.NotStarted)) - val newData = stateData.mergeExecutionDiff(executionDiff) + val newData = stateData.mergeExecutionDiff(executionDiff).removeCallExecutionActor(jobKey) stay() using startRunnableScopes(newData) } else { workflowLogger.warn(s"Exhausted maximum number of retries for job ${jobKey.tag}. 
Failing.") - goto(WorkflowExecutionFailedState) using stateData.mergeExecutionDiff(WorkflowExecutionDiff(Map(jobKey -> ExecutionStatus.Failed))) - } - } - - private def handleJobSuccessful(jobKey: JobKey, outputs: JobOutputs, data: WorkflowExecutionActorData) = { - workflowLogger.debug(s"Job ${jobKey.tag} succeeded!") - val newData = data.jobExecutionSuccess(jobKey, outputs) - - newData.workflowCompletionStatus match { - case Some(ExecutionStatus.Done) => - workflowLogger.info(newData.outputsJson()) - goto(WorkflowExecutionSuccessfulState) using newData - case Some(sts) => - context.parent ! WorkflowExecutionFailedResponse(stateData.executionStore, stateData.outputStore, List(new Exception("One or more jobs failed in fail-slow mode"))) - goto(WorkflowExecutionFailedState) using newData - case _ => - stay() using startRunnableScopes(newData) + goto(WorkflowExecutionFailedState) using stateData.mergeExecutionDiff(WorkflowExecutionDiff(Map(jobKey -> ExecutionStatus.Failed))).removeCallExecutionActor(jobKey) } } - private def pushWorkflowOutputMetadata(data: WorkflowExecutionActorData) = { - val reportableOutputs = workflowDescriptor.backendDescriptor.workflowNamespace.workflow.outputs - val keyValues = data.outputStore.store filterKeys { - _.index.isEmpty - } flatMap { - case (key, value) => - value collect { - case entry if isReportableOutput(key.call, entry, reportableOutputs) => - s"${key.call.fullyQualifiedName}.${entry.name}" -> entry.wdlValue - } - } collect { - case (key, Some(wdlValue)) => (key, wdlValue) - } - - val events = keyValues match { - case empty if empty.isEmpty => List(MetadataEvent.empty(MetadataKey(workflowId, None, WorkflowMetadataKeys.Outputs))) - case _ => keyValues flatMap { - case (outputName, outputValue) => - wdlValueToMetadataEvents(MetadataKey(workflowId, None, s"${WorkflowMetadataKeys.Outputs}:$outputName"), outputValue) - } - } - - serviceRegistryActor ! PutMetadataAction(events) + private def handleCallSuccessful(jobKey: JobKey, outputs: CallOutputs, data: WorkflowExecutionActorData, jobExecutionMap: JobExecutionMap) = { + handleExecutionSuccess(data.callExecutionSuccess(jobKey, outputs).addExecutions(jobExecutionMap)) } - - private def isReportableOutput(scope: Scope, entry: OutputEntry, - reportableOutputs: Seq[ReportableSymbol]): Boolean = { - reportableOutputs exists { reportableOutput => - reportableOutput.fullyQualifiedName == s"${scope.fullyQualifiedName}.${entry.name}" - } + + private def handleDeclarationEvaluationSuccessful(key: DeclarationKey, value: WdlValue, data: WorkflowExecutionActorData) = { + handleExecutionSuccess(data.declarationEvaluationSuccess(key, value)) } - - private def pushSuccessfulJobMetadata(jobKey: JobKey, returnCode: Option[Int], outputs: JobOutputs) = { - val completionEvents = completedJobMetadataEvents(jobKey, ExecutionStatus.Done, returnCode) - - val outputEvents = outputs match { - case empty if empty.isEmpty => - List(MetadataEvent.empty(metadataKey(jobKey, s"${CallMetadataKeys.Outputs}"))) + + private def handleExecutionSuccess(data: WorkflowExecutionActorData) = { + data.workflowCompletionStatus match { + case Some(ExecutionStatus.Done) => + handleWorkflowSuccessful(data) + case Some(sts) => + context.parent ! 
WorkflowExecutionFailedResponse(data.jobExecutionMap, new Exception("One or more jobs failed in fail-slow mode")) + goto(WorkflowExecutionFailedState) using data case _ => - outputs flatMap { case (lqn, value) => wdlValueToMetadataEvents(metadataKey(jobKey, s"${CallMetadataKeys.Outputs}:$lqn"), value.wdlValue) } + stay() using startRunnableScopes(data) } - - serviceRegistryActor ! PutMetadataAction(completionEvents ++ outputEvents) - } - - private def pushFailedJobMetadata(jobKey: JobKey, returnCode: Option[Int], failure: Throwable, retryableFailure: Boolean) = { - val failedState = if (retryableFailure) ExecutionStatus.Preempted else ExecutionStatus.Failed - val completionEvents = completedJobMetadataEvents(jobKey, failedState, returnCode) - val retryableFailureEvent = MetadataEvent(metadataKey(jobKey, CallMetadataKeys.RetryableFailure), MetadataValue(retryableFailure)) - val failureEvents = throwableToMetadataEvents(metadataKey(jobKey, s"${CallMetadataKeys.Failures}[$randomNumberString]"), failure).+:(retryableFailureEvent) - - serviceRegistryActor ! PutMetadataAction(completionEvents ++ failureEvents) } - - private def randomNumberString: String = Random.nextInt.toString.stripPrefix("-") - - private def completedJobMetadataEvents(jobKey: JobKey, executionStatus: ExecutionStatus, returnCode: Option[Int]) = { - val returnCodeEvent = returnCode map { rc => - List(MetadataEvent(metadataKey(jobKey, CallMetadataKeys.ReturnCode), MetadataValue(rc))) + + private def handleCallAborted(data: WorkflowExecutionActorData, jobKey: JobKey, jobExecutionMap: JobExecutionMap) = { + workflowLogger.info(s"$tag job aborted: ${jobKey.tag}") + val newStateData = data.removeCallExecutionActor(jobKey).addExecutions(jobExecutionMap) + if (!newStateData.hasRunningActors) { + workflowLogger.info(s"$tag all jobs aborted") + goto(WorkflowExecutionAbortedState) + } else { + stay() using newStateData } - - List( - MetadataEvent(metadataKey(jobKey, CallMetadataKeys.ExecutionStatus), MetadataValue(executionStatus)), - MetadataEvent(metadataKey(jobKey, CallMetadataKeys.End), MetadataValue(OffsetDateTime.now)) - ) ++ returnCodeEvent.getOrElse(List.empty) } /** * Attempt to start all runnable jobs and return updated state data. This will create a new copy - * of the state data including new pending persists. + * of the state data. */ @tailrec private def startRunnableScopes(data: WorkflowExecutionActorData): WorkflowExecutionActorData = { @@ -537,6 +339,9 @@ final case class WorkflowExecutionActor(workflowId: WorkflowId, case k: BackendJobDescriptorKey => processRunnableJob(k, data) case k: ScatterKey => processRunnableScatter(k, data) case k: CollectorKey => processRunnableCollector(k, data) + case k: SubWorkflowKey => processRunnableSubWorkflow(k, data) + case k: StaticDeclarationKey => processRunnableStaticDeclaration(k) + case k: DynamicDeclarationKey => processRunnableDynamicDeclaration(k, data) case k => val exception = new UnsupportedOperationException(s"Unknown entry in execution store: ${k.tag}") self ! 
JobInitializationFailed(k, exception) @@ -544,35 +349,42 @@ final case class WorkflowExecutionActor(workflowId: WorkflowId, } TryUtil.sequence(executionDiffs) match { - case Success(diffs) if diffs.exists(_.containsNewEntry) => startRunnableScopes(data.mergeExecutionDiffs(diffs)) - case Success(diffs) => data.mergeExecutionDiffs(diffs) - case Failure(e) => data + case Success(diffs) => + // Update the metadata for the jobs we just sent to EJEAs (they'll start off queued up waiting for tokens): + pushQueuedCallMetadata(diffs) + if (diffs.exists(_.containsNewEntry)) { + startRunnableScopes(data.mergeExecutionDiffs(diffs)) + } else { + data.mergeExecutionDiffs(diffs) + } + case Failure(e) => throw new RuntimeException("Unexpected engine failure", e) } } - private def pushNewJobMetadata(jobKey: BackendJobDescriptorKey, backendName: String) = { - val startEvents = List( - MetadataEvent(metadataKey(jobKey, CallMetadataKeys.Start), MetadataValue(OffsetDateTime.now)), - MetadataEvent(metadataKey(jobKey, CallMetadataKeys.Backend), MetadataValue(backendName)) - ) - - serviceRegistryActor ! PutMetadataAction(startEvents) + def processRunnableStaticDeclaration(declaration: StaticDeclarationKey) = { + self ! DeclarationEvaluationSucceededResponse(declaration, declaration.value) + Success(WorkflowExecutionDiff(Map(declaration -> ExecutionStatus.Running))) } + + def processRunnableDynamicDeclaration(declaration: DynamicDeclarationKey, data: WorkflowExecutionActorData) = { + val scatterMap = declaration.index flatMap { i => + // Will need update for nested scatters + declaration.scope.ancestry collectFirst { case s: Scatter => Map(s -> i) } + } getOrElse Map.empty[Scatter, Int] - private def pushRunningJobMetadata(jobDescriptor: BackendJobDescriptor) = { - val inputEvents = jobDescriptor.inputs match { - case empty if empty.isEmpty => - List(MetadataEvent.empty(metadataKey(jobDescriptor.key, s"${CallMetadataKeys.Inputs}"))) - case inputs => - inputs flatMap { - case (inputName, inputValue) => - wdlValueToMetadataEvents(metadataKey(jobDescriptor.key, s"${CallMetadataKeys.Inputs}:$inputName"), inputValue) - } + val lookup = declaration.scope.lookupFunction( + workflowDescriptor.workflowInputs, + data.expressionLanguageFunctions, + data.outputStore.fetchNodeOutputEntries, + scatterMap + ) + + declaration.requiredExpression.evaluate(lookup, data.expressionLanguageFunctions) match { + case Success(result) => self ! DeclarationEvaluationSucceededResponse(declaration, result) + case Failure(ex) => self ! DeclarationEvaluationFailedResponse(declaration, ex) } - val runningEvent = List(MetadataEvent(metadataKey(jobDescriptor.key, CallMetadataKeys.ExecutionStatus), MetadataValue(ExecutionStatus.Running))) - - serviceRegistryActor ! 
PutMetadataAction(runningEvent ++ inputEvents) + Success(WorkflowExecutionDiff(Map(declaration -> ExecutionStatus.Running))) } private def processRunnableJob(jobKey: BackendJobDescriptorKey, data: WorkflowExecutionActorData): Try[WorkflowExecutionDiff] = { @@ -583,7 +395,7 @@ final case class WorkflowExecutionActor(workflowId: WorkflowId, workflowLogger.error(exception, s"$tag $message") throw exception case Some(backendName) => - factories.get(backendName) match { + backendFactories.get(backendName) match { case Some(factory) => val ejeaName = s"${workflowDescriptor.id}-EngineJobExecutionActor-${jobKey.tag}" val backendSingleton = backendSingletonCollection.backendSingletonActors(backendName) @@ -591,32 +403,224 @@ final case class WorkflowExecutionActor(workflowId: WorkflowId, self, jobKey, data, factory, initializationData.get(backendName), restarting, serviceRegistryActor, jobStoreActor, callCacheReadActor, jobTokenDispenserActor, backendSingleton, backendName, workflowDescriptor.callCachingMode) val ejeaRef = context.actorOf(ejeaProps, ejeaName) - pushNewJobMetadata(jobKey, backendName) + context watch ejeaRef + pushNewCallMetadata(jobKey, Option(backendName)) ejeaRef ! EngineJobExecutionActor.Execute - Success(WorkflowExecutionDiff(Map(jobKey -> ExecutionStatus.Starting))) + Success(WorkflowExecutionDiff( + executionStoreChanges = Map(jobKey -> ExecutionStatus.QueuedInCromwell), + engineJobExecutionActorAdditions = Map(ejeaRef -> jobKey))) case None => throw WorkflowExecutionException(NonEmptyList.of(new Exception(s"Could not get BackendLifecycleActor for backend $backendName"))) } } } + + private def processRunnableSubWorkflow(key: SubWorkflowKey, data: WorkflowExecutionActorData): Try[WorkflowExecutionDiff] = { + val sweaRef = context.actorOf( + SubWorkflowExecutionActor.props(key, data, backendFactories, serviceRegistryActor, jobStoreActor, subWorkflowStoreActor, + callCacheReadActor, jobTokenDispenserActor, backendSingletonCollection, initializationData, restarting), + s"SubWorkflowExecutionActor-${key.tag}" + ) + + context watch sweaRef + pushNewCallMetadata(key, None) + sweaRef ! 
SubWorkflowExecutionActor.Execute + + Success(WorkflowExecutionDiff(executionStoreChanges = Map(key -> ExecutionStatus.QueuedInCromwell), + engineJobExecutionActorAdditions = Map(sweaRef -> key))) + } private def processRunnableScatter(scatterKey: ScatterKey, data: WorkflowExecutionActorData): Try[WorkflowExecutionDiff] = { - val lookup = data.hierarchicalLookup(scatterKey.scope, None) _ + val lookup = scatterKey.scope.lookupFunction( + workflowDescriptor.workflowInputs, + data.expressionLanguageFunctions, + data.outputStore.fetchNodeOutputEntries + ) scatterKey.scope.collection.evaluate(lookup, data.expressionLanguageFunctions) map { - case a: WdlArray => WorkflowExecutionDiff(scatterKey.populate(a.value.size) + (scatterKey -> ExecutionStatus.Done)) + case a: WdlArray => WorkflowExecutionDiff(scatterKey.populate(a.value.size, workflowDescriptor.inputs) + (scatterKey -> ExecutionStatus.Done)) case v: WdlValue => throw new RuntimeException("Scatter collection must evaluate to an array") } } private def processRunnableCollector(collector: CollectorKey, data: WorkflowExecutionActorData): Try[WorkflowExecutionDiff] = { - val shards = data.executionStore.findShardEntries(collector) collect { case (k: BackendJobDescriptorKey, v) if v == ExecutionStatus.Done => k } + val shards = data.executionStore.findShardEntries(collector) collect { + case (k: CallKey, v) if v == ExecutionStatus.Done => k + case (k: DynamicDeclarationKey, v) if v == ExecutionStatus.Done => k + } data.outputStore.generateCollectorOutput(collector, shards) match { case Failure(e) => Failure(new RuntimeException(s"Failed to collect output shards for call ${collector.tag}")) case Success(outputs) => self ! ScatterCollectionSucceededResponse(collector, outputs) Success(WorkflowExecutionDiff(Map(collector -> ExecutionStatus.Starting))) } } - - private def metadataKey(jobKey: JobKey, myKey: String) = MetadataKey(workflowDescriptor.id, Option(MetadataJobKey(jobKey.scope.fullyQualifiedName, jobKey.index, jobKey.attempt)), myKey) } + +object WorkflowExecutionActor { + + /** + * States + */ + sealed trait WorkflowExecutionActorState { + def terminal = false + } + + sealed trait WorkflowExecutionActorTerminalState extends WorkflowExecutionActorState { + override val terminal = true + } + + case object WorkflowExecutionPendingState extends WorkflowExecutionActorState + + case object WorkflowExecutionInProgressState extends WorkflowExecutionActorState + + case object WorkflowExecutionAbortingState extends WorkflowExecutionActorState + + case object WorkflowExecutionSuccessfulState extends WorkflowExecutionActorTerminalState + + case object WorkflowExecutionFailedState extends WorkflowExecutionActorTerminalState + + case object WorkflowExecutionAbortedState extends WorkflowExecutionActorTerminalState + + /** + * Commands + */ + sealed trait WorkflowExecutionActorCommand + + case object ExecuteWorkflowCommand extends WorkflowExecutionActorCommand + + /** + * Responses + */ + sealed trait WorkflowExecutionActorResponse { + def jobExecutionMap: JobExecutionMap + } + + case class WorkflowExecutionSucceededResponse(jobExecutionMap: JobExecutionMap, outputs: CallOutputs) + extends WorkflowExecutionActorResponse { + override def toString = "WorkflowExecutionSucceededResponse" + } + + case class WorkflowExecutionAbortedResponse(jobExecutionMap: JobExecutionMap) + extends WorkflowExecutionActorResponse with EngineLifecycleActorAbortedResponse { + override def toString = "WorkflowExecutionAbortedResponse" + } + + final case class 
WorkflowExecutionFailedResponse(jobExecutionMap: JobExecutionMap, reason: Throwable) extends WorkflowExecutionActorResponse { + override def toString = "WorkflowExecutionFailedResponse" + } + + /** + * Internal control flow messages + */ + private case class JobInitializationFailed(jobKey: JobKey, throwable: Throwable) + + private case class ScatterCollectionFailedResponse(collectorKey: CollectorKey, throwable: Throwable) + + private case class ScatterCollectionSucceededResponse(collectorKey: CollectorKey, outputs: CallOutputs) + + private case class DeclarationEvaluationSucceededResponse(declarationKey: DeclarationKey, value: WdlValue) + + private case class DeclarationEvaluationFailedResponse(declarationKey: DeclarationKey, reason: Throwable) + + case class SubWorkflowSucceededResponse(key: SubWorkflowKey, jobExecutionMap: JobExecutionMap, outputs: CallOutputs) + + case class SubWorkflowFailedResponse(key: SubWorkflowKey, jobExecutionMap: JobExecutionMap, reason: Throwable) + + case class SubWorkflowAbortedResponse(key: SubWorkflowKey, jobExecutionMap: JobExecutionMap) + + /** + * Internal ADTs + */ + case class ScatterKey(scope: Scatter) extends JobKey { + override val index = None + // When scatters are nested, this might become Some(_) + override val attempt = 1 + override val tag = scope.unqualifiedName + + /** + * Creates a sub-ExecutionStore with Starting entries for each of the scoped children. + * + * @param count Number of ways to scatter the children. + * @return ExecutionStore of scattered children. + */ + def populate(count: Int, workflowCoercedInputs: WorkflowCoercedInputs): Map[JobKey, ExecutionStatus.Value] = { + val keys = this.scope.children flatMap { + explode(_, count, workflowCoercedInputs) + } + keys map { + _ -> ExecutionStatus.NotStarted + } toMap + } + + private def explode(scope: Scope, count: Int, workflowCoercedInputs: WorkflowCoercedInputs): Seq[JobKey] = { + scope match { + case call: TaskCall => + val shards = (0 until count) map { i => BackendJobDescriptorKey(call, Option(i), 1) } + shards :+ CollectorKey(call) + case call: WorkflowCall => + val shards = (0 until count) map { i => SubWorkflowKey(call, Option(i), 1) } + shards :+ CollectorKey(call) + case declaration: Declaration => + val shards = (0 until count) map { i => DeclarationKey(declaration, Option(i), workflowCoercedInputs) } + shards :+ CollectorKey(declaration) + case scatter: Scatter => + throw new UnsupportedOperationException("Nested Scatters are not supported (yet) ... 
but you might try a sub workflow to achieve the same effect!") + case e => + throw new UnsupportedOperationException(s"Scope ${e.getClass.getName} is not supported.") + } + } + } + + // Represents a scatter collection for a call in the execution store + case class CollectorKey(scope: Scope with GraphNode) extends JobKey { + override val index = None + override val attempt = 1 + override val tag = s"Collector-${scope.unqualifiedName}" + } + + case class SubWorkflowKey(scope: WorkflowCall, index: ExecutionIndex, attempt: Int) extends CallKey { + override val tag = s"SubWorkflow-${scope.unqualifiedName}:${index.fromIndex}:$attempt" + } + + object DeclarationKey { + def apply(declaration: Declaration, index: ExecutionIndex, inputs: WorkflowCoercedInputs): DeclarationKey = { + inputs.find(_._1 == declaration.fullyQualifiedName) match { + case Some((_, value)) => StaticDeclarationKey(declaration, index, value) + case None => declaration.expression map { expression => + DynamicDeclarationKey(declaration, index, expression) + } getOrElse { + throw new RuntimeException(s"Found a declaration ${declaration.fullyQualifiedName} without expression and without input value. This should have been a validation error.") + } + } + } + } + + sealed trait DeclarationKey extends JobKey { + override val attempt = 1 + override val tag = s"Declaration-${scope.unqualifiedName}:${index.fromIndex}:$attempt" + } + + case class StaticDeclarationKey(scope: Declaration, index: ExecutionIndex, value: WdlValue) extends DeclarationKey + + case class DynamicDeclarationKey(scope: Declaration, index: ExecutionIndex, requiredExpression: WdlExpression) extends DeclarationKey + + case class WorkflowExecutionException[T <: Throwable](exceptions: NonEmptyList[T]) extends ThrowableAggregation { + override val throwables = exceptions.toList + override val exceptionContext = s"WorkflowExecutionActor" + } + + private lazy val DefaultMaxRetriesFallbackValue = 10 + + def props(workflowDescriptor: EngineWorkflowDescriptor, + serviceRegistryActor: ActorRef, + jobStoreActor: ActorRef, + subWorkflowStoreActor: ActorRef, + callCacheReadActor: ActorRef, + jobTokenDispenserActor: ActorRef, + backendSingletonCollection: BackendSingletonCollection, + initializationData: AllBackendInitializationData, + restarting: Boolean): Props = { + Props(WorkflowExecutionActor(workflowDescriptor, serviceRegistryActor, jobStoreActor, subWorkflowStoreActor, + callCacheReadActor, jobTokenDispenserActor, backendSingletonCollection, initializationData, restarting)).withDispatcher(EngineDispatcher) + } +} \ No newline at end of file diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActorData.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActorData.scala index 599c8f1b4a5..4bf2f213cb5 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActorData.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActorData.scala @@ -1,55 +1,100 @@ package cromwell.engine.workflow.lifecycle.execution import akka.actor.ActorRef +import cromwell.backend._ import cromwell.core.ExecutionStatus._ -import cromwell.core.OutputStore.{OutputCallKey, OutputEntry} import cromwell.core._ +import cromwell.engine.workflow.lifecycle.execution.OutputStore.{OutputCallKey, OutputEntry} +import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor.{DeclarationKey, SubWorkflowKey} import 
cromwell.engine.{EngineWorkflowDescriptor, WdlFunctions} import cromwell.util.JsonFormatting.WdlValueJsonFormatter -import wdl4s.Scope - +import wdl4s.values.WdlValue +import wdl4s.{GraphNode, Scope} object WorkflowExecutionDiff { def empty = WorkflowExecutionDiff(Map.empty) } /** Data differential between current execution data, and updates performed in a method that needs to be merged. */ -final case class WorkflowExecutionDiff(executionStore: Map[JobKey, ExecutionStatus]) { - def containsNewEntry = executionStore.exists(_._2 == NotStarted) +final case class WorkflowExecutionDiff(executionStoreChanges: Map[JobKey, ExecutionStatus], + engineJobExecutionActorAdditions: Map[ActorRef, JobKey] = Map.empty) { + def containsNewEntry = executionStoreChanges.exists(_._2 == NotStarted) +} + +object WorkflowExecutionActorData { + def empty(workflowDescriptor: EngineWorkflowDescriptor) = { + new WorkflowExecutionActorData( + workflowDescriptor, + ExecutionStore.empty, + Map.empty, + Map.empty, + Map.empty, + Map.empty, + OutputStore.empty + ) + } } case class WorkflowExecutionActorData(workflowDescriptor: EngineWorkflowDescriptor, executionStore: ExecutionStore, backendJobExecutionActors: Map[JobKey, ActorRef], - outputStore: OutputStore) extends WdlLookup { + engineCallExecutionActors: Map[ActorRef, JobKey], + subWorkflowExecutionActors: Map[SubWorkflowKey, ActorRef], + downstreamExecutionMap: JobExecutionMap, + outputStore: OutputStore) { + + val expressionLanguageFunctions = new WdlFunctions(workflowDescriptor.pathBuilders) + + def callExecutionSuccess(jobKey: JobKey, outputs: CallOutputs) = { + val (newJobExecutionActors, newSubWorkflowExecutionActors) = jobKey match { + case jobKey: BackendJobDescriptorKey => (backendJobExecutionActors - jobKey, subWorkflowExecutionActors) + case swKey: SubWorkflowKey => (backendJobExecutionActors, subWorkflowExecutionActors - swKey) + case _ => (backendJobExecutionActors, subWorkflowExecutionActors) + } - override val expressionLanguageFunctions = new WdlFunctions(workflowDescriptor.engineFilesystems) + this.copy( + executionStore = executionStore.add(Map(jobKey -> Done)), + backendJobExecutionActors = newJobExecutionActors, + subWorkflowExecutionActors = newSubWorkflowExecutionActors, + outputStore = outputStore.add(updateSymbolStoreEntry(jobKey, outputs)) + ) + } + + def declarationEvaluationSuccess(declarationKey: DeclarationKey, value: WdlValue) = { + val outputStoreKey = OutputCallKey(declarationKey.scope, declarationKey.index) + val outputStoreValue = OutputEntry(declarationKey.scope.unqualifiedName, value.wdlType, Option(value)) + this.copy( + executionStore = executionStore.add(Map(declarationKey -> Done)), + outputStore = outputStore.add(Map(outputStoreKey -> List(outputStoreValue))) + ) + } - def jobExecutionSuccess(jobKey: JobKey, outputs: JobOutputs) = this.copy( - executionStore = executionStore.add(Map(jobKey -> Done)), - backendJobExecutionActors = backendJobExecutionActors - jobKey, - outputStore = outputStore.add(updateSymbolStoreEntry(jobKey, outputs)) - ) + def executionFailed(jobKey: JobKey) = mergeExecutionDiff(WorkflowExecutionDiff(Map(jobKey -> ExecutionStatus.Failed))) /** Add the outputs for the specified `JobKey` to the symbol cache. 
*/ - private def updateSymbolStoreEntry(jobKey: JobKey, outputs: JobOutputs) = { + private def updateSymbolStoreEntry(jobKey: JobKey, outputs: CallOutputs) = { val newOutputEntries = outputs map { case (name, value) => OutputEntry(name, value.wdlValue.wdlType, Option(value.wdlValue)) } - Map(OutputCallKey(jobKey.scope, jobKey.index) -> newOutputEntries) + Map(OutputCallKey(jobKey.scope, jobKey.index) -> newOutputEntries.toList) } /** Checks if the workflow is completed by scanning through the executionStore. * If complete, this will return Some(finalStatus). Otherwise, returns None */ def workflowCompletionStatus: Option[ExecutionStatus] = { // `List`ify the `prerequisiteScopes` to avoid expensive hashing of `Scope`s when assembling the result. - def upstream(scope: Scope): List[Scope] = scope.prerequisiteScopes.toList ++ scope.prerequisiteScopes.toList.flatMap(upstream) - def upstreamFailed(scope: Scope) = upstream(scope) filter { s => - executionStore.store.map({ case (a, b) => a.scope -> b }).get(s).contains(Failed) + def upstream(scope: GraphNode): List[Scope] = { + val directUpstream: List[Scope with GraphNode] = scope.upstream.toList + directUpstream ++ directUpstream.flatMap(upstream) + } + def upstreamFailed(scope: Scope) = scope match { + case node: GraphNode => upstream(node) filter { s => + executionStore.store.exists({ case (key, status) => status == Failed && key.scope == s }) + } } // activeJobs is the subset of the executionStore that are either running or will run in the future. val activeJobs = executionStore.store.toList filter { - case (jobKey, jobStatus) => (jobStatus == NotStarted && upstreamFailed(jobKey.scope).isEmpty) || jobStatus == Starting || jobStatus == Running + case (jobKey, jobStatus) => (jobStatus == NotStarted && upstreamFailed(jobKey.scope).isEmpty) || jobStatus == QueuedInCromwell || jobStatus == Starting || jobStatus == Running } activeJobs match { @@ -63,13 +108,30 @@ case class WorkflowExecutionActorData(workflowDescriptor: EngineWorkflowDescript executionStore.store.values.exists(_ == ExecutionStatus.Failed) } - def addBackendJobExecutionActor(jobKey: JobKey, actor: Option[ActorRef]): WorkflowExecutionActorData = actor match { - case Some(actorRef) => this.copy(backendJobExecutionActors = backendJobExecutionActors + (jobKey -> actorRef)) + def removeEngineJobExecutionActor(actorRef: ActorRef) = { + this.copy(engineCallExecutionActors = engineCallExecutionActors - actorRef) + } + + def addCallExecutionActor(jobKey: JobKey, actor: Option[ActorRef]): WorkflowExecutionActorData = actor match { + case Some(actorRef) => + jobKey match { + case jobKey: BackendJobDescriptorKey => this.copy(backendJobExecutionActors = backendJobExecutionActors + (jobKey -> actorRef)) + case swKey: SubWorkflowKey => this.copy(subWorkflowExecutionActors = subWorkflowExecutionActors + (swKey -> actorRef)) + case _ => this + } case None => this } - def removeBackendJobExecutionActor(jobKey: JobKey): WorkflowExecutionActorData = { - this.copy(backendJobExecutionActors = backendJobExecutionActors - jobKey) + def removeCallExecutionActor(jobKey: JobKey): WorkflowExecutionActorData = { + jobKey match { + case jobKey: BackendJobDescriptorKey => this.copy(backendJobExecutionActors = backendJobExecutionActors - jobKey) + case swKey: SubWorkflowKey => this.copy(subWorkflowExecutionActors = subWorkflowExecutionActors - swKey) + case _ => this + } + } + + def addExecutions(jobExecutionMap: JobExecutionMap): WorkflowExecutionActorData = { + this.copy(downstreamExecutionMap = 
downstreamExecutionMap ++ jobExecutionMap) } def outputsJson(): String = { @@ -86,11 +148,19 @@ case class WorkflowExecutionActorData(workflowDescriptor: EngineWorkflowDescript } def mergeExecutionDiff(diff: WorkflowExecutionDiff): WorkflowExecutionActorData = { - this.copy(executionStore = executionStore.add(diff.executionStore)) + this.copy( + executionStore = executionStore.add(diff.executionStoreChanges), + engineCallExecutionActors = engineCallExecutionActors ++ diff.engineJobExecutionActorAdditions) } def mergeExecutionDiffs(diffs: Traversable[WorkflowExecutionDiff]): WorkflowExecutionActorData = { diffs.foldLeft(this)((newData, diff) => newData.mergeExecutionDiff(diff)) } - + + def jobExecutionMap: JobExecutionMap = { + val keys = executionStore.store.collect({case (k: BackendJobDescriptorKey, status) if status != ExecutionStatus.NotStarted => k }).toList + downstreamExecutionMap updated (workflowDescriptor.backendDescriptor, keys) + } + + def hasRunningActors = backendJobExecutionActors.nonEmpty || subWorkflowExecutionActors.nonEmpty } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowMetadataHelper.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowMetadataHelper.scala new file mode 100644 index 00000000000..d569f1faeea --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowMetadataHelper.scala @@ -0,0 +1,37 @@ +package cromwell.engine.workflow.lifecycle.execution + +import java.time.OffsetDateTime + +import akka.actor.ActorRef +import cromwell.core.{WorkflowId, WorkflowMetadataKeys, WorkflowState} +import cromwell.services.metadata.MetadataService._ +import cromwell.services.metadata.{MetadataEvent, MetadataKey, MetadataValue} + +import scala.util.Random + +trait WorkflowMetadataHelper { + + def serviceRegistryActor: ActorRef + + def pushWorkflowStart(workflowId: WorkflowId) = { + val startEvent = MetadataEvent(MetadataKey(workflowId, None, WorkflowMetadataKeys.StartTime), MetadataValue(OffsetDateTime.now.toString)) + serviceRegistryActor ! PutMetadataAction(startEvent) + } + + def pushWorkflowEnd(workflowId: WorkflowId) = { + val metadataEventMsg = MetadataEvent(MetadataKey(workflowId, None, WorkflowMetadataKeys.EndTime), MetadataValue(OffsetDateTime.now.toString)) + serviceRegistryActor ! PutMetadataAction(metadataEventMsg) + } + + def pushWorkflowFailures(workflowId: WorkflowId, failures: List[Throwable]) = { + val failureEvents = failures flatMap { r => throwableToMetadataEvents(MetadataKey(workflowId, None, s"${WorkflowMetadataKeys.Failures}[${Random.nextInt(Int.MaxValue)}]"), r) } + serviceRegistryActor ! PutMetadataAction(failureEvents) + } + + def pushCurrentStateToMetadataService(workflowId: WorkflowId, workflowState: WorkflowState): Unit = { + val metadataEventMsg = MetadataEvent(MetadataKey(workflowId, None, WorkflowMetadataKeys.Status), + MetadataValue(workflowState)) + serviceRegistryActor ! 
PutMetadataAction(metadataEventMsg) + } + +} diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCache.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCache.scala index 8c5331c424e..674b1ee88e8 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCache.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCache.scala @@ -1,7 +1,9 @@ package cromwell.engine.workflow.lifecycle.execution.callcaching +import java.nio.file.Path + import cats.data.NonEmptyList -import cromwell.backend.BackendJobExecutionActor.SucceededResponse +import cromwell.backend.BackendJobExecutionActor.JobSucceededResponse import cromwell.core.ExecutionIndex.IndexEnhancedIndex import cromwell.core.WorkflowId import cromwell.core.callcaching.HashResult @@ -19,7 +21,7 @@ final case class CallCachingEntryId(id: Int) * Given a database-layer CallCacheStore, this accessor can access the database with engine-friendly data types. */ class CallCache(database: CallCachingSqlDatabase) { - def addToCache(workflowId: WorkflowId, callCacheHashes: CallCacheHashes, response: SucceededResponse)(implicit ec: ExecutionContext): Future[Unit] = { + def addToCache(workflowId: WorkflowId, callCacheHashes: CallCacheHashes, response: JobSucceededResponse)(implicit ec: ExecutionContext): Future[Unit] = { val metaInfo = CallCachingEntry( workflowExecutionUuid = workflowId.toString, callFullyQualifiedName = response.jobKey.call.fullyQualifiedName, @@ -35,7 +37,7 @@ class CallCache(database: CallCachingSqlDatabase) { } private def addToCache(callCachingEntry: CallCachingEntry, hashes: Set[HashResult], - result: Iterable[WdlValueSimpleton], jobDetritus: Map[String, String]) + result: Iterable[WdlValueSimpleton], jobDetritus: Map[String, Path]) (implicit ec: ExecutionContext): Future[Unit] = { val hashesToInsert: Iterable[CallCachingHashEntry] = { @@ -51,7 +53,7 @@ class CallCache(database: CallCachingSqlDatabase) { val jobDetritusToInsert: Iterable[CallCachingDetritusEntry] = { jobDetritus map { - case (fileName, filePath) => CallCachingDetritusEntry(fileName, filePath) + case (fileName, filePath) => CallCachingDetritusEntry(fileName, filePath.toUri.toString) } } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheInvalidateActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheInvalidateActor.scala index ef09d32e918..a0ec75fc8cd 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheInvalidateActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheInvalidateActor.scala @@ -33,4 +33,5 @@ object CallCacheInvalidateActor { sealed trait CallCacheInvalidatedResponse case object CallCacheInvalidatedSuccess extends CallCacheInvalidatedResponse +case object CallCacheInvalidationUnnecessary extends CallCacheInvalidatedResponse case class CallCacheInvalidatedFailure(t: Throwable) extends CallCacheInvalidatedResponse \ No newline at end of file diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheWriteActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheWriteActor.scala index f0e9c01864a..c6e42b5cc7d 100644 --- 
a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheWriteActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheWriteActor.scala @@ -2,14 +2,14 @@ package cromwell.engine.workflow.lifecycle.execution.callcaching import akka.actor.{Actor, ActorLogging, Props} import cromwell.backend.BackendJobExecutionActor -import cromwell.backend.BackendJobExecutionActor.SucceededResponse +import cromwell.backend.BackendJobExecutionActor.JobSucceededResponse import cromwell.core.WorkflowId import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor.CallCacheHashes import scala.concurrent.ExecutionContext import scala.util.{Failure, Success} -case class CallCacheWriteActor(callCache: CallCache, workflowId: WorkflowId, callCacheHashes: CallCacheHashes, succeededResponse: BackendJobExecutionActor.SucceededResponse) extends Actor with ActorLogging { +case class CallCacheWriteActor(callCache: CallCache, workflowId: WorkflowId, callCacheHashes: CallCacheHashes, succeededResponse: BackendJobExecutionActor.JobSucceededResponse) extends Actor with ActorLogging { implicit val ec: ExecutionContext = context.dispatcher @@ -30,7 +30,7 @@ case class CallCacheWriteActor(callCache: CallCache, workflowId: WorkflowId, cal } object CallCacheWriteActor { - def props(callCache: CallCache, workflowId: WorkflowId, callCacheHashes: CallCacheHashes, succeededResponse: SucceededResponse): Props = + def props(callCache: CallCache, workflowId: WorkflowId, callCacheHashes: CallCacheHashes, succeededResponse: JobSucceededResponse): Props = Props(CallCacheWriteActor(callCache, workflowId, callCacheHashes, succeededResponse)) } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EngineJobHashingActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EngineJobHashingActor.scala index b4ad358f54d..053e8a14e72 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EngineJobHashingActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EngineJobHashingActor.scala @@ -65,7 +65,7 @@ case class EngineJobHashingActor(receiver: ActorRef, import cromwell.core.simpleton.WdlValueSimpleton._ - val inputSimpletons = jobDescriptor.inputs.simplify + val inputSimpletons = jobDescriptor.fullyQualifiedInputs.simplify val (fileInputSimpletons, nonFileInputSimpletons) = inputSimpletons partition { case WdlValueSimpleton(_, f: WdlFile) => true case _ => false @@ -107,7 +107,7 @@ case class EngineJobHashingActor(receiver: ActorRef, } val outputExpressionHashResults = jobDescriptor.call.task.outputs map { output => - HashResult(HashKey(s"output expression: ${output.wdlType.toWdlString} ${output.name}"), output.requiredExpression.valueString.md5HashValue) + HashResult(HashKey(s"output expression: ${output.wdlType.toWdlString} ${output.unqualifiedName}"), output.requiredExpression.valueString.md5HashValue) } // Build these all together for the final set of initial hashes: diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/package.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/package.scala index 1d3eedd9fae..d0350e66296 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/package.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/package.scala @@ -1,10 +1,12 @@ package 
cromwell.engine.workflow.lifecycle +import akka.actor.ActorRef import wdl4s._ -package object execution { - def splitFqn(fullyQualifiedName: FullyQualifiedName): (String, String) = { - val lastIndex = fullyQualifiedName.lastIndexOf(".") - (fullyQualifiedName.substring(0, lastIndex), fullyQualifiedName.substring(lastIndex + 1)) - } +package execution { + + import cromwell.core.CallKey + + final case class JobRunning(key: CallKey, inputs: EvaluatedTaskInputs, executionActor: Option[ActorRef]) + final case class JobStarting(callKey: CallKey) } diff --git a/engine/src/main/scala/cromwell/engine/workflow/tokens/JobExecutionTokenDispenserActor.scala b/engine/src/main/scala/cromwell/engine/workflow/tokens/JobExecutionTokenDispenserActor.scala index b2afd6b5efd..99d10221dc9 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/tokens/JobExecutionTokenDispenserActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/tokens/JobExecutionTokenDispenserActor.scala @@ -84,7 +84,7 @@ class JobExecutionTokenDispenserActor extends Actor with ActorLogging { private def onTerminate(terminee: ActorRef): Unit = { tokenAssignments.get(terminee) match { case Some(token) => - log.error("Actor {} stopped without returning its Job Execution Token. Reclaiming it!", terminee) + log.debug("Actor {} stopped without returning its Job Execution Token. Reclaiming it!", terminee) self.tell(msg = JobExecutionTokenReturn(token), sender = terminee) case None => log.debug("Actor {} stopped while we were still watching it... but it doesn't have a token. Removing it from any queues if necessary", terminee) diff --git a/engine/src/test/scala/cromwell/engine/workflow/workflowstore/InMemoryWorkflowStore.scala b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/InMemoryWorkflowStore.scala similarity index 88% rename from engine/src/test/scala/cromwell/engine/workflow/workflowstore/InMemoryWorkflowStore.scala rename to engine/src/main/scala/cromwell/engine/workflow/workflowstore/InMemoryWorkflowStore.scala index a24be2d3259..0f04212f843 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/workflowstore/InMemoryWorkflowStore.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/InMemoryWorkflowStore.scala @@ -1,7 +1,7 @@ package cromwell.engine.workflow.workflowstore import cats.data.NonEmptyList -import cromwell.core.{WorkflowId, WorkflowSourceFiles} +import cromwell.core.{WorkflowId, WorkflowSourceFilesCollection} import cromwell.engine.workflow.workflowstore.WorkflowStoreState.StartableState import scala.concurrent.{ExecutionContext, Future} @@ -14,7 +14,7 @@ class InMemoryWorkflowStore extends WorkflowStore { * Adds the requested WorkflowSourceFiles to the store and returns a WorkflowId for each one (in order) * for tracking purposes. 
*/ - override def add(sources: NonEmptyList[WorkflowSourceFiles])(implicit ec: ExecutionContext): Future[NonEmptyList[WorkflowId]] = { + override def add(sources: NonEmptyList[WorkflowSourceFilesCollection])(implicit ec: ExecutionContext): Future[NonEmptyList[WorkflowId]] = { val submittedWorkflows = sources map { SubmittedWorkflow(WorkflowId.randomId(), _, WorkflowStoreState.Submitted) } workflowStore = workflowStore ++ submittedWorkflows.toList Future.successful(submittedWorkflows map { _.id }) @@ -44,7 +44,7 @@ class InMemoryWorkflowStore extends WorkflowStore { override def initialize(implicit ec: ExecutionContext): Future[Unit] = Future.successful(()) } -final case class SubmittedWorkflow(id: WorkflowId, sources: WorkflowSourceFiles, state: WorkflowStoreState) { +final case class SubmittedWorkflow(id: WorkflowId, sources: WorkflowSourceFilesCollection, state: WorkflowStoreState) { def toWorkflowToStart: WorkflowToStart = { state match { case r: StartableState => WorkflowToStart(id, sources, r) diff --git a/engine/src/main/scala/cromwell/engine/workflow/workflowstore/SqlWorkflowStore.scala b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/SqlWorkflowStore.scala index 7056137c3af..29190617ee9 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/workflowstore/SqlWorkflowStore.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/SqlWorkflowStore.scala @@ -1,9 +1,12 @@ package cromwell.engine.workflow.workflowstore import java.time.OffsetDateTime +import javax.sql.rowset.serial.SerialBlob import cats.data.NonEmptyList -import cromwell.core.{WorkflowId, WorkflowSourceFiles} +import com.typesafe.config.ConfigFactory +import net.ceedubs.ficus.Ficus._ +import cromwell.core.{WorkflowId, WorkflowSourceFilesCollection} import cromwell.database.sql.SqlConverters._ import cromwell.database.sql.WorkflowStoreSqlDatabase import cromwell.database.sql.tables.WorkflowStoreEntry @@ -13,9 +16,13 @@ import scala.concurrent.{ExecutionContext, Future} case class SqlWorkflowStore(sqlDatabase: WorkflowStoreSqlDatabase) extends WorkflowStore { override def initialize(implicit ec: ExecutionContext): Future[Unit] = { - sqlDatabase.updateWorkflowState( - WorkflowStoreState.Running.toString, - WorkflowStoreState.Restartable.toString) + if (ConfigFactory.load().as[Option[Boolean]]("system.workflow-restart").getOrElse(true)) { + sqlDatabase.updateWorkflowState( + WorkflowStoreState.Running.toString, + WorkflowStoreState.Restartable.toString) + } else { + Future.successful(()) + } } override def remove(id: WorkflowId)(implicit ec: ExecutionContext): Future[Boolean] = { @@ -36,7 +43,7 @@ case class SqlWorkflowStore(sqlDatabase: WorkflowStoreSqlDatabase) extends Workf * Adds the requested WorkflowSourceFiles to the store and returns a WorkflowId for each one (in order) * for tracking purposes. 
*/ - override def add(sources: NonEmptyList[WorkflowSourceFiles])(implicit ec: ExecutionContext): Future[NonEmptyList[WorkflowId]] = { + override def add(sources: NonEmptyList[WorkflowSourceFilesCollection])(implicit ec: ExecutionContext): Future[NonEmptyList[WorkflowId]] = { val asStoreEntries = sources map toWorkflowStoreEntry val returnValue = asStoreEntries map { workflowStore => WorkflowId.fromString(workflowStore.workflowExecutionUuid) } @@ -46,24 +53,27 @@ case class SqlWorkflowStore(sqlDatabase: WorkflowStoreSqlDatabase) extends Workf } private def fromWorkflowStoreEntry(workflowStoreEntry: WorkflowStoreEntry): WorkflowToStart = { - val sources = WorkflowSourceFiles( + val sources = WorkflowSourceFilesCollection( workflowStoreEntry.workflowDefinition.toRawString, workflowStoreEntry.workflowInputs.toRawString, - workflowStoreEntry.workflowOptions.toRawString) + workflowStoreEntry.workflowOptions.toRawString, + workflowStoreEntry.importsZipFile.map(b => b.getBytes(1, b.length.asInstanceOf[Int])) + ) WorkflowToStart( WorkflowId.fromString(workflowStoreEntry.workflowExecutionUuid), sources, fromDbStateStringToStartableState(workflowStoreEntry.workflowState)) } - private def toWorkflowStoreEntry(workflowSourceFiles: WorkflowSourceFiles): WorkflowStoreEntry = { + private def toWorkflowStoreEntry(workflowSourceFiles: WorkflowSourceFilesCollection): WorkflowStoreEntry = { WorkflowStoreEntry( WorkflowId.randomId().toString, workflowSourceFiles.wdlSource.toClob, workflowSourceFiles.inputsJson.toClob, workflowSourceFiles.workflowOptionsJson.toClob, WorkflowStoreState.Submitted.toString, - OffsetDateTime.now.toSystemTimestamp + OffsetDateTime.now.toSystemTimestamp, + workflowSourceFiles.importsZipFileOption.map(new SerialBlob(_)) ) } diff --git a/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStore.scala b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStore.scala index e3d7b44bedc..f4734f7bb73 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStore.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStore.scala @@ -1,7 +1,7 @@ package cromwell.engine.workflow.workflowstore import cats.data.NonEmptyList -import cromwell.core.{WorkflowId, WorkflowSourceFiles} +import cromwell.core.{WorkflowId, WorkflowSourceFilesCollection} import cromwell.engine.workflow.workflowstore.WorkflowStoreState.StartableState import scala.concurrent.{ExecutionContext, Future} @@ -14,7 +14,7 @@ trait WorkflowStore { * Adds the requested WorkflowSourceFiles to the store and returns a WorkflowId for each one (in order) * for tracking purposes. 
*/ - def add(sources: NonEmptyList[WorkflowSourceFiles])(implicit ec: ExecutionContext): Future[NonEmptyList[WorkflowId]] + def add(sources: NonEmptyList[WorkflowSourceFilesCollection])(implicit ec: ExecutionContext): Future[NonEmptyList[WorkflowId]] /** * Retrieves up to n workflows which have not already been pulled into the engine and sets their pickedUp diff --git a/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStoreActor.scala b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStoreActor.scala index 24cb3a6a7ef..2ecccbce23b 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStoreActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStoreActor.scala @@ -4,7 +4,7 @@ import java.time.OffsetDateTime import akka.actor.{ActorLogging, ActorRef, LoggingFSM, Props} import cats.data.NonEmptyList -import cromwell.core.{WorkflowId, WorkflowMetadataKeys, WorkflowSourceFiles} +import cromwell.core._ import cromwell.engine.workflow.WorkflowManagerActor import cromwell.engine.workflow.WorkflowManagerActor.WorkflowNotFoundException import cromwell.engine.workflow.workflowstore.WorkflowStoreActor._ @@ -12,9 +12,10 @@ import cromwell.engine.workflow.workflowstore.WorkflowStoreState.StartableState import cromwell.services.metadata.MetadataService.{MetadataPutAcknowledgement, PutMetadataAction} import cromwell.services.metadata.{MetadataEvent, MetadataKey, MetadataValue} import org.apache.commons.lang3.exception.ExceptionUtils +import wdl4s.util.TryUtil import scala.concurrent.{ExecutionContext, Future} -import scala.util.{Failure, Success} +import scala.util.{Failure, Success, Try} case class WorkflowStoreActor(store: WorkflowStore, serviceRegistryActor: ActorRef) extends LoggingFSM[WorkflowStoreActorState, WorkflowStoreActorData] with ActorLogging { @@ -69,14 +70,14 @@ case class WorkflowStoreActor(store: WorkflowStore, serviceRegistryActor: ActorR private def startNewWork(command: WorkflowStoreActorCommand, sndr: ActorRef, nextData: WorkflowStoreActorData) = { val work: Future[Any] = command match { case cmd @ SubmitWorkflow(sourceFiles) => - store.add(NonEmptyList.of(sourceFiles)) map { ids => + storeWorkflowSources(NonEmptyList.of(sourceFiles)) map { ids => val id = ids.head registerSubmissionWithMetadataService(id, sourceFiles) sndr ! WorkflowSubmittedToStore(id) log.info("Workflow {} submitted.", id) } case cmd @ BatchSubmitWorkflows(sources) => - store.add(sources) map { ids => + storeWorkflowSources(sources) map { ids => val assignedSources = ids.toList.zip(sources.toList) assignedSources foreach { case (id, sourceFiles) => registerSubmissionWithMetadataService(id, sourceFiles) } sndr ! 
WorkflowsBatchSubmittedToStore(ids) @@ -116,6 +117,37 @@ case class WorkflowStoreActor(store: WorkflowStore, serviceRegistryActor: ActorR goto(Working) using nextData } + private def storeWorkflowSources(sources: NonEmptyList[WorkflowSourceFilesCollection]): Future[NonEmptyList[WorkflowId]] = { + for { + processedSources <- Future.fromTry(processSources(sources, _.asPrettyJson)) + workflowIds <- store.add(processedSources) + } yield workflowIds + } + + private def processSources(sources: NonEmptyList[WorkflowSourceFilesCollection], + processOptions: WorkflowOptions => WorkflowOptionsJson): + Try[NonEmptyList[WorkflowSourceFilesCollection]] = { + val nelTries: NonEmptyList[Try[WorkflowSourceFilesCollection]] = sources map processSource(processOptions) + val seqTries: Seq[Try[WorkflowSourceFilesCollection]] = nelTries.toList + val trySeqs: Try[Seq[WorkflowSourceFilesCollection]] = TryUtil.sequence(seqTries) + val tryNel: Try[NonEmptyList[WorkflowSourceFilesCollection]] = trySeqs.map(seq => NonEmptyList.fromList(seq.toList).get) + tryNel + } + + /** + * Runs processing on workflow source files before they are stored. + * + * @param processOptions How to process the workflow options + * @param source Original workflow source + * @return Attempted updated workflow source + */ + private def processSource(processOptions: WorkflowOptions => WorkflowOptionsJson) + (source: WorkflowSourceFilesCollection): Try[WorkflowSourceFilesCollection] = { + for { + processedWorkflowOptions <- WorkflowOptions.fromJsonString(source.workflowOptionsJson) + } yield source.copyOptions(processOptions(processedWorkflowOptions)) + } + private def addWorkCompletionHooks[A](command: WorkflowStoreActorCommand, work: Future[A]) = { work.onComplete { case Success(_) => @@ -152,7 +184,9 @@ case class WorkflowStoreActor(store: WorkflowStore, serviceRegistryActor: ActorR /** * Takes the workflow id and sends it over to the metadata service w/ default empty values for inputs/outputs */ - private def registerSubmissionWithMetadataService(id: WorkflowId, sourceFiles: WorkflowSourceFiles): Unit = { + private def registerSubmissionWithMetadataService(id: WorkflowId, originalSourceFiles: WorkflowSourceFilesCollection): Unit = { + val sourceFiles = processSource(_.clearEncryptedValues)(originalSourceFiles).get + val submissionEvents = List( MetadataEvent(MetadataKey(id, None, WorkflowMetadataKeys.SubmissionTime), MetadataValue(OffsetDateTime.now.toString)), MetadataEvent.empty(MetadataKey(id, None, WorkflowMetadataKeys.Inputs)), @@ -186,8 +220,8 @@ object WorkflowStoreActor { private[workflowstore] case object Idle extends WorkflowStoreActorState sealed trait WorkflowStoreActorCommand - final case class SubmitWorkflow(source: WorkflowSourceFiles) extends WorkflowStoreActorCommand - final case class BatchSubmitWorkflows(sources: NonEmptyList[WorkflowSourceFiles]) extends WorkflowStoreActorCommand + final case class SubmitWorkflow(source: WorkflowSourceFilesCollection) extends WorkflowStoreActorCommand + final case class BatchSubmitWorkflows(sources: NonEmptyList[WorkflowSourceFilesCollection]) extends WorkflowStoreActorCommand final case class FetchRunnableWorkflows(n: Int) extends WorkflowStoreActorCommand final case class RemoveWorkflow(id: WorkflowId) extends WorkflowStoreActorCommand final case class AbortWorkflow(id: WorkflowId, manager: ActorRef) extends WorkflowStoreActorCommand diff --git a/engine/src/main/scala/cromwell/engine/workflow/workflowstore/workflowstore_.scala 
b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/workflowstore_.scala index 0d9481c47df..61bc37ee0d0 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/workflowstore/workflowstore_.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/workflowstore_.scala @@ -1,6 +1,6 @@ package cromwell.engine.workflow.workflowstore -import cromwell.core.{WorkflowId, WorkflowSourceFiles} +import cromwell.core.{WorkflowId, WorkflowSourceFilesCollection} import cromwell.engine.workflow.workflowstore.WorkflowStoreState.StartableState sealed trait WorkflowStoreState {def isStartable: Boolean} @@ -12,4 +12,4 @@ object WorkflowStoreState { case object Restartable extends StartableState } -final case class WorkflowToStart(id: WorkflowId, sources: WorkflowSourceFiles, state: StartableState) +final case class WorkflowToStart(id: WorkflowId, sources: WorkflowSourceFilesCollection, state: StartableState) diff --git a/engine/src/main/scala/cromwell/jobstore/EmptyJobStoreActor.scala b/engine/src/main/scala/cromwell/jobstore/EmptyJobStoreActor.scala new file mode 100644 index 00000000000..a2eb585a395 --- /dev/null +++ b/engine/src/main/scala/cromwell/jobstore/EmptyJobStoreActor.scala @@ -0,0 +1,15 @@ +package cromwell.jobstore + +import akka.actor.{Actor, Props} +import cromwell.jobstore.JobStoreActor._ + +class EmptyJobStoreActor extends Actor { + override def receive: Receive = { + case w: JobStoreWriterCommand => sender ! JobStoreWriteSuccess(w) + case _: QueryJobCompletion => sender ! JobNotComplete + } +} + +object EmptyJobStoreActor { + def props: Props = Props(new EmptyJobStoreActor()) +} diff --git a/engine/src/main/scala/cromwell/jobstore/jobstore_.scala b/engine/src/main/scala/cromwell/jobstore/jobstore_.scala index 7fbdd01079b..921183d3583 100644 --- a/engine/src/main/scala/cromwell/jobstore/jobstore_.scala +++ b/engine/src/main/scala/cromwell/jobstore/jobstore_.scala @@ -5,6 +5,6 @@ import cromwell.core.{WorkflowId, _} case class JobStoreKey(workflowId: WorkflowId, callFqn: String, index: Option[Int], attempt: Int) sealed trait JobResult -case class JobResultSuccess(returnCode: Option[Int], jobOutputs: JobOutputs) extends JobResult +case class JobResultSuccess(returnCode: Option[Int], jobOutputs: CallOutputs) extends JobResult case class JobResultFailure(returnCode: Option[Int], reason: Throwable, retryable: Boolean) extends JobResult diff --git a/engine/src/main/scala/cromwell/server/CromwellRootActor.scala b/engine/src/main/scala/cromwell/server/CromwellRootActor.scala index cc0f5f4aaf7..37dea81331d 100644 --- a/engine/src/main/scala/cromwell/server/CromwellRootActor.scala +++ b/engine/src/main/scala/cromwell/server/CromwellRootActor.scala @@ -13,6 +13,7 @@ import cromwell.engine.workflow.tokens.JobExecutionTokenDispenserActor import cromwell.engine.workflow.workflowstore.{SqlWorkflowStore, WorkflowStore, WorkflowStoreActor} import cromwell.jobstore.{JobStore, JobStoreActor, SqlJobStore} import cromwell.services.{ServiceRegistryActor, SingletonServicesStore} +import cromwell.subworkflowstore.{SqlSubWorkflowStore, SubWorkflowStoreActor} import net.ceedubs.ficus.Ficus._ /** * An actor which serves as the lord protector for the rest of Cromwell, allowing us to have more fine grain @@ -29,6 +30,7 @@ import net.ceedubs.ficus.Ficus._ private val logger = Logging(context.system, this) private val config = ConfigFactory.load() + val serverMode: Boolean lazy val serviceRegistryActor: ActorRef = context.actorOf(ServiceRegistryActor.props(config), "ServiceRegistryActor") 
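// Editor's aside — a minimal, self-contained sketch (not the actual Cromwell classes) of the
// pattern used in this hunk: an abstract root actor exposes deployment flags (`serverMode`,
// `abortJobsOnTerminate`) as abstract members and wires its children through lazy vals, so each
// concrete flavour only has to override the flags. Names such as RootActorSketch and `worker`
// are hypothetical and exist only for illustration.
import akka.actor.{Actor, ActorRef, Props}

abstract class RootActorSketch extends Actor {
  // Abstract flags: each concrete deployment flavour decides how it behaves.
  def serverMode: Boolean
  def abortJobsOnTerminate: Boolean

  // Lazy child: created on first use, after the concrete subclass is fully constructed.
  lazy val worker: ActorRef = context.actorOf(Props.empty, "worker")

  override def receive: Receive = {
    case msg => worker forward msg
  }
}

// A server-flavoured root, mirroring the overrides CromwellServerActor makes further below.
class ServerRootSketch extends RootActorSketch {
  override val serverMode = true
  override val abortJobsOnTerminate = false
}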
lazy val numberOfWorkflowLogCopyWorkers = config.getConfig("system").as[Option[Int]]("number-of-workflow-log-copy-workers").getOrElse(DefaultNumberOfWorkflowLogCopyWorkers) @@ -44,6 +46,9 @@ import net.ceedubs.ficus.Ficus._ lazy val jobStore: JobStore = new SqlJobStore(SingletonServicesStore.databaseInterface) lazy val jobStoreActor = context.actorOf(JobStoreActor.props(jobStore), "JobStoreActor") + lazy val subWorkflowStore = new SqlSubWorkflowStore(SingletonServicesStore.databaseInterface) + lazy val subWorkflowStoreActor = context.actorOf(SubWorkflowStoreActor.props(subWorkflowStore), "SubWorkflowStoreActor") + lazy val callCache: CallCache = new CallCache(SingletonServicesStore.databaseInterface) lazy val callCacheReadActor = context.actorOf(RoundRobinPool(25) .props(CallCacheReadActor.props(callCache)), @@ -56,9 +61,12 @@ import net.ceedubs.ficus.Ficus._ lazy val jobExecutionTokenDispenserActor = context.actorOf(JobExecutionTokenDispenserActor.props) + def abortJobsOnTerminate: Boolean + lazy val workflowManagerActor = context.actorOf( WorkflowManagerActor.props( - workflowStoreActor, serviceRegistryActor, workflowLogCopyRouter, jobStoreActor, callCacheReadActor, jobExecutionTokenDispenserActor, backendSingletonCollection), + workflowStoreActor, serviceRegistryActor, workflowLogCopyRouter, jobStoreActor, subWorkflowStoreActor, callCacheReadActor, + jobExecutionTokenDispenserActor, backendSingletonCollection, abortJobsOnTerminate, serverMode), "WorkflowManagerActor") override def receive = { diff --git a/engine/src/main/scala/cromwell/server/CromwellServer.scala b/engine/src/main/scala/cromwell/server/CromwellServer.scala index dca31a625d3..36010fb620d 100644 --- a/engine/src/main/scala/cromwell/server/CromwellServer.scala +++ b/engine/src/main/scala/cromwell/server/CromwellServer.scala @@ -53,6 +53,9 @@ object CromwellServer { class CromwellServerActor(config: Config) extends CromwellRootActor with CromwellApiService with SwaggerService { implicit def executionContext = actorRefFactory.dispatcher + override val serverMode = true + override val abortJobsOnTerminate = false + override def actorRefFactory = context override def receive = handleTimeouts orElse runRoute(possibleRoutes) diff --git a/engine/src/main/scala/cromwell/subworkflowstore/EmptySubWorkflowStoreActor.scala b/engine/src/main/scala/cromwell/subworkflowstore/EmptySubWorkflowStoreActor.scala new file mode 100644 index 00000000000..166d7d685bd --- /dev/null +++ b/engine/src/main/scala/cromwell/subworkflowstore/EmptySubWorkflowStoreActor.scala @@ -0,0 +1,17 @@ +package cromwell.subworkflowstore + +import akka.actor.{Actor, ActorLogging, Props} +import cromwell.subworkflowstore.SubWorkflowStoreActor._ + +class EmptySubWorkflowStoreActor extends Actor with ActorLogging { + override def receive: Receive = { + case register: RegisterSubWorkflow => sender() ! SubWorkflowStoreRegisterSuccess(register) + case query: QuerySubWorkflow => sender() ! SubWorkflowNotFound(query) + case complete: WorkflowComplete =>sender() ! 
SubWorkflowStoreCompleteSuccess(complete) + case unknown => log.error(s"SubWorkflowStoreActor received unknown message: $unknown") + } +} + +object EmptySubWorkflowStoreActor { + def props: Props = Props(new EmptySubWorkflowStoreActor()) +} diff --git a/engine/src/main/scala/cromwell/subworkflowstore/SqlSubWorkflowStore.scala b/engine/src/main/scala/cromwell/subworkflowstore/SqlSubWorkflowStore.scala new file mode 100644 index 00000000000..64f21275ff4 --- /dev/null +++ b/engine/src/main/scala/cromwell/subworkflowstore/SqlSubWorkflowStore.scala @@ -0,0 +1,31 @@ +package cromwell.subworkflowstore +import cromwell.database.sql.SubWorkflowStoreSqlDatabase +import cromwell.database.sql.tables.SubWorkflowStoreEntry + +import scala.concurrent.{ExecutionContext, Future} + +class SqlSubWorkflowStore(subWorkflowStoreSqlDatabase: SubWorkflowStoreSqlDatabase) extends SubWorkflowStore { + override def addSubWorkflowStoreEntry(rootWorkflowExecutionUuid: String, + parentWorkflowExecutionUuid: String, + callFullyQualifiedName: String, + jobIndex: Int, + jobAttempt: Int, + subWorkflowExecutionUuid: String)(implicit ec: ExecutionContext): Future[Unit] = { + subWorkflowStoreSqlDatabase.addSubWorkflowStoreEntry( + rootWorkflowExecutionUuid, + parentWorkflowExecutionUuid, + callFullyQualifiedName, + jobIndex, + jobAttempt, + subWorkflowExecutionUuid + ) + } + + override def querySubWorkflowStore(parentWorkflowExecutionUuid: String, callFqn: String, jobIndex: Int, jobAttempt: Int)(implicit ec: ExecutionContext): Future[Option[SubWorkflowStoreEntry]] = { + subWorkflowStoreSqlDatabase.querySubWorkflowStore(parentWorkflowExecutionUuid, callFqn, jobIndex, jobAttempt) + } + + override def removeSubWorkflowStoreEntries(parentWorkflowExecutionUuid: String)(implicit ec: ExecutionContext): Future[Int] = { + subWorkflowStoreSqlDatabase.removeSubWorkflowStoreEntries(parentWorkflowExecutionUuid) + } +} diff --git a/engine/src/main/scala/cromwell/subworkflowstore/SubWorkflowStore.scala b/engine/src/main/scala/cromwell/subworkflowstore/SubWorkflowStore.scala new file mode 100644 index 00000000000..8ad92fa9bae --- /dev/null +++ b/engine/src/main/scala/cromwell/subworkflowstore/SubWorkflowStore.scala @@ -0,0 +1,19 @@ +package cromwell.subworkflowstore + +import cromwell.database.sql.tables.SubWorkflowStoreEntry + +import scala.concurrent.{ExecutionContext, Future} + +trait SubWorkflowStore { + def addSubWorkflowStoreEntry(rootWorkflowExecutionUuid: String, + parentWorkflowExecutionUuid: String, + callFullyQualifiedName: String, + jobIndex: Int, + jobAttempt: Int, + subWorkflowExecutionUuid: String)(implicit ec: ExecutionContext): Future[Unit] + + def querySubWorkflowStore(parentWorkflowExecutionUuid: String, callFqn: String, jobIndex: Int, jobAttempt: Int) + (implicit ec: ExecutionContext): Future[Option[SubWorkflowStoreEntry]] + + def removeSubWorkflowStoreEntries(parentWorkflowExecutionUuid: String)(implicit ec: ExecutionContext): Future[Int] +} diff --git a/engine/src/main/scala/cromwell/subworkflowstore/SubWorkflowStoreActor.scala b/engine/src/main/scala/cromwell/subworkflowstore/SubWorkflowStoreActor.scala new file mode 100644 index 00000000000..cf762408716 --- /dev/null +++ b/engine/src/main/scala/cromwell/subworkflowstore/SubWorkflowStoreActor.scala @@ -0,0 +1,72 @@ +package cromwell.subworkflowstore + +import akka.actor.{Actor, ActorLogging, ActorRef, Props} +import cromwell.core.ExecutionIndex._ +import cromwell.core.{JobKey, WorkflowId} +import cromwell.database.sql.tables.SubWorkflowStoreEntry +import 
cromwell.subworkflowstore.SubWorkflowStoreActor._ + +import scala.concurrent.ExecutionContext +import scala.util.{Failure, Success} + +class SubWorkflowStoreActor(database: SubWorkflowStore) extends Actor with ActorLogging { + + implicit val ec: ExecutionContext = context.dispatcher + + override def receive = { + case register: RegisterSubWorkflow => registerSubWorkflow(sender(), register) + case query: QuerySubWorkflow => querySubWorkflow(sender(), query) + case complete: WorkflowComplete => workflowComplete(sender(), complete) + case unknown => log.error(s"SubWorkflowStoreActor received unknown message: $unknown") + } + + private def registerSubWorkflow(replyTo: ActorRef, command: RegisterSubWorkflow) = { + database.addSubWorkflowStoreEntry( + command.rootWorkflowExecutionUuid.toString, + command.parentWorkflowExecutionUuid.toString, + command.jobKey.scope.fullyQualifiedName, + command.jobKey.index.fromIndex, + command.jobKey.attempt, + command.subWorkflowExecutionUuid.toString + ) onComplete { + case Success(_) => replyTo ! SubWorkflowStoreRegisterSuccess(command) + case Failure(ex) => replyTo ! SubWorkflowStoreFailure(command, ex) + } + } + + private def querySubWorkflow(replyTo: ActorRef, command: QuerySubWorkflow) = { + val jobKey = command.jobKey + database.querySubWorkflowStore(command.parentWorkflowExecutionUuid.toString, jobKey.scope.fullyQualifiedName, jobKey.index.fromIndex, jobKey.attempt) onComplete { + case Success(Some(result)) => replyTo ! SubWorkflowFound(result) + case Success(None) => replyTo ! SubWorkflowNotFound(command) + case Failure(ex) => replyTo ! SubWorkflowStoreFailure(command, ex) + } + } + + private def workflowComplete(replyTo: ActorRef, command: WorkflowComplete) = { + database.removeSubWorkflowStoreEntries(command.workflowExecutionUuid.toString) onComplete { + case Success(_) => replyTo ! SubWorkflowStoreCompleteSuccess(command) + case Failure(ex) => replyTo ! 
SubWorkflowStoreFailure(command, ex) + } + } + +} + +object SubWorkflowStoreActor { + sealed trait SubWorkflowStoreActorCommand + case class RegisterSubWorkflow(rootWorkflowExecutionUuid: WorkflowId, parentWorkflowExecutionUuid: WorkflowId, jobKey: JobKey, subWorkflowExecutionUuid: WorkflowId) extends SubWorkflowStoreActorCommand + case class QuerySubWorkflow(parentWorkflowExecutionUuid: WorkflowId, jobKey: JobKey) extends SubWorkflowStoreActorCommand + case class WorkflowComplete(workflowExecutionUuid: WorkflowId) extends SubWorkflowStoreActorCommand + + sealed trait SubWorkflowStoreActorResponse + case class SubWorkflowStoreRegisterSuccess(command: RegisterSubWorkflow) extends SubWorkflowStoreActorResponse + case class SubWorkflowFound(subWorkflowStoreEntry: SubWorkflowStoreEntry) extends SubWorkflowStoreActorResponse + case class SubWorkflowNotFound(command: QuerySubWorkflow) extends SubWorkflowStoreActorResponse + case class SubWorkflowStoreCompleteSuccess(command: SubWorkflowStoreActorCommand) extends SubWorkflowStoreActorResponse + + case class SubWorkflowStoreFailure(command: SubWorkflowStoreActorCommand, failure: Throwable) extends SubWorkflowStoreActorResponse + + def props(database: SubWorkflowStore) = Props( + new SubWorkflowStoreActor(database) + ) +} diff --git a/engine/src/main/scala/cromwell/webservice/CromwellApiHandler.scala b/engine/src/main/scala/cromwell/webservice/CromwellApiHandler.scala index a2441abb4ac..76ec94d7785 100644 --- a/engine/src/main/scala/cromwell/webservice/CromwellApiHandler.scala +++ b/engine/src/main/scala/cromwell/webservice/CromwellApiHandler.scala @@ -21,8 +21,8 @@ object CromwellApiHandler { sealed trait ApiHandlerMessage - final case class ApiHandlerWorkflowSubmit(source: WorkflowSourceFiles) extends ApiHandlerMessage - final case class ApiHandlerWorkflowSubmitBatch(sources: NonEmptyList[WorkflowSourceFiles]) extends ApiHandlerMessage + final case class ApiHandlerWorkflowSubmit(source: WorkflowSourceFilesCollection) extends ApiHandlerMessage + final case class ApiHandlerWorkflowSubmitBatch(sources: NonEmptyList[WorkflowSourceFilesCollection]) extends ApiHandlerMessage final case class ApiHandlerWorkflowQuery(uri: Uri, parameters: Seq[(String, String)]) extends ApiHandlerMessage final case class ApiHandlerWorkflowStatus(id: WorkflowId) extends ApiHandlerMessage final case class ApiHandlerWorkflowOutputs(id: WorkflowId) extends ApiHandlerMessage @@ -66,7 +66,9 @@ class CromwellApiHandler(requestHandlerActor: ActorRef) extends Actor with Workf case WorkflowStoreActor.WorkflowSubmittedToStore(id) => context.parent ! RequestComplete((StatusCodes.Created, WorkflowSubmitResponse(id.toString, WorkflowSubmitted.toString))) - case ApiHandlerWorkflowSubmitBatch(sources) => requestHandlerActor ! WorkflowStoreActor.BatchSubmitWorkflows(sources) + case ApiHandlerWorkflowSubmitBatch(sources) => requestHandlerActor ! 
+ WorkflowStoreActor.BatchSubmitWorkflows(sources.map(x => WorkflowSourceFilesWithoutImports(x.wdlSource,x.inputsJson,x.workflowOptionsJson))) + case WorkflowStoreActor.WorkflowsBatchSubmittedToStore(ids) => val responses = ids map { id => WorkflowSubmitResponse(id.toString, WorkflowSubmitted.toString) } diff --git a/engine/src/main/scala/cromwell/webservice/CromwellApiService.scala b/engine/src/main/scala/cromwell/webservice/CromwellApiService.scala index 54ddf278660..34fff945b3f 100644 --- a/engine/src/main/scala/cromwell/webservice/CromwellApiService.scala +++ b/engine/src/main/scala/cromwell/webservice/CromwellApiService.scala @@ -1,9 +1,9 @@ package cromwell.webservice import akka.actor._ -import java.lang.Throwable + import cats.data.NonEmptyList -import cromwell.core.{WorkflowId, WorkflowSourceFiles} +import cromwell.core.{WorkflowId, WorkflowOptionsJson, WorkflowSourceFilesCollection} import cromwell.engine.backend.BackendConfiguration import cromwell.services.metadata.MetadataService._ import cromwell.webservice.WorkflowJsonSupport._ @@ -14,6 +14,9 @@ import spray.http._ import spray.httpx.SprayJsonSupport._ import spray.json._ import spray.routing._ +import wdl4s.{WdlJson, WdlSource} + +import scala.util.{Failure, Success, Try} trait SwaggerService extends SwaggerUiResourceHttpService { override def swaggerServiceName = "cromwell" @@ -50,8 +53,8 @@ trait CromwellApiService extends HttpService with PerRequestCreator { perRequest(requestContext, metadataBuilderProps, message) } - private def failBadRequest(exception: Exception, statusCode: StatusCode = StatusCodes.BadRequest) = respondWithMediaType(`application/json`) { - complete((statusCode, APIResponse.fail(exception).toJson.prettyPrint)) + private def failBadRequest(t: Throwable, statusCode: StatusCode = StatusCodes.BadRequest) = respondWithMediaType(`application/json`) { + complete((statusCode, APIResponse.fail(t).toJson.prettyPrint)) } val workflowRoutes = queryRoute ~ queryPostRoute ~ workflowOutputsRoute ~ submitRoute ~ submitBatchRoute ~ @@ -124,18 +127,70 @@ trait CromwellApiService extends HttpService with PerRequestCreator { } } + case class PartialWorkflowSources(wdlSource: Option[WdlSource], workflowInputs: Seq[WdlJson], workflowInputsAux: Map[Int, WdlJson], workflowOptions: Option[WorkflowOptionsJson], zippedImports: Option[Array[Byte]]) + object PartialWorkflowSources { + private def workflowInputs(bodyPart: BodyPart): Seq[WdlJson] = { + import spray.json._ + bodyPart.entity.data.asString.parseJson match { + case JsArray(Seq(x, xs@_*)) => (List(x) ++ xs).map(_.compactPrint) + case JsArray(_) => Seq.empty + case v: JsValue => Seq(v.compactPrint) + } + } + + def partialSourcesToSourceCollections(partialSources: Try[PartialWorkflowSources], allowNoInputs: Boolean): Try[Seq[WorkflowSourceFilesCollection]] = { + partialSources flatMap { + case PartialWorkflowSources(Some(wdlSource), workflowInputs, workflowInputsAux, workflowOptions, wdlDependencies) => + //The order of addition allows for the expected override of colliding keys. 
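// Editor's aside — a hedged sketch (hypothetical helper, not the mergeMaps used above) of the
// override rule described in the preceding comment: input JSON objects are folded left to right,
// so a key supplied by a later workflowInputs_N part replaces the same key from an earlier part.
object InputMergeSketch {
  import spray.json._

  def mergeInputJsons(jsons: Seq[String]): JsObject =
    JsObject(jsons.map(_.parseJson.asJsObject.fields).foldLeft(Map.empty[String, JsValue])(_ ++ _))

  // mergeInputJsons(Seq("""{"w.x": 1, "w.y": 2}""", """{"w.y": 3}""")) yields {"w.x": 1, "w.y": 3}
}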
+ val sortedInputAuxes = workflowInputsAux.toSeq.sortBy(_._1).map(x => Option(x._2)) + val wfInputs: Try[Seq[WdlJson]] = if (workflowInputs.isEmpty) { + if (allowNoInputs) Success(Seq("{}")) else Failure(new IllegalArgumentException("No inputs were provided")) + } else Success(workflowInputs map { workflowInputSet => + mergeMaps(Seq(Option(workflowInputSet)) ++ sortedInputAuxes).toString + }) + wfInputs.map(_.map(x => WorkflowSourceFilesCollection(wdlSource, x, workflowOptions.getOrElse("{}"), wdlDependencies))) + case other => Failure(new IllegalArgumentException(s"Incomplete workflow submission: $other")) + } + } + + def fromSubmitRoute(formData: MultipartFormData, allowNoInputs: Boolean): Try[Seq[WorkflowSourceFilesCollection]] = { + val partialSources = Try(formData.fields.foldLeft(PartialWorkflowSources(None, Seq.empty, Map.empty, None, None)) { (partialSources: PartialWorkflowSources, bodyPart: BodyPart) => + if (bodyPart.name.contains("wdlSource")) { + partialSources.copy(wdlSource = Some(bodyPart.entity.data.asString)) + } else if (bodyPart.name.contains("workflowInputs")) { + partialSources.copy(workflowInputs = workflowInputs(bodyPart)) + } else if (bodyPart.name.forall(_.startsWith("workflowInputs_"))) { + val index = bodyPart.name.get.stripPrefix("workflowInputs_").toInt + partialSources.copy(workflowInputsAux = partialSources.workflowInputsAux + (index -> bodyPart.entity.data.asString)) + } else if (bodyPart.name.contains("workflowOptions")) { + partialSources.copy(workflowOptions = Some(bodyPart.entity.data.asString)) + } else if (bodyPart.name.contains("wdlDependencies")) { + partialSources.copy(zippedImports = Some(bodyPart.entity.data.toByteArray)) + } else { + throw new IllegalArgumentException(s"Unexpected body part name: ${bodyPart.name.getOrElse("None")}") + } + }) + partialSourcesToSourceCollections(partialSources, allowNoInputs) + } + } + def submitRoute = path("workflows" / Segment) { version => post { - formFields("wdlSource", "workflowInputs".?, "workflowInputs_2".?, "workflowInputs_3".?, - "workflowInputs_4".?, "workflowInputs_5".?, "workflowOptions".?) { - (wdlSource, workflowInputs, workflowInputs_2, workflowInputs_3, workflowInputs_4, workflowInputs_5, workflowOptions) => - requestContext => - //The order of addition allows for the expected override of colliding keys. 
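// Editor's aside — a simplified, hypothetical model (plain (name, body) pairs instead of spray
// BodyParts, and only three of the recognised part names) of the accumulation pattern used by
// fromSubmitRoute above: fold the submitted parts into a partial record, filling the matching
// field for each recognised part name and failing fast on anything unrecognised.
import scala.util.Try

final case class PartialSubmissionSketch(wdlSource: Option[String] = None,
                                         inputs: Option[String] = None,
                                         options: Option[String] = None)

object PartialSubmissionSketch {
  def fromParts(parts: Seq[(String, String)]): Try[PartialSubmissionSketch] = Try {
    parts.foldLeft(PartialSubmissionSketch()) {
      case (acc, ("wdlSource", body))       => acc.copy(wdlSource = Some(body))
      case (acc, ("workflowInputs", body))  => acc.copy(inputs = Some(body))
      case (acc, ("workflowOptions", body)) => acc.copy(options = Some(body))
      case (_, (name, _))                   => throw new IllegalArgumentException(s"Unexpected body part name: $name")
    }
  }
}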
- val wfInputs = mergeMaps(Seq(workflowInputs, workflowInputs_2, workflowInputs_3, workflowInputs_4, workflowInputs_5)).toString - - val workflowSourceFiles = WorkflowSourceFiles(wdlSource, wfInputs, workflowOptions.getOrElse("{}")) - perRequest(requestContext, CromwellApiHandler.props(workflowStoreActor), CromwellApiHandler.ApiHandlerWorkflowSubmit(workflowSourceFiles)) + entity(as[MultipartFormData]) { formData => + requestContext => { + PartialWorkflowSources.fromSubmitRoute(formData, allowNoInputs = true) match { + case Success(workflowSourceFiles) if workflowSourceFiles.size == 1 => + perRequest(requestContext, CromwellApiHandler.props(workflowStoreActor), CromwellApiHandler.ApiHandlerWorkflowSubmit(workflowSourceFiles.head)) + case Success(workflowSourceFiles) => + failBadRequest(new IllegalArgumentException("To submit more than one workflow at a time, use the batch endpoint.")) + case Failure(t) => + System.err.println(t) + t.printStackTrace(System.err) + failBadRequest(t) + } + () + } } } } @@ -143,19 +198,18 @@ trait CromwellApiService extends HttpService with PerRequestCreator { def submitBatchRoute = path("workflows" / Segment / "batch") { version => post { - formFields("wdlSource", "workflowInputs", "workflowOptions".?) { - (wdlSource, workflowInputs, workflowOptions) => - requestContext => - import spray.json._ - workflowInputs.parseJson match { - case JsArray(Seq(x, xs@_*)) => - val nelInputses = NonEmptyList.of(x, xs: _*) - val sources = nelInputses.map(inputs => WorkflowSourceFiles(wdlSource, inputs.compactPrint, workflowOptions.getOrElse("{}"))) - perRequest(requestContext, CromwellApiHandler.props(workflowStoreActor), CromwellApiHandler.ApiHandlerWorkflowSubmitBatch(sources)) - case JsArray(_) => failBadRequest(new RuntimeException("Nothing was submitted")) - case _ => reject - } - () + entity(as[MultipartFormData]) { formData => + requestContext => { + PartialWorkflowSources.fromSubmitRoute(formData, allowNoInputs = false) match { + case Success(workflowSourceFiles) => + perRequest(requestContext, CromwellApiHandler.props(workflowStoreActor), CromwellApiHandler.ApiHandlerWorkflowSubmitBatch(NonEmptyList.fromListUnsafe(workflowSourceFiles.toList))) + case Failure(t) => + System.err.println(t) + t.printStackTrace(System.err) + failBadRequest(t) + } + () + } } } } @@ -183,13 +237,21 @@ trait CromwellApiService extends HttpService with PerRequestCreator { parameterMultiMap { parameters => val includeKeysOption = NonEmptyList.fromList(parameters.getOrElse("includeKey", List.empty)) val excludeKeysOption = NonEmptyList.fromList(parameters.getOrElse("excludeKey", List.empty)) - (includeKeysOption, excludeKeysOption) match { - case (Some(_), Some(_)) => + val expandSubWorkflowsOption = { + parameters.get("expandSubWorkflows") match { + case Some(v :: Nil) => Try(v.toBoolean) + case _ => Success(false) + } + } + + (includeKeysOption, excludeKeysOption, expandSubWorkflowsOption) match { + case (Some(_), Some(_), _) => failBadRequest(new IllegalArgumentException("includeKey and excludeKey may not be specified together")) - case _ => + case (_, _, Success(expandSubWorkflows)) => withRecognizedWorkflowId(possibleWorkflowId) { id => - handleMetadataRequest(GetSingleWorkflowMetadataAction(id, includeKeysOption, excludeKeysOption)) + handleMetadataRequest(GetSingleWorkflowMetadataAction(id, includeKeysOption, excludeKeysOption, expandSubWorkflows)) } + case (_, _, Failure(ex)) => failBadRequest(new IllegalArgumentException(ex)) } } } diff --git 
a/engine/src/main/scala/cromwell/webservice/EngineStatsActor.scala b/engine/src/main/scala/cromwell/webservice/EngineStatsActor.scala index 3b83955a3cf..047eeccb9ae 100644 --- a/engine/src/main/scala/cromwell/webservice/EngineStatsActor.scala +++ b/engine/src/main/scala/cromwell/webservice/EngineStatsActor.scala @@ -19,9 +19,10 @@ final case class EngineStatsActor(workflowActors: List[ActorRef], replyTo: Actor private var jobCounts = Map.empty[ActorRef, Int] /* - It's possible that WorkflowActors might disappear behind us and never manage to write us back. - Instead of waiting longingly, watching a mailbox which might never receive some love instead wait - a specified period of time and assume anything which was going to reply already has + * FIXME + * Because of sub workflows there is currently no reliable way to know if we received responses from all running WEAs. + * For now, we always wait for the timeout duration before responding to give a chance to all WEAs to respond (even nested ones). + * This could be improved by having WEAs wait for their sub WEAs before sending back the response. */ val scheduledMsg = context.system.scheduler.scheduleOnce(timeout, self, ShutItDown) @@ -31,7 +32,6 @@ final case class EngineStatsActor(workflowActors: List[ActorRef], replyTo: Actor override def receive = { case JobCount(count) => jobCounts += (sender -> count) - if (jobCounts.size == workflowActors.size) reportStats() case ShutItDown => reportStats() case wompWomp => log.error("Unexpected message to EngineStatsActor: {}", wompWomp) @@ -59,5 +59,5 @@ object EngineStatsActor { final case class EngineStats(workflows: Int, jobs: Int) - val MaxTimeToWait = 30 seconds + val MaxTimeToWait = 3 seconds } diff --git a/engine/src/main/scala/cromwell/webservice/WorkflowJsonSupport.scala b/engine/src/main/scala/cromwell/webservice/WorkflowJsonSupport.scala index a294c58fc9c..b2afe311f65 100644 --- a/engine/src/main/scala/cromwell/webservice/WorkflowJsonSupport.scala +++ b/engine/src/main/scala/cromwell/webservice/WorkflowJsonSupport.scala @@ -1,13 +1,15 @@ package cromwell.webservice +import java.nio.file.Paths import java.time.OffsetDateTime -import cromwell.core.WorkflowSourceFiles +import cromwell.core._ import cromwell.engine._ import cromwell.services.metadata.MetadataService import MetadataService.{WorkflowQueryResponse, WorkflowQueryResult} import cromwell.util.JsonFormatting.WdlValueJsonFormatter import WdlValueJsonFormatter._ +import better.files.File import spray.json.{DefaultJsonProtocol, JsString, JsValue, RootJsonFormat} object WorkflowJsonSupport extends DefaultJsonProtocol { @@ -18,7 +20,17 @@ object WorkflowJsonSupport extends DefaultJsonProtocol { implicit val callOutputResponseProtocol = jsonFormat3(CallOutputResponse) implicit val engineStatsProtocol = jsonFormat2(EngineStatsActor.EngineStats) implicit val callAttempt = jsonFormat2(CallAttempt) - implicit val workflowSourceData = jsonFormat3(WorkflowSourceFiles) + implicit val workflowSourceData = jsonFormat3(WorkflowSourceFilesWithoutImports) + + implicit object fileJsonFormat extends RootJsonFormat[File] { + override def write(obj: File) = JsString(obj.path.toAbsolutePath.toString) + override def read(json: JsValue): File = json match { + case JsString(str) => Paths.get(str) + case unknown => throw new NotImplementedError(s"Cannot parse $unknown to a File") + } + } + + implicit val workflowSourceDataWithImports = jsonFormat4(WorkflowSourceFilesWithDependenciesZip) implicit val errorResponse = jsonFormat3(FailureResponse) implicit val 
successResponse = jsonFormat3(SuccessResponse) diff --git a/engine/src/main/scala/cromwell/webservice/metadata/IndexedJsonValue.scala b/engine/src/main/scala/cromwell/webservice/metadata/IndexedJsonValue.scala index d9ed774383a..f51e64187ca 100644 --- a/engine/src/main/scala/cromwell/webservice/metadata/IndexedJsonValue.scala +++ b/engine/src/main/scala/cromwell/webservice/metadata/IndexedJsonValue.scala @@ -4,6 +4,7 @@ import java.time.OffsetDateTime import cats.{Monoid, Semigroup} import cats.instances.map._ +import cromwell.services.metadata.CallMetadataKeys import spray.json._ @@ -30,20 +31,33 @@ object IndexedJsonValue { /** Customized version of Json data structure, to account for timestamped values and lazy array creation */ sealed trait TimestampedJsValue { - def toJson: JsValue + def toJson(expandedValues: Map[String, JsValue]): JsValue def timestamp: OffsetDateTime } private case class TimestampedJsList(v: Map[Int, TimestampedJsValue], timestamp: OffsetDateTime) extends TimestampedJsValue { - override val toJson = JsArray(v.values.toVector map { _.toJson }) + override def toJson(expandedValues: Map[String, JsValue]) = JsArray(v.values.toVector map { _.toJson(expandedValues) }) } private case class TimestampedJsObject(v: Map[String, TimestampedJsValue], timestamp: OffsetDateTime) extends TimestampedJsValue { - override val toJson = JsObject(v mapValues { _.toJson }) + override def toJson(expandedValues: Map[String, JsValue]) = { + val mappedValues = v map { + case (key, subWorkflowId: TimestampedJsPrimitive) if key == CallMetadataKeys.SubWorkflowId => + val subId = subWorkflowId.v.asInstanceOf[JsString] + expandedValues.get(subId.value) map { subMetadata => + CallMetadataKeys.SubWorkflowMetadata -> subMetadata + } getOrElse { + key -> subWorkflowId.v + } + case (key, value) => key -> value.toJson(expandedValues) + } + + JsObject(mappedValues) + } } private class TimestampedJsPrimitive(val v: JsValue, val timestamp: OffsetDateTime) extends TimestampedJsValue { - override val toJson = v + override def toJson(expandedValues: Map[String, JsValue]) = v } private case class TimestampedEmptyJson(override val timestamp: OffsetDateTime) extends TimestampedJsPrimitive(JsObject(Map.empty[String, JsValue]), timestamp) \ No newline at end of file diff --git a/engine/src/main/scala/cromwell/webservice/metadata/MetadataBuilderActor.scala b/engine/src/main/scala/cromwell/webservice/metadata/MetadataBuilderActor.scala index 0653be4256d..272e94c75a5 100644 --- a/engine/src/main/scala/cromwell/webservice/metadata/MetadataBuilderActor.scala +++ b/engine/src/main/scala/cromwell/webservice/metadata/MetadataBuilderActor.scala @@ -13,8 +13,8 @@ import cromwell.services.ServiceRegistryActor.ServiceRegistryFailure import cromwell.services.metadata.MetadataService._ import cromwell.services.metadata._ import cromwell.webservice.PerRequest.{RequestComplete, RequestCompleteWithHeaders} -import cromwell.webservice.metadata.MetadataBuilderActor.{Idle, MetadataBuilderActorState, WaitingForMetadataService} -import cromwell.webservice.{APIResponse, WorkflowJsonSupport} +import cromwell.webservice.metadata.MetadataBuilderActor.{Idle, MetadataBuilderActorData, MetadataBuilderActorState, WaitingForMetadataService, WaitingForSubWorkflows} +import cromwell.webservice.{APIResponse, PerRequestCreator, WorkflowJsonSupport} import org.slf4j.LoggerFactory import spray.http.{StatusCodes, Uri} import spray.httpx.SprayJsonSupport._ @@ -29,7 +29,21 @@ object MetadataBuilderActor { sealed trait MetadataBuilderActorState case 
object Idle extends MetadataBuilderActorState case object WaitingForMetadataService extends MetadataBuilderActorState - + case object WaitingForSubWorkflows extends MetadataBuilderActorState + + case class MetadataBuilderActorData( + originalQuery: MetadataQuery, + originalEvents: Seq[MetadataEvent], + subWorkflowsMetadata: Map[String, JsValue], + waitFor: Int + ) { + def withSubWorkflow(id: String, metadata: JsValue) = { + this.copy(subWorkflowsMetadata = subWorkflowsMetadata + ((id, metadata))) + } + + def isComplete = subWorkflowsMetadata.size == waitFor + } + def props(serviceRegistryActor: ActorRef) = { Props(new MetadataBuilderActor(serviceRegistryActor)).withDispatcher(ApiDispatcher) } @@ -138,8 +152,8 @@ object MetadataBuilderActor { events.toList map { e => keyValueToIndexedJson(e.key.key, e.value, e.offsetDateTime) } combineAll } - private def eventsToAttemptMetadata(attempt: Int, events: Seq[MetadataEvent]) = { - val withAttemptField = JsObject(eventsToIndexedJson(events).toJson.asJsObject.fields + (AttemptKey -> JsNumber(attempt))) + private def eventsToAttemptMetadata(expandedValues: Map[String, JsValue])(attempt: Int, events: Seq[MetadataEvent]) = { + val withAttemptField = JsObject(eventsToIndexedJson(events).toJson(expandedValues).asJsObject.fields + (AttemptKey -> JsNumber(attempt))) MetadataForAttempt(attempt, withAttemptField) } @@ -160,10 +174,10 @@ object MetadataBuilderActor { workflowNonStatusEvents ++ sortedStateEvents.headOption.toList } - private def parseWorkflowEventsToTimestampedJsValue(events: Seq[MetadataEvent], includeCallsIfEmpty: Boolean): JsObject = { + private def parseWorkflowEventsToTimestampedJsValue(events: Seq[MetadataEvent], includeCallsIfEmpty: Boolean, expandedValues: Map[String, JsValue]): JsObject = { // Partition if sequence of events in a pair of (Workflow level events, Call level events) val (workflowLevel, callLevel) = events partition { _.key.jobKey.isEmpty } - val foldedWorkflowValues = eventsToIndexedJson(reduceWorkflowEvents(workflowLevel)).toJson.asJsObject + val foldedWorkflowValues = eventsToIndexedJson(reduceWorkflowEvents(workflowLevel)).toJson(expandedValues).asJsObject val callsGroupedByFQN = callLevel groupBy { _.key.jobKey.get.callFqn } val callsGroupedByFQNAndIndex = callsGroupedByFQN mapValues { _ groupBy { _.key.jobKey.get.index } } @@ -171,7 +185,7 @@ object MetadataBuilderActor { val callsMap = callsGroupedByFQNAndIndexAndAttempt mapValues { eventsForIndex => eventsForIndex mapValues { eventsForAttempt => - eventsForAttempt map Function.tupled(eventsToAttemptMetadata) + eventsForAttempt map Function.tupled(eventsToAttemptMetadata(expandedValues)) } map { Function.tupled(attemptMetadataToIndexMetadata) } } mapValues { md => JsArray(md.toVector.sortBy(_.index) flatMap { _.metadata }) } @@ -180,13 +194,13 @@ object MetadataBuilderActor { JsObject(foldedWorkflowValues.fields ++ callData) } - private def parseWorkflowEvents(includeCallsIfEmpty: Boolean)(events: Seq[MetadataEvent]): JsObject = parseWorkflowEventsToTimestampedJsValue(events, includeCallsIfEmpty) + private def parseWorkflowEvents(includeCallsIfEmpty: Boolean, expandedValues: Map[String, JsValue])(events: Seq[MetadataEvent]): JsObject = parseWorkflowEventsToTimestampedJsValue(events, includeCallsIfEmpty, expandedValues) /** * Parse a Seq of MetadataEvent into a full Json metadata response. 
*/ - private def parse(events: Seq[MetadataEvent]): JsObject = { - JsObject(events.groupBy(_.key.workflowId.toString) mapValues parseWorkflowEvents(includeCallsIfEmpty = true)) + private def parse(events: Seq[MetadataEvent], expandedValues: Map[String, JsValue]): JsObject = { + JsObject(events.groupBy(_.key.workflowId.toString) mapValues parseWorkflowEvents(includeCallsIfEmpty = true, expandedValues)) } implicit class EnhancedMetadataValue(val value: MetadataValue) extends AnyVal { @@ -194,12 +208,12 @@ object MetadataBuilderActor { } } -class MetadataBuilderActor(serviceRegistryActor: ActorRef) extends LoggingFSM[MetadataBuilderActorState, Unit] +class MetadataBuilderActor(serviceRegistryActor: ActorRef) extends LoggingFSM[MetadataBuilderActorState, Option[MetadataBuilderActorData]] with DefaultJsonProtocol with WorkflowQueryPagination { import WorkflowJsonSupport._ - startWith(Idle, ()) + startWith(Idle, None) val tag = self.path.name when(Idle) { @@ -214,9 +228,8 @@ class MetadataBuilderActor(serviceRegistryActor: ActorRef) extends LoggingFSM[Me } when(WaitingForMetadataService) { - case Event(MetadataLookupResponse(query, metadata), _) => - context.parent ! RequestComplete((StatusCodes.OK, processMetadataResponse(query, metadata))) - allDone + case Event(MetadataLookupResponse(query, metadata), None) => + processMetadataResponse(query, metadata) case Event(StatusLookupResponse(w, status), _) => context.parent ! RequestComplete((StatusCodes.OK, processStatusResponse(w, status))) allDone @@ -225,7 +238,6 @@ class MetadataBuilderActor(serviceRegistryActor: ActorRef) extends LoggingFSM[Me context.parent ! RequestComplete((StatusCodes.InternalServerError, response)) allDone case Event(WorkflowQuerySuccess(uri: Uri, response, metadata), _) => - import WorkflowJsonSupport._ context.parent ! RequestCompleteWithHeaders(response, generateLinkHeaders(uri, metadata):_*) allDone case Event(failure: WorkflowQueryFailure, _) => @@ -235,10 +247,10 @@ class MetadataBuilderActor(serviceRegistryActor: ActorRef) extends LoggingFSM[Me // Add in an empty output event if there aren't already any output events. val hasOutputs = events exists { _.key.key.startsWith(WorkflowMetadataKeys.Outputs + ":") } val updatedEvents = if (hasOutputs) events else MetadataEvent.empty(MetadataKey(id, None, WorkflowMetadataKeys.Outputs)) +: events - context.parent ! RequestComplete((StatusCodes.OK, workflowMetadataResponse(id, updatedEvents, includeCallsIfEmpty = false))) + context.parent ! RequestComplete((StatusCodes.OK, workflowMetadataResponse(id, updatedEvents, includeCallsIfEmpty = false, Map.empty))) allDone case Event(LogsResponse(w, l), _) => - context.parent ! RequestComplete((StatusCodes.OK, workflowMetadataResponse(w, l, includeCallsIfEmpty = false))) + context.parent ! RequestComplete((StatusCodes.OK, workflowMetadataResponse(w, l, includeCallsIfEmpty = false, Map.empty))) allDone case Event(failure: MetadataServiceFailure, _) => context.parent ! 
RequestComplete((StatusCodes.InternalServerError, APIResponse.error(failure.reason))) @@ -249,14 +261,76 @@ class MetadataBuilderActor(serviceRegistryActor: ActorRef) extends LoggingFSM[Me context stop self stay() } + + when(WaitingForSubWorkflows) { + case Event(RequestComplete(metadata), Some(data)) => + processSubWorkflowMetadata(metadata, data) + } + + whenUnhandled { + case Event(message, data) => + log.error(s"Received unexpected message $message in state $stateName with data $data") + stay() + } + + def processSubWorkflowMetadata(metadataResponse: Any, data: MetadataBuilderActorData) = { + metadataResponse match { + case (StatusCodes.OK, js: JsObject) => + js.fields.get(WorkflowMetadataKeys.Id) match { + case Some(subId: JsString) => + val newData = data.withSubWorkflow(subId.value, js) + + if (newData.isComplete) { + buildAndStop(data.originalQuery, data.originalEvents, newData.subWorkflowsMetadata) + } else { + stay() using Option(newData) + } + case _ => failAndDie(new RuntimeException("Received unexpected response while waiting for sub workflow metadata.")) + } + case _ => failAndDie(new RuntimeException("Failed to retrieve metadata for a sub workflow.")) + } + } + + def failAndDie(reason: Throwable) = { + context.parent ! RequestComplete((StatusCodes.InternalServerError, APIResponse.error(reason))) + context stop self + stay() + } + + def buildAndStop(query: MetadataQuery, eventsList: Seq[MetadataEvent], expandedValues: Map[String, JsValue]) = { + context.parent ! RequestComplete((StatusCodes.OK, processMetadataEvents(query, eventsList, expandedValues))) + allDone + } + + def processMetadataResponse(query: MetadataQuery, eventsList: Seq[MetadataEvent]) = { + if (query.expandSubWorkflows) { + // Scan events for sub workflow ids + val subWorkflowIds = eventsList.collect({ + case MetadataEvent(key, value, _) if key.key.endsWith(CallMetadataKeys.SubWorkflowId) => value map { _.value } + }).flatten + + // If none is found just proceed to build metadata + if (subWorkflowIds.isEmpty) buildAndStop(query, eventsList, Map.empty) + else { + // Otherwise spin up a metadata builder actor for each sub workflow + subWorkflowIds foreach { subId => + val subMetadataBuilder = context.actorOf(MetadataBuilderActor.props(serviceRegistryActor), PerRequestCreator.endpointActorName) + subMetadataBuilder ! GetMetadataQueryAction(query.copy(workflowId = WorkflowId.fromString(subId))) + } + goto(WaitingForSubWorkflows) using Option(MetadataBuilderActorData(query, eventsList, Map.empty, subWorkflowIds.size)) + } + } else { + buildAndStop(query, eventsList, Map.empty) + } + } - def processMetadataResponse(query: MetadataQuery, eventsList: Seq[MetadataEvent]): JsObject = { + def processMetadataEvents(query: MetadataQuery, eventsList: Seq[MetadataEvent], expandedValues: Map[String, JsValue]): JsObject = { // Should we send back some message ? Or even fail the request instead ? 
if (eventsList.isEmpty) JsObject(Map.empty[String, JsValue]) else { query match { - case MetadataQuery(w, _, _, _, _) => workflowMetadataResponse(w, eventsList) - case _ => MetadataBuilderActor.parse(eventsList) + case MetadataQuery(w, _, _, _, _, _) => workflowMetadataResponse(w, eventsList, includeCallsIfEmpty = true, expandedValues) + case _ => MetadataBuilderActor.parse(eventsList, expandedValues) } } } @@ -268,7 +342,7 @@ class MetadataBuilderActor(serviceRegistryActor: ActorRef) extends LoggingFSM[Me )) } - private def workflowMetadataResponse(workflowId: WorkflowId, eventsList: Seq[MetadataEvent], includeCallsIfEmpty: Boolean = true) = { - JsObject(MetadataBuilderActor.parseWorkflowEvents(includeCallsIfEmpty)(eventsList).fields + ("id" -> JsString(workflowId.toString))) + private def workflowMetadataResponse(workflowId: WorkflowId, eventsList: Seq[MetadataEvent], includeCallsIfEmpty: Boolean, expandedValues: Map[String, JsValue]) = { + JsObject(MetadataBuilderActor.parseWorkflowEvents(includeCallsIfEmpty, expandedValues)(eventsList).fields + ("id" -> JsString(workflowId.toString))) } } diff --git a/engine/src/test/scala/cromwell/ArrayOfArrayCoercionSpec.scala b/engine/src/test/scala/cromwell/ArrayOfArrayCoercionSpec.scala index 00a530f31e7..5374409350e 100644 --- a/engine/src/test/scala/cromwell/ArrayOfArrayCoercionSpec.scala +++ b/engine/src/test/scala/cromwell/ArrayOfArrayCoercionSpec.scala @@ -6,14 +6,14 @@ import wdl4s.values.{WdlArray, WdlString} import cromwell.util.SampleWdl -class ArrayOfArrayCoercionSpec extends CromwellTestkitSpec { +class ArrayOfArrayCoercionSpec extends CromwellTestKitSpec { "A workflow that has an Array[Array[File]] input " should { "accept an Array[Array[String]] as the value for the input" in { runWdlAndAssertOutputs( sampleWdl = SampleWdl.ArrayOfArrays, eventFilter = EventFilter.info(pattern = "Workflow complete", occurrences = 1), expectedOutputs = Map( - "wf.subtask.concatenated" -> WdlArray(WdlArrayType(WdlStringType), Seq( + "wf_subtask_concatenated" -> WdlArray(WdlArrayType(WdlStringType), Seq( WdlString("foo\nbar\nbaz"), WdlString("third\nfourth") )) diff --git a/engine/src/test/scala/cromwell/ArrayWorkflowSpec.scala b/engine/src/test/scala/cromwell/ArrayWorkflowSpec.scala index 843796c1a8a..9c9708307e2 100644 --- a/engine/src/test/scala/cromwell/ArrayWorkflowSpec.scala +++ b/engine/src/test/scala/cromwell/ArrayWorkflowSpec.scala @@ -5,15 +5,15 @@ import java.nio.file.Files import akka.testkit._ import better.files._ import cromwell.util.SampleWdl -import wdl4s.NamespaceWithWorkflow +import wdl4s.{ImportResolver, WdlNamespaceWithWorkflow} import wdl4s.expression.NoFunctions import wdl4s.types.{WdlArrayType, WdlFileType, WdlStringType} import wdl4s.values.{WdlArray, WdlFile, WdlInteger, WdlString} -class ArrayWorkflowSpec extends CromwellTestkitSpec { +class ArrayWorkflowSpec extends CromwellTestKitSpec { val tmpDir = Files.createTempDirectory("ArrayWorkflowSpec") - val ns = NamespaceWithWorkflow.load(SampleWdl.ArrayLiteral(tmpDir).wdlSource("")) + val ns = WdlNamespaceWithWorkflow.load(SampleWdl.ArrayLiteral(tmpDir).wdlSource(""), Seq.empty[ImportResolver]) val expectedArray = WdlArray(WdlArrayType(WdlFileType), Seq(WdlFile("f1"), WdlFile("f2"), WdlFile("f3"))) "A task which contains a parameter " should { @@ -22,9 +22,9 @@ class ArrayWorkflowSpec extends CromwellTestkitSpec { sampleWdl = SampleWdl.ArrayIO, eventFilter = EventFilter.info(pattern = "Workflow complete", occurrences = 1), expectedOutputs = Map( - "wf.count_lines.count" -> 
WdlInteger(3), - "wf.count_lines_array.count" -> WdlInteger(3), - "wf.serialize.contents" -> WdlString("str1\nstr2\nstr3") + "wf_count_lines_count" -> WdlInteger(3), + "wf_count_lines_array_count" -> WdlInteger(3), + "wf_serialize_contents" -> WdlString("str1\nstr2\nstr3") ) ) } @@ -32,7 +32,7 @@ class ArrayWorkflowSpec extends CromwellTestkitSpec { "A static Array[File] declaration" should { "be a valid declaration" in { - val declaration = ns.workflow.declarations.find {_.name == "arr"}.getOrElse { + val declaration = ns.workflow.declarations.find {_.unqualifiedName == "arr"}.getOrElse { fail("Expected declaration 'arr' to be found") } val expression = declaration.expression.getOrElse { @@ -47,14 +47,14 @@ class ArrayWorkflowSpec extends CromwellTestkitSpec { val catTask = ns.findTask("cat").getOrElse { fail("Expected to find task 'cat'") } - val command = catTask.instantiateCommand(Map("files" -> expectedArray), NoFunctions).getOrElse { + val command = catTask.instantiateCommand(catTask.inputsFromMap(Map("cat.files" -> expectedArray)), NoFunctions).getOrElse { fail("Expected instantiation to work") } command shouldEqual "cat -s f1 f2 f3" } "Coerce Array[String] to Array[File] when running the workflow" in { val outputs = Map( - "wf.cat.lines" -> WdlArray(WdlArrayType(WdlStringType), Seq( + "wf_cat_lines" -> WdlArray(WdlArrayType(WdlStringType), Seq( WdlString("line1"), WdlString("line2"), WdlString("line3"), diff --git a/engine/src/test/scala/cromwell/CallCachingWorkflowSpec.scala b/engine/src/test/scala/cromwell/CallCachingWorkflowSpec.scala index cea47fe8d1c..202dcecf152 100644 --- a/engine/src/test/scala/cromwell/CallCachingWorkflowSpec.scala +++ b/engine/src/test/scala/cromwell/CallCachingWorkflowSpec.scala @@ -11,7 +11,7 @@ import wdl4s.types.{WdlArrayType, WdlIntegerType, WdlStringType} import wdl4s.values.{WdlArray, WdlFile, WdlInteger, WdlString} -class CallCachingWorkflowSpec extends CromwellTestkitSpec { +class CallCachingWorkflowSpec extends CromwellTestKitSpec { def cacheHitMessageForCall(name: String) = s"Call Caching: Cache hit. Using UUID\\(.{8}\\):$name\\.*" val expectedOutputs = Map( diff --git a/engine/src/test/scala/cromwell/CopyWorkflowOutputsSpec.scala b/engine/src/test/scala/cromwell/CopyWorkflowOutputsSpec.scala index c43346330c4..b6465a8acaa 100644 --- a/engine/src/test/scala/cromwell/CopyWorkflowOutputsSpec.scala +++ b/engine/src/test/scala/cromwell/CopyWorkflowOutputsSpec.scala @@ -9,7 +9,7 @@ import org.scalatest.prop.Tables.Table import scala.language.postfixOps -class CopyWorkflowOutputsSpec extends CromwellTestkitSpec { +class CopyWorkflowOutputsSpec extends CromwellTestKitSpec { "CopyWorkflowOutputsCall" should { "copy workflow outputs" in { @@ -31,7 +31,7 @@ class CopyWorkflowOutputsSpec extends CromwellTestkitSpec { pattern = "transition from FinalizingWorkflowState to WorkflowSucceededState", occurrences = 1), runtime = "", workflowOptions = s""" { "final_workflow_outputs_dir": "$tmpDir" } """, - expectedOutputs = Seq("A.out", "A.out2", "B.outs") map { o => ("wfoutputs." 
+ o) -> CromwellTestkitSpec.AnyValueIsFine } toMap, + expectedOutputs = Seq("A_out", "A_out2", "B_outs") map { o => ("wfoutputs_" + o) -> CromwellTestKitSpec.AnyValueIsFine } toMap, allowOtherOutputs = false ) @@ -64,7 +64,7 @@ class CopyWorkflowOutputsSpec extends CromwellTestkitSpec { pattern = "transition from FinalizingWorkflowState to WorkflowSucceededState", occurrences = 1), runtime = "", workflowOptions = s""" { "final_workflow_outputs_dir": "$tmpDir" } """, - expectedOutputs = Map("wfoutputs.A.outs" -> CromwellTestkitSpec.AnyValueIsFine), + expectedOutputs = Map("wfoutputs_A_outs" -> CromwellTestKitSpec.AnyValueIsFine), allowOtherOutputs = false ) diff --git a/engine/src/test/scala/cromwell/CromwellTestkitSpec.scala b/engine/src/test/scala/cromwell/CromwellTestKitSpec.scala similarity index 94% rename from engine/src/test/scala/cromwell/CromwellTestkitSpec.scala rename to engine/src/test/scala/cromwell/CromwellTestKitSpec.scala index 090d94fd160..c30488027d9 100644 --- a/engine/src/test/scala/cromwell/CromwellTestkitSpec.scala +++ b/engine/src/test/scala/cromwell/CromwellTestKitSpec.scala @@ -2,12 +2,13 @@ package cromwell import java.nio.file.Paths import java.util.UUID +import java.util.concurrent.atomic.AtomicInteger import akka.actor.{Actor, ActorRef, ActorSystem, Props, Terminated} import akka.pattern.ask import akka.testkit._ import com.typesafe.config.{Config, ConfigFactory} -import cromwell.CromwellTestkitSpec._ +import cromwell.CromwellTestKitSpec._ import cromwell.backend._ import cromwell.core._ import cromwell.engine.backend.BackendConfigurationEntry @@ -21,6 +22,7 @@ import cromwell.server.{CromwellRootActor, CromwellSystem} import cromwell.services.ServiceRegistryActor import cromwell.services.metadata.MetadataQuery import cromwell.services.metadata.MetadataService._ +import cromwell.subworkflowstore.EmptySubWorkflowStoreActor import cromwell.util.SampleWdl import cromwell.webservice.PerRequest.RequestComplete import cromwell.webservice.metadata.MetadataBuilderActor @@ -30,7 +32,7 @@ import org.scalatest.time.{Millis, Seconds, Span} import org.scalatest.{BeforeAndAfterAll, Matchers, OneInstancePerTest, WordSpecLike} import spray.http.StatusCode import spray.json._ -import wdl4s.Call +import wdl4s.TaskCall import wdl4s.expression.{NoFunctions, WdlStandardLibraryFunctions} import wdl4s.types._ import wdl4s.values._ @@ -43,7 +45,7 @@ import scala.util.matching.Regex case class TestBackendLifecycleActorFactory(configurationDescriptor: BackendConfigurationDescriptor) extends BackendLifecycleActorFactory { override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], + calls: Set[TaskCall], serviceRegistryActor: ActorRef): Option[Props] = None override def jobExecutionActorProps(jobDescriptor: BackendJobDescriptor, @@ -63,7 +65,7 @@ case class TestBackendLifecycleActorFactory(configurationDescriptor: BackendConf case class OutputNotFoundException(outputFqn: String, actualOutputs: String) extends RuntimeException(s"Expected output $outputFqn was not found in: '$actualOutputs'") case class LogNotFoundException(log: String) extends RuntimeException(s"Expected log $log was not found") -object CromwellTestkitSpec { +object CromwellTestKitSpec { val ConfigText = """ |akka { @@ -121,9 +123,11 @@ object CromwellTestkitSpec { val TimeoutDuration = 60 seconds + private val testWorkflowManagerSystemCount = new AtomicInteger() + class TestWorkflowManagerSystem extends CromwellSystem { - override protected def systemName: String = 
"test-system" - override protected def newActorSystem() = ActorSystem(systemName, ConfigFactory.parseString(CromwellTestkitSpec.ConfigText)) + override protected def systemName: String = "test-system-" + testWorkflowManagerSystemCount.incrementAndGet() + override protected def newActorSystem() = ActorSystem(systemName, ConfigFactory.parseString(CromwellTestKitSpec.ConfigText)) /** * Do NOT shut down the test actor system inside the normal flow. * The actor system will be externally shutdown outside the block. @@ -134,18 +138,6 @@ object CromwellTestkitSpec { def shutdownTestActorSystem() = super.shutdownActorSystem() } - /** - * Loans a test actor system. NOTE: This should be run OUTSIDE of a wait block, never within one. - */ - def withTestWorkflowManagerSystem[T](block: CromwellSystem => T): T = { - val testWorkflowManagerSystem = new CromwellTestkitSpec.TestWorkflowManagerSystem - try { - block(testWorkflowManagerSystem) - } finally { - TestKit.shutdownActorSystem(testWorkflowManagerSystem.actorSystem, TimeoutDuration) - } - } - /** * Wait for exactly one occurrence of the specified info pattern in the specified block. The block is in its own * parameter list for usage syntax reasons. @@ -264,9 +256,11 @@ object CromwellTestkitSpec { } class TestCromwellRootActor(config: Config) extends CromwellRootActor { + override val serverMode = true override lazy val serviceRegistryActor = ServiceRegistryActorInstance override lazy val workflowStore = new InMemoryWorkflowStore - def submitWorkflow(sources: WorkflowSourceFiles): WorkflowId = { + override val abortJobsOnTerminate = false + def submitWorkflow(sources: WorkflowSourceFilesWithoutImports): WorkflowId = { val submitMessage = WorkflowStoreActor.SubmitWorkflow(sources) val result = Await.result(workflowStoreActor.ask(submitMessage)(TimeoutDuration), Duration.Inf).asInstanceOf[WorkflowSubmittedToStore].workflowId workflowManagerActor ! 
RetrieveNewWorkflows @@ -275,12 +269,12 @@ object CromwellTestkitSpec { } } -abstract class CromwellTestkitSpec(val twms: TestWorkflowManagerSystem = new CromwellTestkitSpec.TestWorkflowManagerSystem()) extends TestKit(twms.actorSystem) +abstract class CromwellTestKitSpec(val twms: TestWorkflowManagerSystem = new CromwellTestKitSpec.TestWorkflowManagerSystem()) extends TestKit(twms.actorSystem) with DefaultTimeout with ImplicitSender with WordSpecLike with Matchers with BeforeAndAfterAll with ScalaFutures with OneInstancePerTest with Eventually { override protected def afterAll() = { twms.shutdownTestActorSystem(); () } - implicit val defaultPatience = PatienceConfig(timeout = Span(30, Seconds), interval = Span(100, Millis)) + implicit val defaultPatience = PatienceConfig(timeout = Span(200, Seconds), interval = Span(1000, Millis)) implicit val ec = system.dispatcher val dummyServiceRegistryActor = system.actorOf(Props.empty) @@ -336,7 +330,7 @@ abstract class CromwellTestkitSpec(val twms: TestWorkflowManagerSystem = new Cro config: Config = DefaultConfig, patienceConfig: PatienceConfig = defaultPatience)(implicit ec: ExecutionContext): Map[FullyQualifiedName, WdlValue] = { val rootActor = buildCromwellRootActor(config) - val sources = WorkflowSourceFiles(sampleWdl.wdlSource(runtime), sampleWdl.wdlJson, workflowOptions) + val sources = WorkflowSourceFilesWithoutImports(sampleWdl.wdlSource(runtime), sampleWdl.wdlJson, workflowOptions) val workflowId = rootActor.underlyingActor.submitWorkflow(sources) eventually { verifyWorkflowState(rootActor.underlyingActor.serviceRegistryActor, workflowId, terminalState) } (config = patienceConfig, pos = implicitly[org.scalactic.source.Position]) val outcome = getWorkflowOutputsFromMetadata(workflowId, rootActor.underlyingActor.serviceRegistryActor) @@ -382,7 +376,7 @@ abstract class CromwellTestkitSpec(val twms: TestWorkflowManagerSystem = new Cro def getWorkflowMetadata(workflowId: WorkflowId, serviceRegistryActor: ActorRef, key: Option[String] = None)(implicit ec: ExecutionContext): JsObject = { // MetadataBuilderActor sends its response to context.parent, so we can't just use an ask to talk to it here - val message = GetMetadataQueryAction(MetadataQuery(workflowId, None, key, None, None)) + val message = GetMetadataQueryAction(MetadataQuery(workflowId, None, key, None, None, expandSubWorkflows = false)) val parentProbe = TestProbe() TestActorRef(MetadataBuilderActor.props(serviceRegistryActor), parentProbe.ref, s"MetadataActor-${UUID.randomUUID()}") ! 
message @@ -444,6 +438,10 @@ class AlwaysHappyJobStoreActor extends Actor { } } +object AlwaysHappySubWorkflowStoreActor { + def props: Props = Props(new EmptySubWorkflowStoreActor) +} + object AlwaysHappyJobStoreActor { def props: Props = Props(new AlwaysHappyJobStoreActor) } diff --git a/engine/src/test/scala/cromwell/DeclarationWorkflowSpec.scala b/engine/src/test/scala/cromwell/DeclarationWorkflowSpec.scala index d829b84eb90..aeca4777f11 100644 --- a/engine/src/test/scala/cromwell/DeclarationWorkflowSpec.scala +++ b/engine/src/test/scala/cromwell/DeclarationWorkflowSpec.scala @@ -1,7 +1,7 @@ package cromwell import wdl4s.types.{WdlFileType, WdlStringType} -import wdl4s.{NamespaceWithWorkflow, WorkflowInput} +import wdl4s.{ImportResolver, WdlNamespaceWithWorkflow, WorkflowInput} import cromwell.util.SampleWdl import org.scalatest.{Matchers, WordSpecLike} @@ -9,11 +9,11 @@ import org.scalatest.{Matchers, WordSpecLike} class DeclarationWorkflowSpec extends Matchers with WordSpecLike { "A workflow with declarations in it" should { "compute inputs properly" in { - NamespaceWithWorkflow.load(SampleWdl.DeclarationsWorkflow.wdlSource(runtime="")).workflow.inputs shouldEqual Map( - "two_step.cat.file" -> WorkflowInput("two_step.cat.file", WdlFileType, postfixQuantifier = None), - "two_step.cgrep.str_decl" -> WorkflowInput("two_step.cgrep.str_decl", WdlStringType, postfixQuantifier = None), - "two_step.cgrep.pattern" -> WorkflowInput("two_step.cgrep.pattern", WdlStringType, postfixQuantifier = None), - "two_step.flags_suffix" -> WorkflowInput("two_step.flags_suffix", WdlStringType, postfixQuantifier = None) + WdlNamespaceWithWorkflow.load(SampleWdl.DeclarationsWorkflow.wdlSource(runtime=""), Seq.empty[ImportResolver]).workflow.inputs shouldEqual Map( + "two_step.cat.file" -> WorkflowInput("two_step.cat.file", WdlFileType), + "two_step.cgrep.str_decl" -> WorkflowInput("two_step.cgrep.str_decl", WdlStringType), + "two_step.cgrep.pattern" -> WorkflowInput("two_step.cgrep.pattern", WdlStringType), + "two_step.flags_suffix" -> WorkflowInput("two_step.flags_suffix", WdlStringType) ) } } diff --git a/engine/src/test/scala/cromwell/FilePassingWorkflowSpec.scala b/engine/src/test/scala/cromwell/FilePassingWorkflowSpec.scala index 1aaafaf2fa1..40eb03624bc 100644 --- a/engine/src/test/scala/cromwell/FilePassingWorkflowSpec.scala +++ b/engine/src/test/scala/cromwell/FilePassingWorkflowSpec.scala @@ -6,19 +6,19 @@ import wdl4s.values.{WdlFile, WdlString} import scala.concurrent.duration._ -class FilePassingWorkflowSpec extends CromwellTestkitSpec { +class FilePassingWorkflowSpec extends CromwellTestKitSpec { "A workflow that passes files between tasks" should { "pass files properly" in { runWdlAndAssertOutputs( sampleWdl = SampleWdl.FilePassingWorkflow, EventFilter.info(pattern = "Workflow complete", occurrences = 1), expectedOutputs = Map( - "file_passing.a.out" -> WdlFile("out"), - "file_passing.a.out_interpolation" -> WdlFile("out"), - "file_passing.a.contents" -> WdlString("foo bar baz"), - "file_passing.b.out" -> WdlFile("out"), - "file_passing.b.out_interpolation" -> WdlFile("out"), - "file_passing.b.contents" -> WdlString("foo bar baz") + "file_passing_a_out" -> WdlFile("out"), + "file_passing_a_out_interpolation" -> WdlFile("out"), + "file_passing_a_contents" -> WdlString("foo bar baz"), + "file_passing_b_out" -> WdlFile("out"), + "file_passing_b_out_interpolation" -> WdlFile("out"), + "file_passing_b_contents" -> WdlString("foo bar baz") ), patienceConfig = PatienceConfig(2.minutes.dilated) ) diff 
--git a/engine/src/test/scala/cromwell/MapWorkflowSpec.scala b/engine/src/test/scala/cromwell/MapWorkflowSpec.scala index 9a00c115eb8..f13b022c590 100644 --- a/engine/src/test/scala/cromwell/MapWorkflowSpec.scala +++ b/engine/src/test/scala/cromwell/MapWorkflowSpec.scala @@ -3,17 +3,17 @@ package cromwell import akka.testkit._ import better.files._ import cromwell.util.SampleWdl -import wdl4s.NamespaceWithWorkflow +import wdl4s.{ImportResolver, WdlNamespaceWithWorkflow} import wdl4s.expression.{NoFunctions, WdlFunctions} import wdl4s.types.{WdlFileType, WdlIntegerType, WdlMapType, WdlStringType} import wdl4s.values._ import scala.util.{Success, Try} -class MapWorkflowSpec extends CromwellTestkitSpec { +class MapWorkflowSpec extends CromwellTestKitSpec { private val pwd = File(".") private val sampleWdl = SampleWdl.MapLiteral(pwd.path) - val ns = NamespaceWithWorkflow.load(sampleWdl.wdlSource("")) + val ns = WdlNamespaceWithWorkflow.load(sampleWdl.wdlSource(""), Seq.empty[ImportResolver]) val expectedMap = WdlMap(WdlMapType(WdlFileType, WdlStringType), Map( WdlFile("f1") -> WdlString("alice"), WdlFile("f2") -> WdlString("bob"), @@ -28,12 +28,12 @@ class MapWorkflowSpec extends CromwellTestkitSpec { sampleWdl = sampleWdl, EventFilter.info(pattern = "Starting calls: wf.read_map:NA:1, wf.write_map:NA:1", occurrences = 1), expectedOutputs = Map( - "wf.read_map.out_map" -> WdlMap(WdlMapType(WdlStringType, WdlIntegerType), Map( + "wf_read_map_out_map" -> WdlMap(WdlMapType(WdlStringType, WdlIntegerType), Map( WdlString("x") -> WdlInteger(500), WdlString("y") -> WdlInteger(600), WdlString("z") -> WdlInteger(700) )), - "wf.write_map.contents" -> WdlString("f1\talice\nf2\tbob\nf3\tchuck") + "wf_write_map_contents" -> WdlString("f1\talice\nf2\tbob\nf3\tchuck") ) ) sampleWdl.cleanup() @@ -42,7 +42,7 @@ class MapWorkflowSpec extends CromwellTestkitSpec { "A static Map[File, String] declaration" should { "be a valid declaration" in { - val declaration = ns.workflow.declarations.find {_.name == "map"}.getOrElse { + val declaration = ns.workflow.declarations.find {_.unqualifiedName == "map"}.getOrElse { fail("Expected declaration 'map' to be found") } val expression = declaration.expression.getOrElse { @@ -64,7 +64,7 @@ class MapWorkflowSpec extends CromwellTestkitSpec { case _ => throw new UnsupportedOperationException("Only write_map should be called") } } - val command = writeMapTask.instantiateCommand(Map("file_to_name" -> expectedMap), new CannedFunctions).getOrElse { + val command = writeMapTask.instantiateCommand(writeMapTask.inputsFromMap(Map("file_to_name" -> expectedMap)), new CannedFunctions).getOrElse { fail("Expected instantiation to work") } command shouldEqual "cat /test/map/path" @@ -75,7 +75,7 @@ class MapWorkflowSpec extends CromwellTestkitSpec { sampleWdl, eventFilter = EventFilter.info(pattern = "Starting calls: wf.read_map:NA:1, wf.write_map:NA:1", occurrences = 1), expectedOutputs = Map( - "wf.read_map.out_map" -> WdlMap(WdlMapType(WdlStringType, WdlIntegerType), Map( + "wf_read_map_out_map" -> WdlMap(WdlMapType(WdlStringType, WdlIntegerType), Map( WdlString("x") -> WdlInteger(500), WdlString("y") -> WdlInteger(600), WdlString("z") -> WdlInteger(700) diff --git a/engine/src/test/scala/cromwell/MetadataWatchActor.scala b/engine/src/test/scala/cromwell/MetadataWatchActor.scala index 691c4efc58e..c0c29444274 100644 --- a/engine/src/test/scala/cromwell/MetadataWatchActor.scala +++ b/engine/src/test/scala/cromwell/MetadataWatchActor.scala @@ -1,7 +1,7 @@ package cromwell import 
akka.actor.{Actor, Props} -import cromwell.services.metadata.{MetadataEvent, MetadataJobKey, MetadataString} +import cromwell.services.metadata.{MetadataEvent, MetadataJobKey, MetadataString, MetadataValue} import cromwell.services.metadata.MetadataService.PutMetadataAction import MetadataWatchActor._ @@ -32,26 +32,36 @@ object MetadataWatchActor { trait Matcher { def matches(events: Traversable[MetadataEvent]): Boolean + private var _nearMisses: List[String] = List.empty + protected def addNearMissInfo(miss: String) = _nearMisses :+= miss + def nearMissInformation = _nearMisses + + def checkMetadataValueContains(key: String, actual: MetadataValue, expected: String): Boolean = { + val result = actual.value.contains(expected) + if (!result) addNearMissInfo(s"Key $key had unexpected value.\nActual value: ${actual.value}\n\nDid not contain: $expected") + result + } } def metadataKeyAttemptChecker(attempt: Int): Option[MetadataJobKey] => Boolean = { case Some(jobKey) => jobKey.attempt == attempt case None => false } + final case class JobKeyMetadataKeyAndValueContainStringMatcher(jobKeyCheck: Option[MetadataJobKey] => Boolean, key: String, value: String) extends Matcher { def matches(events: Traversable[MetadataEvent]): Boolean = { - events.exists(e => e.key.key.contains(key) && jobKeyCheck(e.key.jobKey) && e.value.exists { v => v.valueType == MetadataString && v.value.contains(value) }) + events.exists(e => e.key.key.contains(key) && jobKeyCheck(e.key.jobKey) && e.value.exists { v => v.valueType == MetadataString && checkMetadataValueContains(e.key.key, v, value) }) } } abstract class KeyMatchesRegexAndValueContainsStringMatcher(keyTemplate: String, value: String) extends Matcher { val templateRegex = keyTemplate.r def matches(events: Traversable[MetadataEvent]): Boolean = { - events.exists(e => templateRegex.findFirstIn(e.key.key).isDefined && e.value.exists { v => v.value.contains(value) }) + events.exists(e => templateRegex.findFirstIn(e.key.key).isDefined && + e.value.exists { v => checkMetadataValueContains(e.key.key, v, value) }) } } val failurePattern = """failures\[\d*\].message""" - final case class FailureMatcher(value: String) extends KeyMatchesRegexAndValueContainsStringMatcher(failurePattern, value) { - } + final case class FailureMatcher(value: String) extends KeyMatchesRegexAndValueContainsStringMatcher(failurePattern, value) { } } diff --git a/engine/src/test/scala/cromwell/MultipleFilesWithSameNameWorkflowSpec.scala b/engine/src/test/scala/cromwell/MultipleFilesWithSameNameWorkflowSpec.scala index f0b7a70af0f..4fc24c56e23 100644 --- a/engine/src/test/scala/cromwell/MultipleFilesWithSameNameWorkflowSpec.scala +++ b/engine/src/test/scala/cromwell/MultipleFilesWithSameNameWorkflowSpec.scala @@ -5,15 +5,15 @@ import cromwell.util.SampleWdl import wdl4s.values.WdlString -class MultipleFilesWithSameNameWorkflowSpec extends CromwellTestkitSpec { +class MultipleFilesWithSameNameWorkflowSpec extends CromwellTestKitSpec { "A workflow with two file inputs that have the same name" should { "not clobber one file with the contents of another" in { runWdlAndAssertOutputs( sampleWdl = SampleWdl.FileClobber, EventFilter.info(pattern = "Starting calls: two.x:NA:1, two.y:NA:1", occurrences = 1), expectedOutputs = Map( - "two.x.out" -> WdlString("first file.txt"), - "two.y.out" -> WdlString("second file.txt") + "two_x_out" -> WdlString("first file.txt"), + "two_y_out" -> WdlString("second file.txt") ) ) } diff --git a/engine/src/test/scala/cromwell/OptionalParamWorkflowSpec.scala 
b/engine/src/test/scala/cromwell/OptionalParamWorkflowSpec.scala index 919008315bc..73347b7944a 100644 --- a/engine/src/test/scala/cromwell/OptionalParamWorkflowSpec.scala +++ b/engine/src/test/scala/cromwell/OptionalParamWorkflowSpec.scala @@ -22,20 +22,20 @@ class OptionalParamWorkflowSpec extends Matchers with WordSpecLike { | call find |} """.stripMargin - val ns = WdlNamespace.load(wf) + val ns = WdlNamespace.loadUsingSource(wf, None, None) val findTask = ns.findTask("find") getOrElse { fail("Expected to find task 'find'") } - val instantiateWithoutValue = findTask.instantiateCommand(Map("root" -> WdlFile("src")), NoFunctions) getOrElse { + val instantiateWithoutValue = findTask.instantiateCommand(findTask.inputsFromMap(Map("find.root" -> WdlFile("src"))), NoFunctions) getOrElse { fail("Expected instantiation to work") } instantiateWithoutValue shouldEqual "find src" - val instantiateWithValue = findTask.instantiateCommand(Map( - "root" -> WdlFile("src"), - "pattern" -> WdlString("*.java") - ), NoFunctions).getOrElse {fail("Expected instantiation to work")} + val instantiateWithValue = findTask.instantiateCommand(findTask.inputsFromMap(Map( + "find.root" -> WdlFile("src"), + "find.pattern" -> WdlString("*.java") + )), NoFunctions).getOrElse {fail("Expected instantiation to work")} instantiateWithValue shouldEqual "find src -name *.java" } } diff --git a/engine/src/test/scala/cromwell/PostfixQuantifierWorkflowSpec.scala b/engine/src/test/scala/cromwell/PostfixQuantifierWorkflowSpec.scala index 8530dd6d29c..c72d98758d8 100644 --- a/engine/src/test/scala/cromwell/PostfixQuantifierWorkflowSpec.scala +++ b/engine/src/test/scala/cromwell/PostfixQuantifierWorkflowSpec.scala @@ -5,27 +5,27 @@ import wdl4s.values.WdlString import cromwell.util.SampleWdl -class PostfixQuantifierWorkflowSpec extends CromwellTestkitSpec { +class PostfixQuantifierWorkflowSpec extends CromwellTestKitSpec { "A task which contains a parameter with a zero-or-more postfix quantifier" should { "accept an array of size 3" in { runWdlAndAssertOutputs( sampleWdl = SampleWdl.ZeroOrMorePostfixQuantifierWorkflowWithArrayInput, EventFilter.info(pattern = "Starting calls: postfix.hello", occurrences = 1), - expectedOutputs = Map("postfix.hello.greeting" -> WdlString("hello alice,bob,charles")) + expectedOutputs = Map("postfix_hello_greeting" -> WdlString("hello alice,bob,charles")) ) } "accept an array of size 1" in { runWdlAndAssertOutputs( sampleWdl = SampleWdl.ZeroOrMorePostfixQuantifierWorkflowWithOneElementArrayInput, EventFilter.info(pattern = "Starting calls: postfix.hello", occurrences = 1), - expectedOutputs = Map("postfix.hello.greeting" -> WdlString("hello alice")) + expectedOutputs = Map("postfix_hello_greeting" -> WdlString("hello alice")) ) } "accept an array of size 0" in { runWdlAndAssertOutputs( sampleWdl = SampleWdl.ZeroOrMorePostfixQuantifierWorkflowWithZeroElementArrayInput, EventFilter.info(pattern = "Starting calls: postfix.hello", occurrences = 1), - expectedOutputs = Map("postfix.hello.greeting" -> WdlString("hello")) + expectedOutputs = Map("postfix_hello_greeting" -> WdlString("hello")) ) } } @@ -35,14 +35,14 @@ class PostfixQuantifierWorkflowSpec extends CromwellTestkitSpec { runWdlAndAssertOutputs( sampleWdl = SampleWdl.OneOrMorePostfixQuantifierWorkflowWithArrayInput, EventFilter.info(pattern = "Starting calls: postfix.hello", occurrences = 1), - expectedOutputs = Map("postfix.hello.greeting" -> WdlString("hello alice,bob,charles")) + expectedOutputs = Map("postfix_hello_greeting" -> 
WdlString("hello alice,bob,charles")) ) } "accept a scalar for the value" in { runWdlAndAssertOutputs( sampleWdl = SampleWdl.OneOrMorePostfixQuantifierWorkflowWithScalarInput, EventFilter.info(pattern = "Starting calls: postfix.hello", occurrences = 1), - expectedOutputs = Map("postfix.hello.greeting" -> WdlString("hello alice")) + expectedOutputs = Map("postfix_hello_greeting" -> WdlString("hello alice")) ) } } diff --git a/engine/src/test/scala/cromwell/RestartWorkflowSpec.scala b/engine/src/test/scala/cromwell/RestartWorkflowSpec.scala index 6b706fbc6ea..5db1f3ebe3f 100644 --- a/engine/src/test/scala/cromwell/RestartWorkflowSpec.scala +++ b/engine/src/test/scala/cromwell/RestartWorkflowSpec.scala @@ -6,11 +6,11 @@ import cromwell.core.Tags._ import cromwell.core._ import cromwell.engine.workflow.WorkflowDescriptorBuilder -class RestartWorkflowSpec extends CromwellTestkitSpec with WorkflowDescriptorBuilder { +class RestartWorkflowSpec extends CromwellTestKitSpec with WorkflowDescriptorBuilder { - val actorSystem = ActorSystem("RestartWorkflowSpec", ConfigFactory.parseString(CromwellTestkitSpec.ConfigText)) + val actorSystem = ActorSystem("RestartWorkflowSpec", ConfigFactory.parseString(CromwellTestKitSpec.ConfigText)) //val localBackend = new OldStyleLocalBackend(CromwellTestkitSpec.DefaultLocalBackendConfigEntry, actorSystem) - val sources = WorkflowSourceFiles( + val sources = WorkflowSourceFilesWithoutImports( wdlSource="""task a {command{}} |workflow w { | call a diff --git a/engine/src/test/scala/cromwell/ScatterWorkflowSpec.scala b/engine/src/test/scala/cromwell/ScatterWorkflowSpec.scala index 0d8847a27b2..c1ca1ad9e94 100644 --- a/engine/src/test/scala/cromwell/ScatterWorkflowSpec.scala +++ b/engine/src/test/scala/cromwell/ScatterWorkflowSpec.scala @@ -6,15 +6,15 @@ import wdl4s.types.{WdlArrayType, WdlFileType, WdlIntegerType, WdlStringType} import wdl4s.values.{WdlArray, WdlFile, WdlInteger, WdlString} import cromwell.util.SampleWdl -class ScatterWorkflowSpec extends CromwellTestkitSpec { +class ScatterWorkflowSpec extends CromwellTestKitSpec { "A workflow with a stand-alone scatter block in it" should { "run properly" in { runWdlAndAssertOutputs( sampleWdl = SampleWdl.SimpleScatterWdl, eventFilter = EventFilter.info(pattern = "Workflow complete", occurrences = 1), expectedOutputs = Map( - "scatter0.outside_scatter.out" -> WdlInteger(8000), - "scatter0.inside_scatter.out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(1, 2, 3, 4, 5).map(WdlInteger(_))) + "scatter0_outside_scatter_out" -> WdlInteger(8000), + "scatter0_inside_scatter_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(1, 2, 3, 4, 5).map(WdlInteger(_))) ) ) } @@ -25,11 +25,11 @@ class ScatterWorkflowSpec extends CromwellTestkitSpec { sampleWdl = new SampleWdl.ScatterWdl, eventFilter = EventFilter.info(pattern = "Workflow complete", occurrences = 1), expectedOutputs = Map( - "w.E.E_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(9, 9, 9, 9, 9, 9).map(WdlInteger(_))), - "w.C.C_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(400, 500, 600, 800, 600, 500).map(WdlInteger(_))), - "w.A.A_out" -> WdlArray(WdlArrayType(WdlStringType), Seq("jeff", "chris", "miguel", "thibault", "khalid", "scott").map(WdlString)), - "w.D.D_out" -> WdlInteger(34), - "w.B.B_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(4, 5, 6, 8, 6, 5).map(WdlInteger(_))) + "w_E_E_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(9, 9, 9, 9, 9, 9).map(WdlInteger(_))), + "w_C_C_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(400, 500, 600, 800, 600, 
500).map(WdlInteger(_))), + "w_A_A_out" -> WdlArray(WdlArrayType(WdlStringType), Seq("jeff", "chris", "miguel", "thibault", "khalid", "ruchi").map(WdlString)), + "w_D_D_out" -> WdlInteger(34), + "w_B_B_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(4, 5, 6, 8, 6, 5).map(WdlInteger(_))) ) ) } @@ -40,12 +40,12 @@ class ScatterWorkflowSpec extends CromwellTestkitSpec { sampleWdl = SampleWdl.SiblingsScatterWdl, eventFilter = EventFilter.info(pattern = "Workflow complete", occurrences = 1), expectedOutputs = Map( - "w.E.E_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(9, 9, 9, 9, 9, 9).map(WdlInteger(_))), - "w.F.B_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(4, 5, 6, 8, 6, 5).map(WdlInteger(_))), - "w.C.C_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(400, 500, 600, 800, 600, 500).map(WdlInteger(_))), - "w.A.A_out" -> WdlArray(WdlArrayType(WdlStringType), Seq("jeff", "chris", "miguel", "thibault", "khalid", "scott").map(WdlString)), - "w.D.D_out" -> WdlInteger(34), - "w.B.B_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(4, 5, 6, 8, 6, 5).map(WdlInteger(_))) + "w_E_E_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(9, 9, 9, 9, 9, 9).map(WdlInteger(_))), + "w_F_B_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(4, 5, 6, 8, 6, 5).map(WdlInteger(_))), + "w_C_C_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(400, 500, 600, 800, 600, 500).map(WdlInteger(_))), + "w_A_A_out" -> WdlArray(WdlArrayType(WdlStringType), Seq("jeff", "chris", "miguel", "thibault", "khalid", "ruchi").map(WdlString)), + "w_D_D_out" -> WdlInteger(34), + "w_B_B_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(4, 5, 6, 8, 6, 5).map(WdlInteger(_))) ) ) } @@ -57,9 +57,9 @@ class ScatterWorkflowSpec extends CromwellTestkitSpec { sampleWdl = SampleWdl.PrepareScatterGatherWdl(), eventFilter = EventFilter.info(pattern = "Workflow complete", occurrences = 1), expectedOutputs = Map( - "sc_test.do_gather.sum" -> WdlInteger(11), - "sc_test.do_prepare.split_files" -> WdlArray(WdlArrayType(WdlFileType), Seq("temp_aa", "temp_ab", "temp_ac", "temp_ad").map(WdlFile(_))), - "sc_test.do_scatter.count_file" -> WdlArray(WdlArrayType(WdlFileType), (1 to 4).map(_ => WdlFile("output.txt"))) + "sc_test_do_gather_sum" -> WdlInteger(11), + "sc_test_do_prepare_split_files" -> WdlArray(WdlArrayType(WdlFileType), Seq("temp_aa", "temp_ab", "temp_ac", "temp_ad").map(WdlFile(_))), + "sc_test_do_scatter_count_file" -> WdlArray(WdlArrayType(WdlFileType), (1 to 4).map(_ => WdlFile("output.txt"))) ) ) } @@ -74,9 +74,9 @@ class ScatterWorkflowSpec extends CromwellTestkitSpec { |} """.stripMargin, expectedOutputs = Map( - "sc_test.do_gather.sum" -> WdlInteger(11), - "sc_test.do_prepare.split_files" -> WdlArray(WdlArrayType(WdlFileType), Seq("temp_aa", "temp_ab", "temp_ac", "temp_ad").map(WdlFile(_))), - "sc_test.do_scatter.count_file" -> WdlArray(WdlArrayType(WdlFileType), (1 to 4).map(_ => WdlFile("output.txt"))) + "sc_test_do_gather_sum" -> WdlInteger(11), + "sc_test_do_prepare_split_files" -> WdlArray(WdlArrayType(WdlFileType), Seq("temp_aa", "temp_ab", "temp_ac", "temp_ad").map(WdlFile(_))), + "sc_test_do_scatter_count_file" -> WdlArray(WdlArrayType(WdlFileType), (1 to 4).map(_ => WdlFile("output.txt"))) ) ) } diff --git a/engine/src/test/scala/cromwell/SimpleWorkflowActorSpec.scala b/engine/src/test/scala/cromwell/SimpleWorkflowActorSpec.scala index f327c6da413..3859368d761 100644 --- a/engine/src/test/scala/cromwell/SimpleWorkflowActorSpec.scala +++ b/engine/src/test/scala/cromwell/SimpleWorkflowActorSpec.scala @@ -7,7 +7,7 @@ 
import akka.testkit._ import com.typesafe.config.ConfigFactory import cromwell.MetadataWatchActor.{FailureMatcher, Matcher} import cromwell.SimpleWorkflowActorSpec._ -import cromwell.core.{WorkflowId, WorkflowSourceFiles} +import cromwell.core.{WorkflowId, WorkflowSourceFilesWithoutImports} import cromwell.engine.backend.BackendSingletonCollection import cromwell.engine.workflow.WorkflowActor import cromwell.engine.workflow.WorkflowActor._ @@ -28,13 +28,13 @@ object SimpleWorkflowActorSpec { promise: Promise[Unit]) } -class SimpleWorkflowActorSpec extends CromwellTestkitSpec with BeforeAndAfter { +class SimpleWorkflowActorSpec extends CromwellTestKitSpec with BeforeAndAfter { private def buildWorkflowActor(sampleWdl: SampleWdl, rawInputsOverride: String, workflowId: WorkflowId, matchers: Matcher*): TestableWorkflowActorAndMetadataPromise = { - val workflowSources = WorkflowSourceFiles(sampleWdl.wdlSource(), rawInputsOverride, "{}") + val workflowSources = WorkflowSourceFilesWithoutImports(sampleWdl.wdlSource(), rawInputsOverride, "{}") val promise = Promise[Unit]() val watchActor = system.actorOf(MetadataWatchActor.props(promise, matchers: _*), s"service-registry-$workflowId-${UUID.randomUUID()}") val supervisor = TestProbe() @@ -43,9 +43,11 @@ class SimpleWorkflowActorSpec extends CromwellTestkitSpec with BeforeAndAfter { serviceRegistryActor = watchActor, workflowLogCopyRouter = system.actorOf(Props.empty, s"workflow-copy-log-router-$workflowId-${UUID.randomUUID()}"), jobStoreActor = system.actorOf(AlwaysHappyJobStoreActor.props), + subWorkflowStoreActor = system.actorOf(AlwaysHappySubWorkflowStoreActor.props), callCacheReadActor = system.actorOf(EmptyCallCacheReadActor.props), jobTokenDispenserActor = system.actorOf(JobExecutionTokenDispenserActor.props), - backendSingletonCollection = BackendSingletonCollection(Map("Local" -> None))), + backendSingletonCollection = BackendSingletonCollection(Map("Local" -> None)), + serverMode = true), supervisor = supervisor.ref, name = s"workflow-actor-$workflowId" ) @@ -64,7 +66,7 @@ class SimpleWorkflowActorSpec extends CromwellTestkitSpec with BeforeAndAfter { val TestableWorkflowActorAndMetadataPromise(workflowActor, supervisor, _) = buildWorkflowActor(SampleWdl.HelloWorld, SampleWdl.HelloWorld.wdlJson, workflowId) val probe = TestProbe() probe watch workflowActor - startingCallsFilter("hello.hello") { + startingCallsFilter("wf_hello.hello") { workflowActor ! StartWorkflowCommand } @@ -75,7 +77,7 @@ class SimpleWorkflowActorSpec extends CromwellTestkitSpec with BeforeAndAfter { } "fail to construct with missing inputs" in { - val expectedError = "Required workflow input 'hello.hello.addressee' not specified." + val expectedError = "Required workflow input 'wf_hello.hello.addressee' not specified." 
val failureMatcher = FailureMatcher(expectedError) val TestableWorkflowActorAndMetadataPromise(workflowActor, supervisor, promise) = buildWorkflowActor(SampleWdl.HelloWorld, "{}", workflowId, failureMatcher) val probe = TestProbe() @@ -92,7 +94,7 @@ class SimpleWorkflowActorSpec extends CromwellTestkitSpec with BeforeAndAfter { } "fail to construct with inputs of the wrong type" in { - val expectedError = "Could not coerce value for 'hello.hello.addressee' into: WdlStringType" + val expectedError = "Could not coerce JsNumber value for 'wf_hello.hello.addressee' (3) into: WdlStringType" val failureMatcher = FailureMatcher(expectedError) val TestableWorkflowActorAndMetadataPromise(workflowActor, supervisor, promise) = buildWorkflowActor(SampleWdl.HelloWorld, s""" { "$Addressee" : 3} """, workflowId, failureMatcher) @@ -100,7 +102,13 @@ class SimpleWorkflowActorSpec extends CromwellTestkitSpec with BeforeAndAfter { val probe = TestProbe() probe watch workflowActor workflowActor ! StartWorkflowCommand - Await.result(promise.future, TestExecutionTimeout) + try { + Await.result(promise.future, TestExecutionTimeout) + } catch { + case e: Throwable => + val info = failureMatcher.nearMissInformation + fail(s"We didn't see the expected error message $expectedError within $TestExecutionTimeout. ${info.mkString(", ")}") + } probe.expectTerminated(workflowActor, AwaitAlmostNothing) supervisor.expectMsgPF(AwaitAlmostNothing, "parent should get a failed response") { case x: WorkflowFailedResponse => @@ -111,12 +119,12 @@ class SimpleWorkflowActorSpec extends CromwellTestkitSpec with BeforeAndAfter { } "fail when a call fails" in { - val expectedError = "Call goodbye.goodbye: return code was 1" + val expectedError = "Call wf_goodbye.goodbye:NA:1: return code was 1" val failureMatcher = FailureMatcher(expectedError) val TestableWorkflowActorAndMetadataPromise(workflowActor, supervisor, promise) = buildWorkflowActor(SampleWdl.GoodbyeWorld, SampleWdl.GoodbyeWorld.wdlJson, workflowId, failureMatcher) val probe = TestProbe() probe watch workflowActor - startingCallsFilter("goodbye.goodbye") { + startingCallsFilter("wf_goodbye.goodbye") { workflowActor ! 
StartWorkflowCommand } Await.result(promise.future, TestExecutionTimeout) @@ -130,7 +138,7 @@ class SimpleWorkflowActorSpec extends CromwellTestkitSpec with BeforeAndAfter { } "gracefully handle malformed WDL" in { - val expectedError = "Input evaluation for Call test1.summary failedVariable 'Can't find bfile' not found" + val expectedError = "Input evaluation for Call test1.summary failed.\nVariable 'bfile' not found" val failureMatcher = FailureMatcher(expectedError) val TestableWorkflowActorAndMetadataPromise(workflowActor, supervisor, promise) = buildWorkflowActor(SampleWdl.CoercionNotDefined, SampleWdl.CoercionNotDefined.wdlJson, workflowId, failureMatcher) val probe = TestProbe() @@ -148,7 +156,7 @@ class SimpleWorkflowActorSpec extends CromwellTestkitSpec with BeforeAndAfter { } private def startingCallsFilter[T](callNames: String*)(block: => T): T = { - import CromwellTestkitSpec.waitForInfo + import CromwellTestKitSpec.waitForInfo within(TestExecutionTimeout) { waitForInfo(s"Starting calls: ${callNames.mkString("", ":NA:1, ", ":NA:1")}$$", 1) { block diff --git a/engine/src/test/scala/cromwell/WdlFunctionsAtWorkflowLevelSpec.scala b/engine/src/test/scala/cromwell/WdlFunctionsAtWorkflowLevelSpec.scala index 72c618fca52..3052e337041 100644 --- a/engine/src/test/scala/cromwell/WdlFunctionsAtWorkflowLevelSpec.scala +++ b/engine/src/test/scala/cromwell/WdlFunctionsAtWorkflowLevelSpec.scala @@ -6,7 +6,7 @@ import wdl4s.types.{WdlMapType, WdlStringType} import wdl4s.values.{WdlMap, WdlString} -class WdlFunctionsAtWorkflowLevelSpec extends CromwellTestkitSpec { +class WdlFunctionsAtWorkflowLevelSpec extends CromwellTestKitSpec { val outputMap = WdlMap(WdlMapType(WdlStringType, WdlStringType), Map( WdlString("k1") -> WdlString("v1"), WdlString("k2") -> WdlString("v2"), @@ -19,8 +19,8 @@ class WdlFunctionsAtWorkflowLevelSpec extends CromwellTestkitSpec { sampleWdl = SampleWdl.WdlFunctionsAtWorkflowLevel, eventFilter = EventFilter.info(pattern = "Starting calls: w.a", occurrences = 1), expectedOutputs = Map( - "w.a.x" -> WdlString("one two three four five"), - "w.a.y" -> outputMap + "w_a_x" -> WdlString("one two three four five"), + "w_a_y" -> outputMap ) ) } diff --git a/engine/src/test/scala/cromwell/WorkflowFailSlowSpec.scala b/engine/src/test/scala/cromwell/WorkflowFailSlowSpec.scala index 1cd7a7ef36c..2093cf19848 100644 --- a/engine/src/test/scala/cromwell/WorkflowFailSlowSpec.scala +++ b/engine/src/test/scala/cromwell/WorkflowFailSlowSpec.scala @@ -5,7 +5,7 @@ import cromwell.util.SampleWdl // TODO: These tests are (and were) somewhat unsatisfactory. They'd be much better if we use TestFSMRefs and TestProbes to simulate job completions against the WorkflowActor and make sure it only completes the workflow at the appropriate time. 
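// --- Editor's illustrative sketch, not part of the patch itself ---
// The TODO above proposes exercising completion ordering with TestFSMRefs and TestProbes instead
// of real job executions. WorkflowActorSpec (later in this patch series) already follows that
// shape; assuming its createWorkflowActor helper and finalizationProbe, the core assertion that
// the workflow only completes after finalization would look roughly like this:
val actor = createWorkflowActor(ExecutingWorkflowState)          // helper assumed from WorkflowActorSpec
actor ! WorkflowExecutionSucceededResponse(Map.empty, Map.empty) // simulate all jobs completing
finalizationProbe.expectMsg(StartFinalizationCommand)            // finalization must be requested first
actor.stateName should be(FinalizingWorkflowState)               // workflow is not yet terminal here
actor ! WorkflowFinalizationSucceededResponse                    // only now may the workflow complete
// ------------------------------------------------------------------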
-class WorkflowFailSlowSpec extends CromwellTestkitSpec { +class WorkflowFailSlowSpec extends CromwellTestKitSpec { val FailFastOptions = """ |{ diff --git a/engine/src/test/scala/cromwell/WorkflowOutputsSpec.scala b/engine/src/test/scala/cromwell/WorkflowOutputsSpec.scala index 18df317959d..0f210fd1e93 100644 --- a/engine/src/test/scala/cromwell/WorkflowOutputsSpec.scala +++ b/engine/src/test/scala/cromwell/WorkflowOutputsSpec.scala @@ -2,10 +2,10 @@ package cromwell import akka.testkit._ import cromwell.util.SampleWdl -import cromwell.CromwellTestkitSpec.AnyValueIsFine +import cromwell.CromwellTestKitSpec.AnyValueIsFine -class WorkflowOutputsSpec extends CromwellTestkitSpec { +class WorkflowOutputsSpec extends CromwellTestKitSpec { "Workflow outputs" should { "use all outputs if none are specified" in { runWdlAndAssertOutputs( @@ -13,9 +13,9 @@ class WorkflowOutputsSpec extends CromwellTestkitSpec { eventFilter = EventFilter.info(pattern = s"is in a terminal state: WorkflowSucceededState", occurrences = 1), runtime = "", expectedOutputs = Map( - "three_step.ps.procs" -> AnyValueIsFine, - "three_step.cgrep.count" -> AnyValueIsFine, - "three_step.wc.count" -> AnyValueIsFine + "three_step_ps_procs" -> AnyValueIsFine, + "three_step_cgrep_count" -> AnyValueIsFine, + "three_step_wc_count" -> AnyValueIsFine ), allowOtherOutputs = false ) @@ -27,8 +27,8 @@ class WorkflowOutputsSpec extends CromwellTestkitSpec { eventFilter = EventFilter.info(pattern = s"is in a terminal state: WorkflowSucceededState", occurrences = 1), runtime = "", expectedOutputs = Map( - "three_step.cgrep.count" -> AnyValueIsFine, - "three_step.wc.count" -> AnyValueIsFine + "three_step_cgrep_count" -> AnyValueIsFine, + "three_step_wc_count" -> AnyValueIsFine ), allowOtherOutputs = false ) @@ -40,8 +40,8 @@ class WorkflowOutputsSpec extends CromwellTestkitSpec { eventFilter = EventFilter.info(pattern = s"is in a terminal state: WorkflowSucceededState", occurrences = 1), runtime = "", expectedOutputs = Map( - "scatter0.outside_scatter.out" -> AnyValueIsFine, - "scatter0.inside_scatter.out" -> AnyValueIsFine + "scatter0_outside_scatter_out" -> AnyValueIsFine, + "scatter0_inside_scatter_out" -> AnyValueIsFine ), allowOtherOutputs = false ) @@ -53,7 +53,7 @@ class WorkflowOutputsSpec extends CromwellTestkitSpec { eventFilter = EventFilter.info(pattern = s"is in a terminal state: WorkflowSucceededState", occurrences = 1), runtime = "", expectedOutputs = Map( - "scatter0.inside_scatter.out" -> AnyValueIsFine + "scatter0_inside_scatter_out" -> AnyValueIsFine ), allowOtherOutputs = false ) diff --git a/engine/src/test/scala/cromwell/engine/EngineFunctionsSpec.scala b/engine/src/test/scala/cromwell/engine/EngineFunctionsSpec.scala index ec512107e9e..14ccbd6bddb 100644 --- a/engine/src/test/scala/cromwell/engine/EngineFunctionsSpec.scala +++ b/engine/src/test/scala/cromwell/engine/EngineFunctionsSpec.scala @@ -1,21 +1,23 @@ package cromwell.engine -import java.nio.file.{FileSystem, FileSystems, Path} +import java.nio.file.Path -import cromwell.backend.wdl.{PureFunctions, ReadLikeFunctions, WriteFunctions} +import cromwell.backend.wdl.{ReadLikeFunctions, WriteFunctions} +import cromwell.core.path.{DefaultPathBuilder, PathBuilder} import org.scalatest.prop.TableDrivenPropertyChecks._ import org.scalatest.prop.Tables.Table import org.scalatest.{FlatSpec, Matchers} -import wdl4s.expression.{NoFunctions, WdlStandardLibraryFunctions} +import wdl4s.expression.{NoFunctions, PureStandardLibraryFunctionsLike, WdlStandardLibraryFunctions} import 
wdl4s.values.{WdlFile, WdlInteger, WdlString, WdlValue} import scala.util.{Failure, Success, Try} class EngineFunctionsSpec extends FlatSpec with Matchers { - trait WdlStandardLibraryImpl extends WdlStandardLibraryFunctions with ReadLikeFunctions with WriteFunctions with PureFunctions { + trait WdlStandardLibraryImpl extends WdlStandardLibraryFunctions with ReadLikeFunctions with WriteFunctions with PureStandardLibraryFunctionsLike { private def fail(name: String) = Failure(new NotImplementedError(s"$name() not implemented yet")) + override def writeTempFile(path: String, prefix: String, suffix: String, content: String): String = super[WriteFunctions].writeTempFile(path, prefix, suffix, content) override def stdout(params: Seq[Try[WdlValue]]): Try[WdlFile] = fail("stdout") override def stderr(params: Seq[Try[WdlValue]]): Try[WdlFile] = fail("stderr") } @@ -38,7 +40,7 @@ class EngineFunctionsSpec extends FlatSpec with Matchers { "sub" should "replace a string according to a pattern" in { class TestEngineFn extends WdlStandardLibraryImpl { override def glob(path: String, pattern: String): Seq[String] = ??? - override def fileSystems: List[FileSystem] = List(FileSystems.getDefault) + override def pathBuilders: List[PathBuilder] = List(DefaultPathBuilder) override def writeDirectory: Path = ??? } diff --git a/engine/src/test/scala/cromwell/engine/WorkflowAbortSpec.scala b/engine/src/test/scala/cromwell/engine/WorkflowAbortSpec.scala index 6a4b0a077ac..37ed641c8a7 100644 --- a/engine/src/test/scala/cromwell/engine/WorkflowAbortSpec.scala +++ b/engine/src/test/scala/cromwell/engine/WorkflowAbortSpec.scala @@ -1,8 +1,8 @@ package cromwell.engine -import cromwell.CromwellTestkitSpec +import cromwell.CromwellTestKitSpec -class WorkflowAbortSpec extends CromwellTestkitSpec { +class WorkflowAbortSpec extends CromwellTestKitSpec { // TODO: When re-enabled, this test also needs to check that child processes have actually been stopped. 
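// --- Editor's illustrative sketch, not part of the patch itself ---
// The TODO above notes that, when re-enabled, this abort test should also verify that child
// processes spawned by aborted calls have really been stopped. One hedged way to express that,
// assuming the test records the PID of the launched child as `recordedPid`, is:
import scala.sys.process._

def processIsAlive(pid: Int): Boolean =
  Seq("ps", "-p", pid.toString).!(ProcessLogger(_ => ())) == 0   // `ps -p` exits 0 iff the PID exists

// after the abort has been observed:
//   processIsAlive(recordedPid) shouldBe false
// ------------------------------------------------------------------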
"A WorkflowManagerActor" should { diff --git a/engine/src/test/scala/cromwell/engine/WorkflowManagerActorSpec.scala b/engine/src/test/scala/cromwell/engine/WorkflowManagerActorSpec.scala index 55faea29fb8..1d7d313b638 100644 --- a/engine/src/test/scala/cromwell/engine/WorkflowManagerActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/WorkflowManagerActorSpec.scala @@ -1,11 +1,11 @@ package cromwell.engine -import cromwell.CromwellTestkitSpec +import cromwell.CromwellTestKitSpec import cromwell.engine.workflow.WorkflowDescriptorBuilder import cromwell.util.SampleWdl -class WorkflowManagerActorSpec extends CromwellTestkitSpec with WorkflowDescriptorBuilder { +class WorkflowManagerActorSpec extends CromwellTestKitSpec with WorkflowDescriptorBuilder { override implicit val actorSystem = system "A WorkflowManagerActor" should { @@ -13,7 +13,7 @@ class WorkflowManagerActorSpec extends CromwellTestkitSpec with WorkflowDescript "run workflows in the correct directory" in { val outputs = runWdl(sampleWdl = SampleWdl.CurrentDirectory) - val outputName = "whereami.whereami.pwd" + val outputName = "wf_whereami_whereami_pwd" val salutation = outputs(outputName) val actualOutput = salutation.valueString.trim actualOutput should endWith("/call-whereami/execution") diff --git a/engine/src/test/scala/cromwell/engine/WorkflowStoreActorSpec.scala b/engine/src/test/scala/cromwell/engine/WorkflowStoreActorSpec.scala index 18460e765fe..d6719fe85ec 100644 --- a/engine/src/test/scala/cromwell/engine/WorkflowStoreActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/WorkflowStoreActorSpec.scala @@ -1,17 +1,21 @@ package cromwell.engine import cats.data.NonEmptyList -import cromwell.CromwellTestkitSpec -import cromwell.core.WorkflowId +import cromwell.CromwellTestKitSpec +import cromwell.core.{WorkflowId, WorkflowSourceFilesCollection} import cromwell.engine.workflow.workflowstore.WorkflowStoreActor._ import cromwell.engine.workflow.workflowstore._ +import cromwell.services.metadata.MetadataQuery +import cromwell.services.metadata.MetadataService.{GetMetadataQueryAction, MetadataLookupResponse} +import cromwell.services.metadata.impl.ReadMetadataActor +import cromwell.util.EncryptionSpec import cromwell.util.SampleWdl.HelloWorld import org.scalatest.Matchers import scala.concurrent.duration._ import scala.language.postfixOps -class WorkflowStoreActorSpec extends CromwellTestkitSpec with Matchers { +class WorkflowStoreActorSpec extends CromwellTestKitSpec with Matchers { val helloWorldSourceFiles = HelloWorld.asWorkflowSources() /** @@ -31,17 +35,22 @@ class WorkflowStoreActorSpec extends CromwellTestkitSpec with Matchers { list.foldLeft((List.empty[WorkflowToStart], true))(folderFunction)._2 } + private def prettyOptions(workflowSourceFiles: WorkflowSourceFilesCollection): WorkflowSourceFilesCollection = { + import spray.json._ + workflowSourceFiles.copyOptions(workflowSourceFiles.workflowOptionsJson.parseJson.prettyPrint) + } + "The WorkflowStoreActor" should { "return an ID for a submitted workflow" in { val store = new InMemoryWorkflowStore - val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestkitSpec.ServiceRegistryActorInstance)) + val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestKitSpec.ServiceRegistryActorInstance)) storeActor ! 
SubmitWorkflow(helloWorldSourceFiles) expectMsgType[WorkflowSubmittedToStore](10 seconds) } "return 3 IDs for a batch submission of 3" in { val store = new InMemoryWorkflowStore - val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestkitSpec.ServiceRegistryActorInstance)) + val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestKitSpec.ServiceRegistryActorInstance)) storeActor ! BatchSubmitWorkflows(NonEmptyList.of(helloWorldSourceFiles, helloWorldSourceFiles, helloWorldSourceFiles)) expectMsgPF(10 seconds) { case WorkflowsBatchSubmittedToStore(ids) => ids.toList.size shouldBe 3 @@ -50,7 +59,7 @@ class WorkflowStoreActorSpec extends CromwellTestkitSpec with Matchers { "fetch exactly N workflows" in { val store = new InMemoryWorkflowStore - val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestkitSpec.ServiceRegistryActorInstance)) + val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestKitSpec.ServiceRegistryActorInstance)) storeActor ! BatchSubmitWorkflows(NonEmptyList.of(helloWorldSourceFiles, helloWorldSourceFiles, helloWorldSourceFiles)) val insertedIds = expectMsgType[WorkflowsBatchSubmittedToStore](10 seconds).workflowIds.toList @@ -63,15 +72,65 @@ class WorkflowStoreActorSpec extends CromwellTestkitSpec with Matchers { workflowNel map { case WorkflowToStart(id, sources, state) => insertedIds.contains(id) shouldBe true - sources shouldBe helloWorldSourceFiles + sources shouldBe prettyOptions(helloWorldSourceFiles) state shouldBe WorkflowStoreState.Submitted } } } + "fetch encrypted and cleared workflow options" in { + EncryptionSpec.assumeAes256Cbc() + + val optionedSourceFiles = HelloWorld.asWorkflowSources(workflowOptions = + s"""|{ + | "key": "value", + | "refresh_token": "it's a secret" + |} + |""".stripMargin) + + + val store = new InMemoryWorkflowStore + val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestKitSpec.ServiceRegistryActorInstance)) + val readMetadataActor = system.actorOf(ReadMetadataActor.props()) + storeActor ! BatchSubmitWorkflows(NonEmptyList.of(optionedSourceFiles)) + val insertedIds = expectMsgType[WorkflowsBatchSubmittedToStore](10 seconds).workflowIds.toList + + storeActor ! FetchRunnableWorkflows(1) + expectMsgPF(10 seconds) { + case NewWorkflowsToStart(workflowNel) => + workflowNel.toList.size should be(1) + checkDistinctIds(workflowNel.toList) should be(true) + workflowNel.toList.foreach { + case WorkflowToStart(id, sources, state) => + insertedIds.contains(id) should be(true) + sources.wdlSource should be(optionedSourceFiles.wdlSource) + sources.inputsJson should be(optionedSourceFiles.inputsJson) + state should be(WorkflowStoreState.Submitted) + + import spray.json._ + + val encryptedJsObject = sources.workflowOptionsJson.parseJson.asJsObject + encryptedJsObject.fields.keys should contain theSameElementsAs Seq("key", "refresh_token") + encryptedJsObject.fields("key") should be(JsString("value")) + encryptedJsObject.fields("refresh_token").asJsObject.fields.keys should contain theSameElementsAs + Seq("iv", "ciphertext") + + readMetadataActor ! 
GetMetadataQueryAction(MetadataQuery.forWorkflow(id)) + expectMsgPF(10 seconds) { + case MetadataLookupResponse(_, eventList) => + val optionsEvent = eventList.find(_.key.key == "submittedFiles:options").get + val clearedJsObject = optionsEvent.value.get.value.parseJson.asJsObject + clearedJsObject.fields.keys should contain theSameElementsAs Seq("key", "refresh_token") + clearedJsObject.fields("key") should be(JsString("value")) + clearedJsObject.fields("refresh_token") should be(JsString("cleared")) + } + } + } + } + "return only the remaining workflows if N is larger than size" in { val store = new InMemoryWorkflowStore - val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestkitSpec.ServiceRegistryActorInstance)) + val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestKitSpec.ServiceRegistryActorInstance)) storeActor ! BatchSubmitWorkflows(NonEmptyList.of(helloWorldSourceFiles, helloWorldSourceFiles, helloWorldSourceFiles)) val insertedIds = expectMsgType[WorkflowsBatchSubmittedToStore](10 seconds).workflowIds.toList @@ -84,7 +143,7 @@ class WorkflowStoreActorSpec extends CromwellTestkitSpec with Matchers { workflowNel map { case WorkflowToStart(id, sources, state) => insertedIds.contains(id) shouldBe true - sources shouldBe helloWorldSourceFiles + sources shouldBe prettyOptions(helloWorldSourceFiles) state shouldBe WorkflowStoreState.Submitted } } @@ -92,7 +151,7 @@ class WorkflowStoreActorSpec extends CromwellTestkitSpec with Matchers { "remove workflows which exist" in { val store = new InMemoryWorkflowStore - val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestkitSpec.ServiceRegistryActorInstance)) + val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestKitSpec.ServiceRegistryActorInstance)) storeActor ! SubmitWorkflow(helloWorldSourceFiles) val id = expectMsgType[WorkflowSubmittedToStore](10 seconds).workflowId storeActor ! RemoveWorkflow(id) @@ -105,7 +164,7 @@ class WorkflowStoreActorSpec extends CromwellTestkitSpec with Matchers { "remain responsive if you ask to remove a workflow it doesn't have" in { val store = new InMemoryWorkflowStore - val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestkitSpec.ServiceRegistryActorInstance)) + val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestKitSpec.ServiceRegistryActorInstance)) val id = WorkflowId.randomId() storeActor ! 
RemoveWorkflow(id) diff --git a/engine/src/test/scala/cromwell/engine/backend/mock/DefaultBackendJobExecutionActor.scala b/engine/src/test/scala/cromwell/engine/backend/mock/DefaultBackendJobExecutionActor.scala index f98fa17bd9a..0763a1c30c0 100644 --- a/engine/src/test/scala/cromwell/engine/backend/mock/DefaultBackendJobExecutionActor.scala +++ b/engine/src/test/scala/cromwell/engine/backend/mock/DefaultBackendJobExecutionActor.scala @@ -1,9 +1,9 @@ package cromwell.engine.backend.mock import akka.actor.{ActorRef, Props} -import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, SucceededResponse} +import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, JobSucceededResponse} import cromwell.backend._ -import wdl4s.Call +import wdl4s.TaskCall import wdl4s.expression.{NoFunctions, WdlStandardLibraryFunctions} import scala.concurrent.Future @@ -14,7 +14,7 @@ object DefaultBackendJobExecutionActor { case class DefaultBackendJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, override val configurationDescriptor: BackendConfigurationDescriptor) extends BackendJobExecutionActor { override def execute: Future[BackendJobExecutionResponse] = { - Future.successful(SucceededResponse(jobDescriptor.key, Some(0), (jobDescriptor.call.task.outputs map taskOutputToJobOutput).toMap, None, Seq.empty)) + Future.successful(JobSucceededResponse(jobDescriptor.key, Some(0), (jobDescriptor.call.task.outputs map taskOutputToJobOutput).toMap, None, Seq.empty)) } override def recover = execute @@ -25,7 +25,7 @@ case class DefaultBackendJobExecutionActor(override val jobDescriptor: BackendJo class DefaultBackendLifecycleActorFactory(name: String, configurationDescriptor: BackendConfigurationDescriptor) extends BackendLifecycleActorFactory { override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], + calls: Set[TaskCall], serviceRegistryActor: ActorRef): Option[Props] = None override def jobExecutionActorProps(jobDescriptor: BackendJobDescriptor, diff --git a/engine/src/test/scala/cromwell/engine/backend/mock/RetryableBackendJobExecutionActor.scala b/engine/src/test/scala/cromwell/engine/backend/mock/RetryableBackendJobExecutionActor.scala index 60617f468b7..eaaa04abb52 100644 --- a/engine/src/test/scala/cromwell/engine/backend/mock/RetryableBackendJobExecutionActor.scala +++ b/engine/src/test/scala/cromwell/engine/backend/mock/RetryableBackendJobExecutionActor.scala @@ -2,7 +2,7 @@ package cromwell.engine.backend.mock import akka.actor.Props import cromwell.backend.{BackendConfigurationDescriptor, BackendJobDescriptor, BackendJobExecutionActor} -import cromwell.backend.BackendJobExecutionActor.{FailedRetryableResponse, BackendJobExecutionResponse, SucceededResponse} +import cromwell.backend.BackendJobExecutionActor.{JobFailedRetryableResponse, BackendJobExecutionResponse, JobSucceededResponse} import scala.concurrent.Future @@ -16,9 +16,9 @@ case class RetryableBackendJobExecutionActor(override val jobDescriptor: Backend override def execute: Future[BackendJobExecutionResponse] = { if (jobDescriptor.key.attempt < attempts) - Future.successful(FailedRetryableResponse(jobDescriptor.key, new RuntimeException("An apparent transient Exception!"), None)) + Future.successful(JobFailedRetryableResponse(jobDescriptor.key, new RuntimeException("An apparent transient Exception!"), None)) else - Future.successful(SucceededResponse(jobDescriptor.key, Some(0), (jobDescriptor.call.task.outputs map 
taskOutputToJobOutput).toMap, None, Seq.empty)) + Future.successful(JobSucceededResponse(jobDescriptor.key, Some(0), (jobDescriptor.call.task.outputs map taskOutputToJobOutput).toMap, None, Seq.empty)) } override def recover = execute diff --git a/engine/src/test/scala/cromwell/engine/backend/mock/RetryableBackendLifecycleActorFactory.scala b/engine/src/test/scala/cromwell/engine/backend/mock/RetryableBackendLifecycleActorFactory.scala index 46f28f44757..c884816108f 100644 --- a/engine/src/test/scala/cromwell/engine/backend/mock/RetryableBackendLifecycleActorFactory.scala +++ b/engine/src/test/scala/cromwell/engine/backend/mock/RetryableBackendLifecycleActorFactory.scala @@ -2,13 +2,13 @@ package cromwell.engine.backend.mock import akka.actor.{ActorRef, Props} import cromwell.backend._ -import wdl4s.Call +import wdl4s.TaskCall import wdl4s.expression.{NoFunctions, WdlStandardLibraryFunctions} class RetryableBackendLifecycleActorFactory(name: String, configurationDescriptor: BackendConfigurationDescriptor) extends BackendLifecycleActorFactory { override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], + calls: Set[TaskCall], serviceRegistryActor: ActorRef): Option[Props] = None override def jobExecutionActorProps(jobDescriptor: BackendJobDescriptor, diff --git a/engine/src/test/scala/cromwell/engine/backend/mock/package.scala b/engine/src/test/scala/cromwell/engine/backend/mock/package.scala index 4baeb9c33aa..a2f914121e3 100644 --- a/engine/src/test/scala/cromwell/engine/backend/mock/package.scala +++ b/engine/src/test/scala/cromwell/engine/backend/mock/package.scala @@ -9,7 +9,7 @@ package object mock { // This is used by stubbed backends that are to be used in tests to prepare dummy outputs for job def taskOutputToJobOutput(taskOutput: TaskOutput) = - taskOutput.name -> JobOutput(sampleValue(taskOutput.wdlType)) + taskOutput.unqualifiedName -> JobOutput(sampleValue(taskOutput.wdlType)) private def sampleValue(wdlType: WdlType): WdlValue = wdlType match { case WdlIntegerType => WdlInteger(3) diff --git a/engine/src/test/scala/cromwell/engine/workflow/SingleWorkflowRunnerActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/SingleWorkflowRunnerActorSpec.scala index 540da9863c8..6dec787d546 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/SingleWorkflowRunnerActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/SingleWorkflowRunnerActorSpec.scala @@ -6,10 +6,11 @@ import java.time.OffsetDateTime import akka.actor._ import akka.pattern.ask import akka.testkit.TestKit +import akka.util.Timeout import better.files._ import com.typesafe.config.ConfigFactory -import cromwell.CromwellTestkitSpec._ -import cromwell.core.WorkflowSourceFiles +import cromwell.CromwellTestKitSpec._ +import cromwell.core.{WorkflowSourceFilesCollection} import cromwell.engine.backend.BackendSingletonCollection import cromwell.engine.workflow.SingleWorkflowRunnerActor.RunWorkflow import cromwell.engine.workflow.SingleWorkflowRunnerActorSpec._ @@ -17,7 +18,7 @@ import cromwell.engine.workflow.tokens.JobExecutionTokenDispenserActor import cromwell.engine.workflow.workflowstore.{InMemoryWorkflowStore, WorkflowStoreActor} import cromwell.util.SampleWdl import cromwell.util.SampleWdl.{ExpressionsInInputs, GoodbyeWorld, ThreeStep} -import cromwell.{AlwaysHappyJobStoreActor, CromwellTestkitSpec, EmptyCallCacheReadActor} +import cromwell.{AlwaysHappyJobStoreActor, AlwaysHappySubWorkflowStoreActor, CromwellTestKitSpec, 
EmptyCallCacheReadActor} import org.scalatest.prop.{TableDrivenPropertyChecks, TableFor3} import spray.json._ @@ -45,29 +46,34 @@ object SingleWorkflowRunnerActorSpec { def toFields = jsValue.get.asJsObject.fields } - class TestSingleWorkflowRunnerActor(source: WorkflowSourceFiles, + class TestSingleWorkflowRunnerActor(source: WorkflowSourceFilesCollection, metadataOutputPath: Option[Path]) extends SingleWorkflowRunnerActor(source, metadataOutputPath) { - override lazy val serviceRegistryActor = CromwellTestkitSpec.ServiceRegistryActorInstance + override lazy val serviceRegistryActor = CromwellTestKitSpec.ServiceRegistryActorInstance } } -abstract class SingleWorkflowRunnerActorSpec extends CromwellTestkitSpec { +abstract class SingleWorkflowRunnerActorSpec extends CromwellTestKitSpec { private val workflowStore = system.actorOf(WorkflowStoreActor.props(new InMemoryWorkflowStore, dummyServiceRegistryActor)) private val jobStore = system.actorOf(AlwaysHappyJobStoreActor.props) + private val subWorkflowStore = system.actorOf(AlwaysHappySubWorkflowStoreActor.props) private val callCacheReadActor = system.actorOf(EmptyCallCacheReadActor.props) private val jobTokenDispenserActor = system.actorOf(JobExecutionTokenDispenserActor.props) def workflowManagerActor(): ActorRef = { - system.actorOf(Props(new WorkflowManagerActor(ConfigFactory.load(), + val params = WorkflowManagerActorParams(ConfigFactory.load(), workflowStore, dummyServiceRegistryActor, dummyLogCopyRouter, jobStore, + subWorkflowStore, callCacheReadActor, jobTokenDispenserActor, - BackendSingletonCollection(Map.empty))), "WorkflowManagerActor") + BackendSingletonCollection(Map.empty), + abortJobsOnTerminate = false, + serverMode = false) + system.actorOf(Props(new WorkflowManagerActor(params)), "WorkflowManagerActor") } def createRunnerActor(sampleWdl: SampleWdl = ThreeStep, managerActor: => ActorRef = workflowManagerActor(), @@ -78,7 +84,7 @@ abstract class SingleWorkflowRunnerActorSpec extends CromwellTestkitSpec { def singleWorkflowActor(sampleWdl: SampleWdl = ThreeStep, managerActor: => ActorRef = workflowManagerActor(), outputFile: => Option[Path] = None): Unit = { val actorRef = createRunnerActor(sampleWdl, managerActor, outputFile) - val futureResult = actorRef ? 
RunWorkflow + val futureResult = actorRef.ask(RunWorkflow)(timeout = new Timeout(TimeoutDuration)) Await.ready(futureResult, Duration.Inf) () } @@ -111,9 +117,8 @@ class SingleWorkflowRunnerActorWithMetadataSpec extends SingleWorkflowRunnerActo singleWorkflowActor( sampleWdl = wdlFile, outputFile = Option(metadataFile.path)) + TestKit.shutdownActorSystem(system, TimeoutDuration) } - TestKit.shutdownActorSystem(system, TimeoutDuration) - val metadataFileContent = metadataFile.contentAsString val metadata = metadataFileContent.parseJson.asJsObject.fields metadata.get("id") shouldNot be(empty) @@ -200,7 +205,7 @@ class SingleWorkflowRunnerActorWithMetadataOnFailureSpec extends SingleWorkflowR val calls = metadata.get("calls").toFields calls should not be empty - val callSeq = calls("goodbye.goodbye").asInstanceOf[JsArray].elements + val callSeq = calls("wf_goodbye.goodbye").asInstanceOf[JsArray].elements callSeq should have size 1 val call = callSeq.head.asJsObject.fields val inputs = call.get("inputs").toFields diff --git a/engine/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala index b98c5657f95..1301f0e9b15 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala @@ -3,8 +3,8 @@ package cromwell.engine.workflow import akka.actor.{Actor, ActorRef} import akka.testkit.{TestActorRef, TestFSMRef, TestProbe} import com.typesafe.config.{Config, ConfigFactory} -import cromwell.backend.AllBackendInitializationData -import cromwell.core.{ExecutionStore, OutputStore, WorkflowId, WorkflowSourceFiles} +import cromwell.backend.{AllBackendInitializationData, JobExecutionMap} +import cromwell.core._ import cromwell.engine.EngineWorkflowDescriptor import cromwell.engine.backend.BackendSingletonCollection import cromwell.engine.workflow.WorkflowActor._ @@ -13,13 +13,13 @@ import cromwell.engine.workflow.lifecycle.WorkflowFinalizationActor.{StartFinali import cromwell.engine.workflow.lifecycle.WorkflowInitializationActor.{WorkflowInitializationAbortedResponse, WorkflowInitializationFailedResponse} import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor.{WorkflowExecutionAbortedResponse, WorkflowExecutionFailedResponse, WorkflowExecutionSucceededResponse} import cromwell.util.SampleWdl.ThreeStep -import cromwell.{AlwaysHappyJobStoreActor, CromwellTestkitSpec, EmptyCallCacheReadActor} +import cromwell.{AlwaysHappyJobStoreActor, AlwaysHappySubWorkflowStoreActor, CromwellTestKitSpec, EmptyCallCacheReadActor} import org.scalatest.BeforeAndAfter import org.scalatest.concurrent.Eventually import scala.concurrent.duration._ -class WorkflowActorSpec extends CromwellTestkitSpec with WorkflowDescriptorBuilder with BeforeAndAfter with Eventually { +class WorkflowActorSpec extends CromwellTestKitSpec with WorkflowDescriptorBuilder with BeforeAndAfter with Eventually { override implicit val actorSystem = system val mockServiceRegistryActor = TestActorRef(new Actor { @@ -53,6 +53,7 @@ class WorkflowActorSpec extends CromwellTestkitSpec with WorkflowDescriptorBuild serviceRegistryActor = mockServiceRegistryActor, workflowLogCopyRouter = TestProbe().ref, jobStoreActor = system.actorOf(AlwaysHappyJobStoreActor.props), + subWorkflowStoreActor = system.actorOf(AlwaysHappySubWorkflowStoreActor.props), callCacheReadActor = system.actorOf(EmptyCallCacheReadActor.props), jobTokenDispenserActor = TestProbe().ref ), @@ -62,7 
+63,7 @@ class WorkflowActorSpec extends CromwellTestkitSpec with WorkflowDescriptorBuild actor } - implicit val TimeoutDuration = CromwellTestkitSpec.TimeoutDuration + implicit val TimeoutDuration = CromwellTestKitSpec.TimeoutDuration "WorkflowActor" should { @@ -95,7 +96,7 @@ class WorkflowActorSpec extends CromwellTestkitSpec with WorkflowDescriptorBuild "run Finalization if Execution fails" in { val actor = createWorkflowActor(ExecutingWorkflowState) deathwatch watch actor - actor ! WorkflowExecutionFailedResponse(ExecutionStore.empty, OutputStore.empty, Seq(new Exception("Execution Failed"))) + actor ! WorkflowExecutionFailedResponse(Map.empty, new Exception("Execution Failed")) finalizationProbe.expectMsg(StartFinalizationCommand) actor.stateName should be(FinalizingWorkflowState) actor ! WorkflowFinalizationSucceededResponse @@ -108,9 +109,9 @@ class WorkflowActorSpec extends CromwellTestkitSpec with WorkflowDescriptorBuild deathwatch watch actor actor ! AbortWorkflowCommand eventually { actor.stateName should be(WorkflowAbortingState) } - currentLifecycleActor.expectMsgPF(CromwellTestkitSpec.TimeoutDuration) { + currentLifecycleActor.expectMsgPF(CromwellTestKitSpec.TimeoutDuration) { case EngineLifecycleActorAbortCommand => - actor ! WorkflowExecutionAbortedResponse(ExecutionStore.empty, OutputStore.empty) + actor ! WorkflowExecutionAbortedResponse(Map.empty) } finalizationProbe.expectMsg(StartFinalizationCommand) actor.stateName should be(FinalizingWorkflowState) @@ -122,7 +123,7 @@ class WorkflowActorSpec extends CromwellTestkitSpec with WorkflowDescriptorBuild "run Finalization actor if Execution succeeds" in { val actor = createWorkflowActor(ExecutingWorkflowState) deathwatch watch actor - actor ! WorkflowExecutionSucceededResponse(ExecutionStore.empty, OutputStore.empty) + actor ! WorkflowExecutionSucceededResponse(Map.empty, Map.empty) finalizationProbe.expectMsg(StartFinalizationCommand) actor.stateName should be(FinalizingWorkflowState) actor ! 
WorkflowFinalizationSucceededResponse @@ -151,13 +152,14 @@ class WorkflowActorSpec extends CromwellTestkitSpec with WorkflowDescriptorBuild class MockWorkflowActor(val finalizationProbe: TestProbe, workflowId: WorkflowId, startMode: StartMode, - workflowSources: WorkflowSourceFiles, + workflowSources: WorkflowSourceFilesCollection, conf: Config, serviceRegistryActor: ActorRef, workflowLogCopyRouter: ActorRef, jobStoreActor: ActorRef, + subWorkflowStoreActor: ActorRef, callCacheReadActor: ActorRef, - jobTokenDispenserActor: ActorRef) extends WorkflowActor(workflowId, startMode, workflowSources, conf, serviceRegistryActor, workflowLogCopyRouter, jobStoreActor, callCacheReadActor, jobTokenDispenserActor, BackendSingletonCollection(Map.empty)) { + jobTokenDispenserActor: ActorRef) extends WorkflowActor(workflowId, startMode, workflowSources, conf, serviceRegistryActor, workflowLogCopyRouter, jobStoreActor, subWorkflowStoreActor, callCacheReadActor, jobTokenDispenserActor, BackendSingletonCollection(Map.empty), serverMode = true) { - override def makeFinalizationActor(workflowDescriptor: EngineWorkflowDescriptor, executionStore: ExecutionStore, outputStore: OutputStore) = finalizationProbe.ref + override def makeFinalizationActor(workflowDescriptor: EngineWorkflowDescriptor, jobExecutionMap: JobExecutionMap, worfklowOutputs: CallOutputs) = finalizationProbe.ref } diff --git a/engine/src/test/scala/cromwell/engine/workflow/WorkflowDescriptorBuilder.scala b/engine/src/test/scala/cromwell/engine/workflow/WorkflowDescriptorBuilder.scala index 73013e71284..e4f5e1350b5 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/WorkflowDescriptorBuilder.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/WorkflowDescriptorBuilder.scala @@ -2,8 +2,8 @@ package cromwell.engine.workflow import akka.actor.{ActorSystem, Props} import com.typesafe.config.ConfigFactory -import cromwell.CromwellTestkitSpec -import cromwell.core.{WorkflowId, WorkflowSourceFiles} +import cromwell.CromwellTestKitSpec +import cromwell.core.{WorkflowId, WorkflowSourceFilesCollection} import cromwell.engine.EngineWorkflowDescriptor import cromwell.engine.workflow.lifecycle.MaterializeWorkflowDescriptorActor import cromwell.engine.workflow.lifecycle.MaterializeWorkflowDescriptorActor.{MaterializeWorkflowDescriptorCommand, MaterializeWorkflowDescriptorFailureResponse, MaterializeWorkflowDescriptorSuccessResponse, WorkflowDescriptorMaterializationResult} @@ -12,16 +12,16 @@ import scala.concurrent.Await trait WorkflowDescriptorBuilder { - implicit val awaitTimeout = CromwellTestkitSpec.TimeoutDuration + implicit val awaitTimeout = CromwellTestKitSpec.TimeoutDuration implicit val actorSystem: ActorSystem - def createMaterializedEngineWorkflowDescriptor(id: WorkflowId, workflowSources: WorkflowSourceFiles): EngineWorkflowDescriptor = { + def createMaterializedEngineWorkflowDescriptor(id: WorkflowId, workflowSources: WorkflowSourceFilesCollection): EngineWorkflowDescriptor = { import akka.pattern.ask implicit val timeout = akka.util.Timeout(awaitTimeout) implicit val ec = actorSystem.dispatcher val serviceRegistryIgnorer = actorSystem.actorOf(Props.empty) - val actor = actorSystem.actorOf(MaterializeWorkflowDescriptorActor.props(serviceRegistryIgnorer, id), "MaterializeWorkflowDescriptorActor-" + id.id) + val actor = actorSystem.actorOf(MaterializeWorkflowDescriptorActor.props(serviceRegistryIgnorer, id, importLocalFilesystem = false), "MaterializeWorkflowDescriptorActor-" + id.id) val workflowDescriptorFuture = actor.ask( 
MaterializeWorkflowDescriptorCommand(workflowSources, ConfigFactory.load) ).mapTo[WorkflowDescriptorMaterializationResult] diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/MaterializeWorkflowDescriptorActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/MaterializeWorkflowDescriptorActorSpec.scala index 970bc715694..0715ebeedda 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/MaterializeWorkflowDescriptorActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/MaterializeWorkflowDescriptorActorSpec.scala @@ -3,8 +3,8 @@ package cromwell.engine.workflow.lifecycle import akka.actor.Props import akka.testkit.TestDuration import com.typesafe.config.ConfigFactory -import cromwell.CromwellTestkitSpec -import cromwell.core.{WorkflowId, WorkflowOptions, WorkflowSourceFiles} +import cromwell.CromwellTestKitSpec +import cromwell.core.{WorkflowId, WorkflowOptions, WorkflowSourceFilesWithoutImports} import cromwell.engine.backend.{BackendConfigurationEntry, CromwellBackends} import cromwell.engine.workflow.lifecycle.MaterializeWorkflowDescriptorActor.{MaterializeWorkflowDescriptorCommand, MaterializeWorkflowDescriptorFailureResponse, MaterializeWorkflowDescriptorSuccessResponse} import cromwell.util.SampleWdl.HelloWorld @@ -16,7 +16,7 @@ import wdl4s.values.{WdlInteger, WdlString} import scala.concurrent.duration._ -class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with BeforeAndAfter with MockitoSugar { +class MaterializeWorkflowDescriptorActorSpec extends CromwellTestKitSpec with BeforeAndAfter with MockitoSugar { val workflowId = WorkflowId.randomId() val minimumConf = ConfigFactory.parseString( @@ -54,18 +54,18 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be "MaterializeWorkflowDescriptorActor" should { "accept valid WDL, inputs and options files" in { - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) - val sources = WorkflowSourceFiles(wdlSourceNoDocker, validInputsJson, validOptionsFile) + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId, importLocalFilesystem = false)) + val sources = WorkflowSourceFilesWithoutImports(wdlSourceNoDocker, validInputsJson, validOptionsFile) materializeWfActor ! 
MaterializeWorkflowDescriptorCommand(sources, minimumConf) within(Timeout) { expectMsgPF() { case MaterializeWorkflowDescriptorSuccessResponse(wfDesc) => wfDesc.id shouldBe workflowId - wfDesc.name shouldBe "hello" + wfDesc.name shouldBe "wf_hello" wfDesc.namespace.tasks.size shouldBe 1 - wfDesc.workflowInputs.head shouldBe (("hello.hello.addressee", WdlString("world"))) - wfDesc.backendDescriptor.inputs.head shouldBe (("hello.hello.addressee", WdlString("world"))) + wfDesc.workflowInputs.head shouldBe (("wf_hello.hello.addressee", WdlString("world"))) + wfDesc.backendDescriptor.inputs.head shouldBe (("wf_hello.hello.addressee", WdlString("world"))) wfDesc.getWorkflowOption(WorkflowOptions.WriteToCache) shouldBe Option("true") wfDesc.getWorkflowOption(WorkflowOptions.ReadFromCache) shouldBe None // Default backend assignment is "Local": @@ -73,7 +73,7 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be case (call, assignment) if call.task.name.equals("hello") => assignment shouldBe "Local" case (call, assignment) => fail(s"Unexpected call: ${call.task.name}") } - wfDesc.engineFilesystems.size shouldBe 1 + wfDesc.pathBuilders.size shouldBe 1 case MaterializeWorkflowDescriptorFailureResponse(reason) => fail(s"Materialization failed with $reason") case unknown => fail(s"Unexpected materialization response: $unknown") @@ -99,8 +99,8 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be |{ "foo.i": "17" } """.stripMargin - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) - val sources = WorkflowSourceFiles(wdl, inputs, validOptionsFile) + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId, importLocalFilesystem = false)) + val sources = WorkflowSourceFilesWithoutImports(wdl, inputs, validOptionsFile) materializeWfActor ! MaterializeWorkflowDescriptorCommand(sources, minimumConf) within(Timeout) { @@ -149,8 +149,8 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be | } |} """.stripMargin - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) - val sources = WorkflowSourceFiles(wdl, "{}", defaultDocker) + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId, importLocalFilesystem = false)) + val sources = WorkflowSourceFilesWithoutImports(wdl, "{}", defaultDocker) materializeWfActor ! MaterializeWorkflowDescriptorCommand(sources, minimumConf) within(Timeout) { @@ -196,14 +196,14 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be val cromwellBackends = CromwellBackends(fauxBackendEntries) // Run the test: - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId, cromwellBackends)) - val sources = WorkflowSourceFiles(wdl, "{}", "{}") + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId, cromwellBackends, importLocalFilesystem = false)) + val sources = WorkflowSourceFilesWithoutImports(wdl, "{}", "{}") materializeWfActor ! 
MaterializeWorkflowDescriptorCommand(sources, differentDefaultBackendConf) within(Timeout) { expectMsgPF() { case MaterializeWorkflowDescriptorSuccessResponse(wfDesc) => - wfDesc.namespace.workflow.calls foreach { + wfDesc.namespace.workflow.taskCalls foreach { case call if call.task.name.equals("a") => wfDesc.backendAssignments(call) shouldBe "SpecifiedBackend" case call if call.task.name.equals("b") => @@ -231,8 +231,8 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be |} """.stripMargin - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) - val sources = WorkflowSourceFiles(wdl, "{}", "{}") + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId, importLocalFilesystem = false)) + val sources = WorkflowSourceFilesWithoutImports(wdl, "{}", "{}") materializeWfActor ! MaterializeWorkflowDescriptorCommand(sources, differentDefaultBackendConf) within(Timeout) { @@ -250,8 +250,8 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be } "reject an invalid WDL source" in { - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) - val sources = WorkflowSourceFiles(unstructuredFile, validInputsJson, validOptionsFile) + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId, importLocalFilesystem = false)) + val sources = WorkflowSourceFilesWithoutImports(unstructuredFile, validInputsJson, validOptionsFile) materializeWfActor ! MaterializeWorkflowDescriptorCommand(sources, minimumConf) within(Timeout) { @@ -274,8 +274,8 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be | |# no workflow foo { ... } block!! """.stripMargin - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) - val sources = WorkflowSourceFiles(noWorkflowWdl, validInputsJson, validOptionsFile) + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId, importLocalFilesystem = false)) + val sources = WorkflowSourceFilesWithoutImports(noWorkflowWdl, validInputsJson, validOptionsFile) materializeWfActor ! MaterializeWorkflowDescriptorCommand(sources, minimumConf) within(Timeout) { @@ -299,8 +299,8 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be | |workflow foo { } """.stripMargin - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) - val badWdlSources = WorkflowSourceFiles(noWorkflowWdl, validInputsJson, validOptionsFile) + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId, importLocalFilesystem = false)) + val badWdlSources = WorkflowSourceFilesWithoutImports(noWorkflowWdl, validInputsJson, validOptionsFile) materializeWfActor ! 
MaterializeWorkflowDescriptorCommand(badWdlSources, minimumConf) within(Timeout) { @@ -318,8 +318,8 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be "reject an invalid options file" in { - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) - val sources = WorkflowSourceFiles(wdlSourceNoDocker, validInputsJson, unstructuredFile) + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId, importLocalFilesystem = false)) + val sources = WorkflowSourceFilesWithoutImports(wdlSourceNoDocker, validInputsJson, unstructuredFile) materializeWfActor ! MaterializeWorkflowDescriptorCommand(sources, minimumConf) within(Timeout) { @@ -336,8 +336,8 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be } "reject an invalid workflow inputs file" in { - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) - val sources = WorkflowSourceFiles(wdlSourceNoDocker, unstructuredFile, validOptionsFile) + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId, importLocalFilesystem = false)) + val sources = WorkflowSourceFilesWithoutImports(wdlSourceNoDocker, unstructuredFile, validOptionsFile) materializeWfActor ! MaterializeWorkflowDescriptorCommand(sources, minimumConf) within(Timeout) { @@ -354,15 +354,15 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be } "reject requests if any required inputs are missing" in { - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId, importLocalFilesystem = false)) val noInputsJson = "{}" - val badOptionsSources = WorkflowSourceFiles(wdlSourceNoDocker, noInputsJson, validOptionsFile) + val badOptionsSources = WorkflowSourceFilesWithoutImports(wdlSourceNoDocker, noInputsJson, validOptionsFile) materializeWfActor ! MaterializeWorkflowDescriptorCommand(badOptionsSources, minimumConf) within(Timeout) { expectMsgPF() { case MaterializeWorkflowDescriptorFailureResponse(reason) => - reason.getMessage should startWith("Workflow input processing failed.\nRequired workflow input 'hello.hello.addressee' not specified") + reason.getMessage should startWith("Workflow input processing failed.\nRequired workflow input 'wf_hello.hello.addressee' not specified") case MaterializeWorkflowDescriptorSuccessResponse(wfDesc) => fail("This materialization should not have succeeded!") case unknown => fail(s"Unexpected materialization response: $unknown") @@ -381,14 +381,14 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be | call bar |} """.stripMargin - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) - val sources = WorkflowSourceFiles(wdl, "{}", validOptionsFile) + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId, importLocalFilesystem = false)) + val sources = WorkflowSourceFilesWithoutImports(wdl, "{}", validOptionsFile) materializeWfActor ! 
MaterializeWorkflowDescriptorCommand(sources, minimumConf) within(Timeout) { expectMsgPF() { case MaterializeWorkflowDescriptorFailureResponse(reason) => - reason.getMessage should startWith("Workflow input processing failed.\nInvalid right-side type of 'foo.j'. Expecting Int, got String") + reason.getMessage should startWith("Workflow input processing failed.\nUnable to load namespace from workflow: ERROR: Value for j is not coerceable into a Int") case MaterializeWorkflowDescriptorSuccessResponse(wfDesc) => fail("This materialization should not have succeeded!") case unknown => fail(s"Unexpected materialization response: $unknown") } diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/SubWorkflowExecutionActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/SubWorkflowExecutionActorSpec.scala new file mode 100644 index 00000000000..1f61772e659 --- /dev/null +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/SubWorkflowExecutionActorSpec.scala @@ -0,0 +1,213 @@ +package cromwell.engine.workflow.lifecycle.execution + +import java.util.UUID + +import akka.actor.Props +import akka.testkit.{TestFSMRef, TestProbe} +import cromwell.backend.{AllBackendInitializationData, BackendWorkflowDescriptor, JobExecutionMap} +import cromwell.core._ +import cromwell.core.callcaching.CallCachingOff +import cromwell.database.sql.tables.SubWorkflowStoreEntry +import cromwell.engine.backend.BackendSingletonCollection +import cromwell.engine.workflow.lifecycle.execution.CallPreparationActor.{CallPreparationFailed, SubWorkflowPreparationSucceeded} +import cromwell.engine.workflow.lifecycle.execution.SubWorkflowExecutionActor._ +import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor._ +import cromwell.engine.{ContinueWhilePossible, EngineWorkflowDescriptor} +import cromwell.subworkflowstore.SubWorkflowStoreActor.{QuerySubWorkflow, SubWorkflowFound, SubWorkflowNotFound} +import org.scalatest.concurrent.Eventually +import org.scalatest.{FlatSpecLike, Matchers} +import org.specs2.mock.Mockito +import wdl4s.{WdlNamespaceWithWorkflow, Workflow, WorkflowCall} + +import scala.concurrent.duration._ +import scala.language.postfixOps + +class SubWorkflowExecutionActorSpec extends TestKitSuite with FlatSpecLike with Matchers with Mockito with Eventually { + + behavior of "SubWorkflowExecutionActor" + + val serviceRegistryProbe = TestProbe() + val jobStoreProbe = TestProbe() + val subWorkflowStoreProbe = TestProbe() + val callCacheReadActorProbe = TestProbe() + val jobTokenDispenserProbe = TestProbe() + val preparationActor = TestProbe() + val subWorkflowActor = TestProbe() + val deathWatch = TestProbe() + val parentProbe = TestProbe() + val parentBackendDescriptor = mock[BackendWorkflowDescriptor] + val parentWorkflowId: WorkflowId = WorkflowId.randomId() + parentBackendDescriptor.id returns parentWorkflowId + val parentWorkflowDescriptor = EngineWorkflowDescriptor( + mock[WdlNamespaceWithWorkflow], + parentBackendDescriptor, + Map.empty, + Map.empty, + ContinueWhilePossible, + List.empty, + CallCachingOff + ) + val subWorkflow = mock[Workflow] + subWorkflow.unqualifiedName returns "sub_wf" + val subWorkflowCall = mock[WorkflowCall] + subWorkflowCall.fullyQualifiedName returns "foo.bar" + subWorkflowCall.callable returns subWorkflow + val subKey = SubWorkflowKey(subWorkflowCall, None, 1) + + val awaitTimeout: FiniteDuration = 10 seconds + + def buildEWEA(restart: Boolean = false) = { + new 
TestFSMRef[SubWorkflowExecutionActorState, SubWorkflowExecutionActorData, SubWorkflowExecutionActor](system, Props( + new SubWorkflowExecutionActor( + subKey, + WorkflowExecutionActorData.empty(parentWorkflowDescriptor), + Map.empty, + serviceRegistryProbe.ref, + jobStoreProbe.ref, + subWorkflowStoreProbe.ref, + callCacheReadActorProbe.ref, + jobTokenDispenserProbe.ref, + BackendSingletonCollection(Map.empty), + AllBackendInitializationData(Map.empty), + restart + ) { + override def createSubWorkflowPreparationActor(subWorkflowId: WorkflowId) = preparationActor.ref + override def createSubWorkflowActor(createSubWorkflowActor: EngineWorkflowDescriptor) = subWorkflowActor.ref + }), parentProbe.ref, s"SubWorkflowExecutionActorSpec-${UUID.randomUUID()}") + } + + it should "Check the sub workflow store when restarting" in { + val ewea = buildEWEA(restart = true) + ewea.setState(SubWorkflowPendingState) + + ewea ! Execute + subWorkflowStoreProbe.expectMsg(QuerySubWorkflow(parentWorkflowId, subKey)) + eventually { + ewea.stateName shouldBe SubWorkflowCheckingStoreState + } + } + + it should "Reuse sub workflow id if found in the store" in { + import cromwell.core.ExecutionIndex._ + + val ewea = buildEWEA(restart = true) + ewea.setState(SubWorkflowCheckingStoreState) + + val subWorkflowUuid = WorkflowId.randomId() + ewea ! SubWorkflowFound(SubWorkflowStoreEntry(Option(0), parentWorkflowId.toString, subKey.scope.fullyQualifiedName, subKey.index.fromIndex, subKey.attempt, subWorkflowUuid.toString, None)) + preparationActor.expectMsg(CallPreparationActor.Start) + parentProbe.expectMsg(JobStarting(subKey)) + + eventually { + ewea.stateName shouldBe SubWorkflowPreparingState + ewea.stateData.subWorkflowId shouldBe Some(subWorkflowUuid) + } + } + + it should "Fall back to a random Id if the sub workflow id is not found in the store" in { + val ewea = buildEWEA(restart = true) + ewea.setState(SubWorkflowCheckingStoreState) + + ewea ! SubWorkflowNotFound(QuerySubWorkflow(parentWorkflowId, subKey)) + preparationActor.expectMsg(CallPreparationActor.Start) + parentProbe.expectMsg(JobStarting(subKey)) + + eventually { + ewea.stateName shouldBe SubWorkflowPreparingState + ewea.stateData.subWorkflowId should not be empty + } + } + + it should "Prepare a sub workflow" in { + val ewea = buildEWEA() + ewea.setState(SubWorkflowPendingState) + + ewea ! Execute + preparationActor.expectMsg(CallPreparationActor.Start) + parentProbe.expectMsg(JobStarting(subKey)) + eventually { + ewea.stateName shouldBe SubWorkflowPreparingState + } + } + + it should "Run a sub workflow" in { + val ewea = buildEWEA() + ewea.setState(SubWorkflowPreparingState, SubWorkflowExecutionActorData(Some(WorkflowId.randomId()))) + + val subWorkflowId = WorkflowId.randomId() + val subBackendDescriptor = mock[BackendWorkflowDescriptor] + subBackendDescriptor.id returns subWorkflowId + val subWorkflowDescriptor = EngineWorkflowDescriptor( + mock[WdlNamespaceWithWorkflow], + subBackendDescriptor, + Map.empty, + Map.empty, + ContinueWhilePossible, + List.empty, + CallCachingOff + ) + + ewea ! 
SubWorkflowPreparationSucceeded(subWorkflowDescriptor, Map.empty) + subWorkflowActor.expectMsg(WorkflowExecutionActor.ExecuteWorkflowCommand) + parentProbe.expectMsg(JobRunning(subKey, Map.empty, Option(subWorkflowActor.ref))) + eventually { + ewea.stateName shouldBe SubWorkflowRunningState + } + } + + it should "Fail a sub workflow if preparation failed" in { + val ewea = buildEWEA() + ewea.setState(SubWorkflowPreparingState) + deathWatch watch ewea + + val subWorkflowKey = mock[SubWorkflowKey] + val throwable: Exception = new Exception("Expected test exception") + val preparationFailedMessage: CallPreparationFailed = CallPreparationFailed(subWorkflowKey, throwable) + ewea ! preparationFailedMessage + parentProbe.expectMsg(SubWorkflowFailedResponse(subKey, Map.empty, throwable)) + deathWatch.expectTerminated(ewea, awaitTimeout) + } + + it should "Relay Workflow Successful message" in { + val ewea = buildEWEA() + ewea.setState(SubWorkflowRunningState, SubWorkflowExecutionActorData(Some(WorkflowId.randomId()))) + + deathWatch watch ewea + + val jobExecutionMap: JobExecutionMap = Map.empty + val outputs: CallOutputs = Map.empty[LocallyQualifiedName, JobOutput] + val workflowSuccessfulMessage = WorkflowExecutionSucceededResponse(jobExecutionMap, outputs) + ewea ! workflowSuccessfulMessage + parentProbe.expectMsg(SubWorkflowSucceededResponse(subKey, jobExecutionMap, outputs)) + deathWatch.expectTerminated(ewea, awaitTimeout) + } + + it should "Relay Workflow Failed message" in { + val ewea = buildEWEA() + ewea.setState(SubWorkflowRunningState, SubWorkflowExecutionActorData(Some(WorkflowId.randomId()))) + + deathWatch watch ewea + + val jobExecutionMap: JobExecutionMap = Map.empty + val expectedException: Exception = new Exception("Expected test exception") + + val workflowSuccessfulMessage = WorkflowExecutionFailedResponse(jobExecutionMap, expectedException) + ewea ! workflowSuccessfulMessage + parentProbe.expectMsg(SubWorkflowFailedResponse(subKey, jobExecutionMap, expectedException)) + deathWatch.expectTerminated(ewea, awaitTimeout) + } + + it should "Relay Workflow Aborted message" in { + val ewea = buildEWEA() + ewea.setState(SubWorkflowRunningState, SubWorkflowExecutionActorData(Some(WorkflowId.randomId()))) + + deathWatch watch ewea + + val jobExecutionMap: JobExecutionMap = Map.empty + val workflowAbortedMessage = WorkflowExecutionAbortedResponse(jobExecutionMap) + ewea ! 
workflowAbortedMessage + parentProbe.expectMsg(SubWorkflowAbortedResponse(subKey, jobExecutionMap)) + deathWatch.expectTerminated(ewea, awaitTimeout) + } + +} diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActorSpec.scala index 01af65d1aeb..51007a2dfc8 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActorSpec.scala @@ -12,13 +12,13 @@ import cromwell.engine.workflow.tokens.JobExecutionTokenDispenserActor import cromwell.services.ServiceRegistryActor import cromwell.services.metadata.MetadataService import cromwell.util.SampleWdl -import cromwell.{AlwaysHappyJobStoreActor, CromwellTestkitSpec, EmptyCallCacheReadActor, MetadataWatchActor} +import cromwell._ import org.scalatest.BeforeAndAfter -import scala.concurrent.{Await, Promise} import scala.concurrent.duration._ +import scala.concurrent.{Await, Promise} -class WorkflowExecutionActorSpec extends CromwellTestkitSpec with BeforeAndAfter with WorkflowDescriptorBuilder { +class WorkflowExecutionActorSpec extends CromwellTestKitSpec with BeforeAndAfter with WorkflowDescriptorBuilder { override implicit val actorSystem = system implicit val DefaultDuration = 20.seconds.dilated @@ -53,6 +53,7 @@ class WorkflowExecutionActorSpec extends CromwellTestkitSpec with BeforeAndAfter val metadataWatcherProps = Props(MetadataWatchActor(metadataSuccessPromise, requiredMetadataMatchers: _*)) val serviceRegistryActor = system.actorOf(ServiceRegistryActor.props(ConfigFactory.load(), overrides = Map(MetadataService.MetadataServiceName -> metadataWatcherProps))) val jobStoreActor = system.actorOf(AlwaysHappyJobStoreActor.props) + val subWorkflowStoreActor = system.actorOf(AlwaysHappySubWorkflowStoreActor.props) val jobTokenDispenserActor = system.actorOf(JobExecutionTokenDispenserActor.props) val MockBackendConfigEntry = BackendConfigurationEntry( name = "Mock", @@ -66,12 +67,12 @@ class WorkflowExecutionActorSpec extends CromwellTestkitSpec with BeforeAndAfter val callCacheReadActor = TestProbe() val workflowExecutionActor = system.actorOf( - WorkflowExecutionActor.props(workflowId, engineWorkflowDescriptor, serviceRegistryActor, jobStoreActor, + WorkflowExecutionActor.props(engineWorkflowDescriptor, serviceRegistryActor, jobStoreActor, subWorkflowStoreActor, callCacheReadActor.ref, jobTokenDispenserActor, MockBackendSingletonCollection, AllBackendInitializationData.empty, restarting = false), "WorkflowExecutionActor") EventFilter.info(pattern = ".*Final Outputs", occurrences = 1).intercept { - EventFilter.info(pattern = "Starting calls: hello.hello", occurrences = 3).intercept { + EventFilter.info(pattern = "Starting calls: wf_hello.hello", occurrences = 3).intercept { workflowExecutionActor ! 
ExecuteWorkflowCommand } } @@ -86,6 +87,7 @@ class WorkflowExecutionActorSpec extends CromwellTestkitSpec with BeforeAndAfter "execute a workflow with scatters" in { val serviceRegistry = mockServiceRegistryActor val jobStore = system.actorOf(AlwaysHappyJobStoreActor.props) + val subWorkflowStoreActor = system.actorOf(AlwaysHappySubWorkflowStoreActor.props) val callCacheReadActor = system.actorOf(EmptyCallCacheReadActor.props) val jobTokenDispenserActor = system.actorOf(JobExecutionTokenDispenserActor.props) @@ -99,7 +101,7 @@ class WorkflowExecutionActorSpec extends CromwellTestkitSpec with BeforeAndAfter val workflowId = WorkflowId.randomId() val engineWorkflowDescriptor = createMaterializedEngineWorkflowDescriptor(workflowId, SampleWdl.SimpleScatterWdl.asWorkflowSources(runtime = runtimeSection)) val workflowExecutionActor = system.actorOf( - WorkflowExecutionActor.props(workflowId, engineWorkflowDescriptor, serviceRegistry, jobStore, + WorkflowExecutionActor.props(engineWorkflowDescriptor, serviceRegistry, jobStore, subWorkflowStoreActor, callCacheReadActor, jobTokenDispenserActor, MockBackendSingletonCollection, AllBackendInitializationData.empty, restarting = false), "WorkflowExecutionActor") diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EngineJobHashingActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EngineJobHashingActorSpec.scala index d79a383fbf8..34ac835cd31 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EngineJobHashingActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EngineJobHashingActorSpec.scala @@ -1,23 +1,23 @@ package cromwell.engine.workflow.lifecycle.execution.callcaching import akka.actor.{ActorRef, ActorSystem, Props} -import akka.testkit.{ImplicitSender, TestKit, TestProbe} +import akka.testkit.{ImplicitSender, TestProbe} import cats.data.NonEmptyList -import cromwell.CromwellTestkitSpec +import cromwell.CromwellTestKitSpec +import cromwell.backend._ import cromwell.backend.callcaching.FileHashingActor.{FileHashResponse, SingleFileHashRequest} -import cromwell.backend.{BackendInitializationData, BackendJobDescriptor, BackendJobDescriptorKey, BackendWorkflowDescriptor, RuntimeAttributeDefinition} import cromwell.core.callcaching._ import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor.{CacheHit, CacheMiss, CallCacheHashes} import org.scalatest.mockito.MockitoSugar -import org.scalatest.{BeforeAndAfterAll, Matchers, WordSpecLike} +import org.scalatest.{Matchers, WordSpecLike} import wdl4s._ import wdl4s.values.{WdlFile, WdlValue} import scala.concurrent.duration._ import scala.language.postfixOps -class EngineJobHashingActorSpec extends TestKit(new CromwellTestkitSpec.TestWorkflowManagerSystem().actorSystem) - with ImplicitSender with WordSpecLike with Matchers with MockitoSugar with BeforeAndAfterAll { +class EngineJobHashingActorSpec extends CromwellTestKitSpec + with ImplicitSender with WordSpecLike with Matchers with MockitoSugar { import EngineJobHashingActorSpec._ @@ -162,13 +162,9 @@ class EngineJobHashingActorSpec extends TestKit(new CromwellTestkitSpec.TestWork } } } - - override def afterAll() = { - TestKit.shutdownActorSystem(system) - } } -object EngineJobHashingActorSpec extends MockitoSugar { +object EngineJobHashingActorSpec extends BackendSpec { import org.mockito.Mockito._ def createEngineJobHashingActor @@ -198,12 +194,12 @@ object 
EngineJobHashingActorSpec extends MockitoSugar { def templateJobDescriptor(inputs: Map[LocallyQualifiedName, WdlValue] = Map.empty) = { val task = mock[Task] - val call = mock[Call] + val call = mock[TaskCall] when(task.commandTemplateString).thenReturn("Do the stuff... now!!") when(task.outputs).thenReturn(List.empty) when(call.task).thenReturn(task) val workflowDescriptor = mock[BackendWorkflowDescriptor] - val jobDescriptor = BackendJobDescriptor(workflowDescriptor, BackendJobDescriptorKey(call, None, 1), Map.empty, inputs) + val jobDescriptor = BackendJobDescriptor(workflowDescriptor, BackendJobDescriptorKey(call, None, 1), Map.empty, fqnMapToDeclarationMap(inputs)) jobDescriptor } diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaBackendIsCopyingCachedOutputsSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaBackendIsCopyingCachedOutputsSpec.scala index 7aa4ccfda04..b3971175944 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaBackendIsCopyingCachedOutputsSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaBackendIsCopyingCachedOutputsSpec.scala @@ -3,9 +3,10 @@ package cromwell.engine.workflow.lifecycle.execution.ejea import cats.data.NonEmptyList import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor._ import EngineJobExecutionActorSpec._ -import cromwell.core.callcaching.CallCachingMode +import cromwell.core.callcaching._ import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor.{CacheHit, CallCacheHashes, EJHAResponse, HashError} import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCachingEntryId + import scala.util.{Failure, Success, Try} import cromwell.engine.workflow.lifecycle.execution.ejea.HasJobSuccessResponse.SuccessfulCallCacheHashes @@ -97,20 +98,43 @@ class EjeaBackendIsCopyingCachedOutputsSpec extends EngineJobExecutionActorSpec } } + if (mode.readFromCache) { s"invalidate a call for caching if backend coping failed when it was going to receive $hashComboName, if call caching is $mode" in { ejea = ejeaInBackendIsCopyingCachedOutputsState(initialHashData, mode) // Send the response from the copying actor ejea ! failureNonRetryableResponse expectInvalidateCallCacheActor(cacheId) - eventually { ejea.stateName should be(InvalidatingCacheEntry) } - ejea.stateData should be(ResponsePendingData(helper.backendJobDescriptor, helper. bjeaProps, initialHashData, cacheHit)) + eventually { + ejea.stateName should be(InvalidatingCacheEntry) + } + ejea.stateData should be(ResponsePendingData(helper.backendJobDescriptor, helper.bjeaProps, initialHashData, cacheHit)) + } + + s"not invalidate a call for caching if backend coping failed when invalidation is disabled, when it was going to receive $hashComboName, if call caching is $mode" in { + val invalidationDisabledOptions = CallCachingOptions(invalidateBadCacheResults = false) + val cacheInvalidationDisabledMode = mode match { + case CallCachingActivity(rw, options) => CallCachingActivity(rw, invalidationDisabledOptions) + case _ => fail(s"Mode $mode not appropriate for cache invalidation tests") + } + ejea = ejeaInBackendIsCopyingCachedOutputsState(initialHashData, cacheInvalidationDisabledMode) + // Send the response from the copying actor + ejea ! 
failureNonRetryableResponse + + eventually { + ejea.stateName should be(RunningJob) + } + // Make sure we didn't start invalidating anything: + helper.invalidateCacheActorCreations.hasExactlyOne should be(false) + ejea.stateData should be(ResponsePendingData(helper.backendJobDescriptor, helper.bjeaProps, initialHashData, None)) } s"invalidate a call for caching if backend coping failed (preserving and received hashes) when call caching is $mode, the EJEA has $hashComboName and then gets a success result" in { ejea = ejeaInBackendIsCopyingCachedOutputsState(initialHashData, mode) // Send the response from the EJHA (if there was one!): - ejhaResponse foreach { ejea ! _ } + ejhaResponse foreach { + ejea ! _ + } // Nothing should happen here: helper.jobStoreProbe.expectNoMsg(awaitAlmostNothing) @@ -120,9 +144,12 @@ class EjeaBackendIsCopyingCachedOutputsSpec extends EngineJobExecutionActorSpec ejea ! failureNonRetryableResponse expectInvalidateCallCacheActor(cacheId) - eventually { ejea.stateName should be(InvalidatingCacheEntry) } - ejea.stateData should be(ResponsePendingData(helper.backendJobDescriptor, helper. bjeaProps, finalHashData, cacheHit)) + eventually { + ejea.stateName should be(InvalidatingCacheEntry) + } + ejea.stateData should be(ResponsePendingData(helper.backendJobDescriptor, helper.bjeaProps, finalHashData, cacheHit)) } + } } } } diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaCheckingJobStoreSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaCheckingJobStoreSpec.scala index a8f2bdf46b5..5a9bb4f5c2a 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaCheckingJobStoreSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaCheckingJobStoreSpec.scala @@ -1,9 +1,9 @@ package cromwell.engine.workflow.lifecycle.execution.ejea -import cromwell.backend.BackendJobExecutionActor.{FailedNonRetryableResponse, FailedRetryableResponse, SucceededResponse} +import cromwell.backend.BackendJobExecutionActor.{JobFailedNonRetryableResponse, JobFailedRetryableResponse, JobSucceededResponse} import cromwell.core._ +import cromwell.engine.workflow.lifecycle.execution.CallPreparationActor import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor.{CheckingJobStore, NoData, PreparingJob} -import cromwell.engine.workflow.lifecycle.execution.JobPreparationActor import cromwell.engine.workflow.lifecycle.execution.ejea.EngineJobExecutionActorSpec.EnhancedTestEJEA import cromwell.jobstore.JobStoreActor.{JobComplete, JobNotComplete} import cromwell.jobstore.{JobResultFailure, JobResultSuccess} @@ -17,12 +17,12 @@ class EjeaCheckingJobStoreSpec extends EngineJobExecutionActorSpec { createCheckingJobStoreEjea() ejea.setState(CheckingJobStore) val returnCode: Option[Int] = Option(0) - val jobOutputs: JobOutputs = Map.empty + val jobOutputs: CallOutputs = Map.empty ejea ! JobComplete(JobResultSuccess(returnCode, jobOutputs)) helper.replyToProbe.expectMsgPF(awaitTimeout) { - case response: SucceededResponse => + case response: JobSucceededResponse => response.returnCode shouldBe returnCode response.jobOutputs shouldBe jobOutputs } @@ -40,11 +40,11 @@ class EjeaCheckingJobStoreSpec extends EngineJobExecutionActorSpec { ejea ! 
JobComplete(JobResultFailure(returnCode, reason, retryable)) helper.replyToProbe.expectMsgPF(awaitTimeout) { - case response: FailedNonRetryableResponse => + case response: JobFailedNonRetryableResponse => false should be(retryable) response.returnCode shouldBe returnCode response.throwable shouldBe reason - case response: FailedRetryableResponse => + case response: JobFailedRetryableResponse => true should be(retryable) response.returnCode shouldBe returnCode response.throwable shouldBe reason @@ -59,7 +59,7 @@ class EjeaCheckingJobStoreSpec extends EngineJobExecutionActorSpec { ejea.setState(CheckingJobStore) ejea ! JobNotComplete - helper.jobPreparationProbe.expectMsg(awaitTimeout, "expecting RecoverJobCommand", JobPreparationActor.Start) + helper.jobPreparationProbe.expectMsg(awaitTimeout, "expecting RecoverJobCommand", CallPreparationActor.Start) ejea.stateName should be(PreparingJob) ejea.stop() diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaPreparingJobSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaPreparingJobSpec.scala index 1d6f2ccbe5b..bdb426b6037 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaPreparingJobSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaPreparingJobSpec.scala @@ -1,9 +1,10 @@ package cromwell.engine.workflow.lifecycle.execution.ejea -import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor._ -import EngineJobExecutionActorSpec._ +import cromwell.backend.BackendJobExecutionActor.JobFailedNonRetryableResponse import cromwell.core.callcaching.CallCachingMode -import cromwell.engine.workflow.lifecycle.execution.JobPreparationActor.{BackendJobPreparationFailed, BackendJobPreparationSucceeded} +import cromwell.engine.workflow.lifecycle.execution.CallPreparationActor.{BackendJobPreparationSucceeded, CallPreparationFailed} +import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor._ +import cromwell.engine.workflow.lifecycle.execution.ejea.EngineJobExecutionActorSpec._ import org.scalatest.concurrent.Eventually class EjeaPreparingJobSpec extends EngineJobExecutionActorSpec with CanExpectHashingInitialization with Eventually { @@ -34,10 +35,11 @@ class EjeaPreparingJobSpec extends EngineJobExecutionActorSpec with CanExpectHas } s"Not proceed if Job Preparation fails ($mode)" in { - val prepFailedResponse = BackendJobPreparationFailed(helper.jobDescriptorKey, new Exception("The goggles! They do nothing!")) + val prepActorResponse = CallPreparationFailed(helper.jobDescriptorKey, new Exception("The goggles! They do nothing!")) + val prepFailedEjeaResponse = JobFailedNonRetryableResponse(helper.jobDescriptorKey, prepActorResponse.throwable, None) ejea = ejeaInPreparingState(mode) - ejea ! prepFailedResponse - helper.replyToProbe.expectMsg(prepFailedResponse) + ejea ! 
prepActorResponse + helper.replyToProbe.expectMsg(prepFailedEjeaResponse) helper.deathwatch.expectTerminated(ejea, awaitTimeout) } } diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaRequestingExecutionTokenSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaRequestingExecutionTokenSpec.scala index 83ae088350a..3047de56099 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaRequestingExecutionTokenSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaRequestingExecutionTokenSpec.scala @@ -1,7 +1,7 @@ package cromwell.engine.workflow.lifecycle.execution.ejea +import cromwell.engine.workflow.lifecycle.execution.CallPreparationActor import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor._ -import cromwell.engine.workflow.lifecycle.execution.JobPreparationActor import cromwell.engine.workflow.tokens.JobExecutionTokenDispenserActor.{JobExecutionTokenDenied, JobExecutionTokenDispensed} import cromwell.jobstore.JobStoreActor.QueryJobCompletion import org.scalatest.concurrent.Eventually @@ -45,7 +45,7 @@ class EjeaRequestingExecutionTokenSpec extends EngineJobExecutionActorSpec with ejea = helper.buildEJEA(restarting = false) ejea ! JobExecutionTokenDispensed(helper.executionToken) - helper.jobPreparationProbe.expectMsg(max = awaitTimeout, hint = "Awaiting job preparation", JobPreparationActor.Start) + helper.jobPreparationProbe.expectMsg(max = awaitTimeout, hint = "Awaiting job preparation", CallPreparationActor.Start) helper.jobStoreProbe.expectNoMsg(awaitAlmostNothing) ejea.stateName should be(PreparingJob) } diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaUpdatingJobStoreSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaUpdatingJobStoreSpec.scala index 1b783a69e7b..73a224a004c 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaUpdatingJobStoreSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaUpdatingJobStoreSpec.scala @@ -1,7 +1,7 @@ package cromwell.engine.workflow.lifecycle.execution.ejea import EngineJobExecutionActorSpec._ -import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, FailedNonRetryableResponse} +import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, JobFailedNonRetryableResponse} import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor._ import cromwell.jobstore.JobStoreActor.{JobStoreWriteFailure, JobStoreWriteSuccess} import cromwell.engine.workflow.lifecycle.execution.ejea.HasJobSuccessResponse.SuccessfulCallCacheHashes @@ -33,7 +33,7 @@ class EjeaUpdatingJobStoreSpec extends EngineJobExecutionActorSpec with HasJobSu val exception = new Exception("I loved Ophelia: forty thousand brothers\\ Could not, with all their quantity of love,\\ Make up my sum. What wilt thou do for her?") ejea ! 
JobStoreWriteFailure(exception) helper.replyToProbe.expectMsgPF(awaitTimeout) { - case FailedNonRetryableResponse(jobDescriptorKey, reason, None) => + case JobFailedNonRetryableResponse(jobDescriptorKey, reason, None) => jobDescriptorKey should be(helper.jobDescriptorKey) reason.getCause should be(exception) } diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EngineJobExecutionActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EngineJobExecutionActorSpec.scala index cbba498116c..10577678154 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EngineJobExecutionActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EngineJobExecutionActorSpec.scala @@ -4,7 +4,7 @@ import akka.actor.Actor import akka.testkit.TestFSMRef import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor._ import cromwell.jobstore.{Pending => _} -import cromwell.CromwellTestkitSpec +import cromwell.CromwellTestKitSpec import cromwell.backend.BackendJobExecutionActor import cromwell.backend.BackendJobExecutionActor.BackendJobExecutionActorCommand import cromwell.core.callcaching._ @@ -15,7 +15,7 @@ import scala.concurrent.duration._ import scala.language.postfixOps -trait EngineJobExecutionActorSpec extends CromwellTestkitSpec +trait EngineJobExecutionActorSpec extends CromwellTestKitSpec with Matchers with Mockito with BeforeAndAfterAll with BeforeAndAfter { // If we WANT something to happen, make sure it happens within this window: diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EngineJobExecutionActorSpecUtil.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EngineJobExecutionActorSpecUtil.scala index 8ef607c5b80..b6ddb560169 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EngineJobExecutionActorSpecUtil.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EngineJobExecutionActorSpecUtil.scala @@ -1,6 +1,6 @@ package cromwell.engine.workflow.lifecycle.execution.ejea -import cromwell.backend.BackendJobExecutionActor.{AbortedResponse, FailedNonRetryableResponse, FailedRetryableResponse, SucceededResponse} +import cromwell.backend.BackendJobExecutionActor.{AbortedResponse, JobFailedNonRetryableResponse, JobFailedRetryableResponse, JobSucceededResponse} import cromwell.core.JobOutput import cromwell.core.callcaching._ import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor.{EJEAData, SucceededResponseData, UpdatingCallCache, UpdatingJobStore} @@ -23,7 +23,7 @@ private[ejea] trait CanValidateJobStoreKey { self: EngineJobExecutionActorSpec = } private[ejea] trait CanExpectCacheWrites extends Eventually { self: EngineJobExecutionActorSpec => - def expectCacheWrite(expectedResponse: SucceededResponse, expectedCallCacheHashes: CallCacheHashes): Unit = { + def expectCacheWrite(expectedResponse: JobSucceededResponse, expectedCallCacheHashes: CallCacheHashes): Unit = { eventually { ejea.stateName should be(UpdatingCallCache) } ejea.stateData should be(SucceededResponseData(expectedResponse, Some(Success(expectedCallCacheHashes)))) helper.callCacheWriteActorCreations match { @@ -83,7 +83,7 @@ private[ejea] trait CanExpectCacheInvalidation extends Eventually { self: Engine private[ejea] trait HasJobSuccessResponse { self: EngineJobExecutionActorSpec => val successRc = Option(171) val successOutputs = Map("a" -> 
JobOutput(WdlInteger(3)), "b" -> JobOutput(WdlString("bee"))) - def successResponse = SucceededResponse(helper.jobDescriptorKey, successRc, successOutputs, None, Seq.empty) + def successResponse = JobSucceededResponse(helper.jobDescriptorKey, successRc, successOutputs, None, Seq.empty) } private[ejea] object HasJobSuccessResponse { val SuccessfulCallCacheHashes = CallCacheHashes(Set(HashResult(HashKey("whatever you want"), HashValue("whatever you need")))) @@ -93,7 +93,7 @@ private[ejea] trait HasJobFailureResponses { self: EngineJobExecutionActorSpec = val failedRc = Option(12) val failureReason = new Exception("The sixth sheik's sheep is sick!") // Need to delay making the response because job descriptors come from the per-test "helper", which is null outside tests! - def failureRetryableResponse = FailedRetryableResponse(helper.jobDescriptorKey, failureReason, failedRc) - def failureNonRetryableResponse = FailedNonRetryableResponse(helper.jobDescriptorKey, failureReason, Option(12)) + def failureRetryableResponse = JobFailedRetryableResponse(helper.jobDescriptorKey, failureReason, failedRc) + def failureNonRetryableResponse = JobFailedNonRetryableResponse(helper.jobDescriptorKey, failureReason, Option(12)) def abortedResponse = AbortedResponse(helper.jobDescriptorKey) } \ No newline at end of file diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/PerTestHelper.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/PerTestHelper.scala index 16274a27508..9389362ad41 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/PerTestHelper.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/PerTestHelper.scala @@ -4,29 +4,27 @@ import java.util.UUID import akka.actor.{ActorRef, ActorSystem, Props} import akka.testkit.{TestFSMRef, TestProbe} -import cromwell.backend.BackendJobExecutionActor.SucceededResponse -import cromwell.backend.{BackendInitializationData, BackendJobDescriptor, BackendJobDescriptorKey, BackendLifecycleActorFactory, BackendWorkflowDescriptor} +import cromwell.backend.BackendJobExecutionActor.JobSucceededResponse +import cromwell.backend._ import cromwell.core.JobExecutionToken.JobExecutionTokenType import cromwell.core.callcaching.{CallCachingActivity, CallCachingMode, CallCachingOff} -import cromwell.core.{ExecutionStore, JobExecutionToken, OutputStore, WorkflowId} +import cromwell.core.{CallOutputs, JobExecutionToken, WorkflowId} import cromwell.engine.EngineWorkflowDescriptor -import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCachingEntryId -import cromwell.engine.workflow.lifecycle.execution.{EngineJobExecutionActor, WorkflowExecutionActorData} import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor.{EJEAData, EngineJobExecutionActorState} +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCachingEntryId import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor.CallCacheHashes +import cromwell.engine.workflow.lifecycle.execution.ejea.EngineJobExecutionActorSpec._ +import cromwell.engine.workflow.lifecycle.execution.{EngineJobExecutionActor, WorkflowExecutionActorData} +import cromwell.engine.workflow.mocks.{DeclarationMock, TaskMock, WdlExpressionMock} +import cromwell.util.AkkaTestUtil._ import org.specs2.mock.Mockito -import wdl4s.WdlExpression.ScopedLookupFunction -import wdl4s.expression.{NoFunctions, WdlFunctions, WdlStandardLibraryFunctions} -import 
wdl4s.types.{WdlIntegerType, WdlStringType} -import wdl4s.values.{WdlInteger, WdlString, WdlValue} import wdl4s._ -import cromwell.util.AkkaTestUtil._ -import cromwell.engine.workflow.lifecycle.execution.ejea.EngineJobExecutionActorSpec._ - -import scala.util.Success +import wdl4s.expression.{NoFunctions, WdlStandardLibraryFunctions} +import wdl4s.parser.WdlParser.Ast +import wdl4s.types.{WdlIntegerType, WdlStringType} -private[ejea] class PerTestHelper(implicit val system: ActorSystem) extends Mockito { +private[ejea] class PerTestHelper(implicit val system: ActorSystem) extends Mockito with TaskMock with WdlExpressionMock with DeclarationMock { val workflowId = WorkflowId.randomId() val workflowName = "wf" @@ -37,34 +35,29 @@ private[ejea] class PerTestHelper(implicit val system: ActorSystem) extends Mock val executionToken = JobExecutionToken(JobExecutionTokenType("test", None), UUID.randomUUID()) - val task = mock[Task] - task.declarations returns Seq.empty - task.runtimeAttributes returns RuntimeAttributes(Map.empty) - task.commandTemplateString returns "!!shazam!!" - val stringOutputExpression = mock[WdlExpression] - stringOutputExpression.valueString returns "hello" - stringOutputExpression.evaluate(any[ScopedLookupFunction], any[ WdlFunctions[WdlValue]]) returns Success(WdlString("hello")) - task.outputs returns Seq(TaskOutput("outString", WdlStringType, stringOutputExpression)) - - val intInputExpression = mock[WdlExpression] - intInputExpression.valueString returns "543" - intInputExpression.evaluate(any[ScopedLookupFunction], any[WdlFunctions[WdlValue]]) returns Success(WdlInteger(543)) - - val intInputDeclaration = mock[Declaration] - intInputDeclaration.name returns "inInt" - intInputDeclaration.expression returns Option(intInputExpression) - intInputDeclaration.wdlType returns WdlIntegerType - task.declarations returns Seq(intInputDeclaration) - - val call: Call = Call(None, jobFqn, task, Set.empty, Map.empty, None) + val task = mockTask( + taskName, + declarations = Seq(mockDeclaration("inInt", WdlIntegerType, mockIntExpression(543))), + outputs = Seq(("outString", WdlStringType, mockStringExpression("hello"))) + ) + + val workflow = new Workflow( + unqualifiedName = workflowName, + workflowOutputWildcards = Seq.empty, + wdlSyntaxErrorFormatter = mock[WdlSyntaxErrorFormatter], + meta = Map.empty, + parameterMeta = Map.empty, + ast = mock[Ast]) + val call: TaskCall = TaskCall(None, task, Map.empty, mock[Ast]) + call.parent_=(workflow) val jobDescriptorKey = BackendJobDescriptorKey(call, jobIndex, jobAttempt) val backendWorkflowDescriptor = BackendWorkflowDescriptor(workflowId, null, null, null) - val backendJobDescriptor = BackendJobDescriptor(backendWorkflowDescriptor, jobDescriptorKey, runtimeAttributes = Map.empty, inputs = Map.empty) + val backendJobDescriptor = BackendJobDescriptor(backendWorkflowDescriptor, jobDescriptorKey, runtimeAttributes = Map.empty, inputDeclarations = Map.empty) var fetchCachedResultsActorCreations: ExpectOne[(CallCachingEntryId, Seq[TaskOutput])] = NothingYet var jobHashingInitializations: ExpectOne[(BackendJobDescriptor, CallCachingActivity)] = NothingYet - var callCacheWriteActorCreations: ExpectOne[(CallCacheHashes, SucceededResponse)] = NothingYet + var callCacheWriteActorCreations: ExpectOne[(CallCacheHashes, JobSucceededResponse)] = NothingYet var invalidateCacheActorCreations: ExpectOne[CallCachingEntryId] = NothingYet val deathwatch = TestProbe() @@ -94,12 +87,12 @@ private[ejea] class PerTestHelper(implicit val system: ActorSystem) 
extends Mock // These two factory methods should never be called from EJEA or any of its descendants: override def workflowFinalizationActorProps(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], - executionStore: ExecutionStore, - outputStore: OutputStore, + calls: Set[TaskCall], + jobExecutionMap: JobExecutionMap, + workflowOutputs: CallOutputs, initializationData: Option[BackendInitializationData]): Option[Props] = throw new UnsupportedOperationException("Unexpected finalization actor creation!") override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], + calls: Set[TaskCall], serviceRegistryActor: ActorRef): Option[Props] = throw new UnsupportedOperationException("Unexpected finalization actor creation!") } @@ -108,14 +101,14 @@ private[ejea] class PerTestHelper(implicit val system: ActorSystem) extends Mock (implicit startingState: EngineJobExecutionActorState): TestFSMRef[EngineJobExecutionActorState, EJEAData, MockEjea] = { val factory: BackendLifecycleActorFactory = buildFactory() - val descriptor = EngineWorkflowDescriptor(backendWorkflowDescriptor, Map.empty, null, null, null, callCachingMode) + val descriptor = EngineWorkflowDescriptor(mock[WdlNamespaceWithWorkflow], backendWorkflowDescriptor, Map.empty, null, null, null, callCachingMode) val myBrandNewEjea = new TestFSMRef[EngineJobExecutionActorState, EJEAData, MockEjea](system, Props(new MockEjea( helper = this, jobPreparationProbe = jobPreparationProbe, replyTo = replyToProbe.ref, jobDescriptorKey = jobDescriptorKey, - executionData = WorkflowExecutionActorData(descriptor, ExecutionStore(Map.empty), Map.empty, OutputStore(Map.empty)), + executionData = WorkflowExecutionActorData.empty(descriptor), factory = factory, initializationData = None, restarting = restarting, @@ -149,9 +142,7 @@ private[ejea] class MockEjea(helper: PerTestHelper, override def makeFetchCachedResultsActor(cacheId: CallCachingEntryId, taskOutputs: Seq[TaskOutput]) = helper.fetchCachedResultsActorCreations = helper.fetchCachedResultsActorCreations.foundOne((cacheId, taskOutputs)) override def initializeJobHashing(jobDescriptor: BackendJobDescriptor, activity: CallCachingActivity) = helper.jobHashingInitializations = helper.jobHashingInitializations.foundOne((jobDescriptor, activity)) - override def createSaveCacheResultsActor(hashes: CallCacheHashes, success: SucceededResponse) = helper.callCacheWriteActorCreations = helper.callCacheWriteActorCreations.foundOne((hashes, success)) - override def invalidateCacheHit(cacheId: CallCachingEntryId): Unit = { - helper.invalidateCacheActorCreations = helper.invalidateCacheActorCreations.foundOne(cacheId) - } + override def createSaveCacheResultsActor(hashes: CallCacheHashes, success: JobSucceededResponse) = helper.callCacheWriteActorCreations = helper.callCacheWriteActorCreations.foundOne((hashes, success)) + override def invalidateCacheHit(cacheId: CallCachingEntryId): Unit = { helper.invalidateCacheActorCreations = helper.invalidateCacheActorCreations.foundOne(cacheId) } override def createJobPreparationActor(jobPrepProps: Props, name: String) = jobPreparationProbe.ref } diff --git a/engine/src/test/scala/cromwell/engine/workflow/mocks/DeclarationMock.scala b/engine/src/test/scala/cromwell/engine/workflow/mocks/DeclarationMock.scala new file mode 100644 index 00000000000..471d007772d --- /dev/null +++ b/engine/src/test/scala/cromwell/engine/workflow/mocks/DeclarationMock.scala @@ -0,0 +1,21 @@ +package cromwell.engine.workflow.mocks + 
+import org.specs2.mock.Mockito +import wdl4s.{Declaration, WdlExpression} +import wdl4s.types.WdlType + +object DeclarationMock { + type DeclarationMockType = (String, WdlType, WdlExpression) +} + +trait DeclarationMock extends Mockito { + def mockDeclaration(name: String, + wdlType: WdlType, + expression: WdlExpression) = { + val declaration = mock[Declaration] + declaration.unqualifiedName returns name + declaration.expression returns Option(expression) + declaration.wdlType returns wdlType + declaration + } +} diff --git a/engine/src/test/scala/cromwell/engine/workflow/mocks/TaskMock.scala b/engine/src/test/scala/cromwell/engine/workflow/mocks/TaskMock.scala new file mode 100644 index 00000000000..4d8ef9c1d29 --- /dev/null +++ b/engine/src/test/scala/cromwell/engine/workflow/mocks/TaskMock.scala @@ -0,0 +1,27 @@ +package cromwell.engine.workflow.mocks + +import cromwell.engine.workflow.mocks.DeclarationMock.DeclarationMockType +import org.specs2.mock.Mockito +import wdl4s._ +import wdl4s.parser.WdlParser.Ast + +trait TaskMock extends Mockito { + + def mockTask(name: String, + declarations: Seq[Declaration] = Seq.empty, + runtimeAttributes: RuntimeAttributes = new RuntimeAttributes(Map.empty), + commandTemplateString: String = "!!shazam!!", + outputs: Seq[DeclarationMockType] = Seq.empty + ) = { + val task = mock[Task] + task.declarations returns declarations + task.runtimeAttributes returns runtimeAttributes + task.commandTemplateString returns commandTemplateString + task.name returns name + task.unqualifiedName returns name + task.outputs returns (outputs map { + case (outputName, wdlType, expression) => TaskOutput(outputName, wdlType, expression, mock[Ast], Option(task)) + }) + task + } +} diff --git a/engine/src/test/scala/cromwell/engine/workflow/mocks/WdlExpressionMock.scala b/engine/src/test/scala/cromwell/engine/workflow/mocks/WdlExpressionMock.scala new file mode 100644 index 00000000000..6bb15b306f5 --- /dev/null +++ b/engine/src/test/scala/cromwell/engine/workflow/mocks/WdlExpressionMock.scala @@ -0,0 +1,32 @@ +package cromwell.engine.workflow.mocks + +import org.specs2.mock.Mockito +import wdl4s.WdlExpression +import wdl4s.WdlExpression._ +import wdl4s.expression.WdlFunctions +import wdl4s.values.{WdlInteger, WdlString, WdlValue} + +import scala.util.Success + +trait WdlExpressionMock extends Mockito { + val helloStringExpression = { + val expression = mock[WdlExpression] + expression.valueString returns "hello" + expression.evaluate(any[ScopedLookupFunction], any[ WdlFunctions[WdlValue]]) returns Success(WdlString("hello")) + expression + } + + def mockStringExpression(value: String) = { + val expression = mock[WdlExpression] + expression.valueString returns value + expression.evaluate(any[ScopedLookupFunction], any[ WdlFunctions[WdlValue]]) returns Success(WdlString(value)) + expression + } + + def mockIntExpression(value: Int) = { + val expression = mock[WdlExpression] + expression.valueString returns value.toString + expression.evaluate(any[ScopedLookupFunction], any[ WdlFunctions[WdlValue]]) returns Success(WdlInteger(value)) + expression + } +} diff --git a/engine/src/test/scala/cromwell/jobstore/JobStoreServiceSpec.scala b/engine/src/test/scala/cromwell/jobstore/JobStoreServiceSpec.scala index fa1cc5067d1..ac84a6c450c 100644 --- a/engine/src/test/scala/cromwell/jobstore/JobStoreServiceSpec.scala +++ b/engine/src/test/scala/cromwell/jobstore/JobStoreServiceSpec.scala @@ -1,6 +1,6 @@ package cromwell.jobstore -import cromwell.CromwellTestkitSpec +import 
cromwell.CromwellTestKitSpec import cromwell.backend.BackendJobDescriptorKey import cromwell.core.{JobOutput, WorkflowId} import cromwell.jobstore.JobStoreActor._ @@ -8,9 +8,10 @@ import cromwell.jobstore.JobStoreServiceSpec._ import cromwell.services.SingletonServicesStore import org.scalatest.Matchers import org.specs2.mock.Mockito +import wdl4s.parser.WdlParser.Ast import wdl4s.types.WdlStringType import wdl4s.values.WdlString -import wdl4s.{Call, Task, TaskOutput, WdlExpression} +import wdl4s._ import scala.concurrent.duration._ import scala.language.postfixOps @@ -20,7 +21,7 @@ object JobStoreServiceSpec { val EmptyExpression = WdlExpression.fromString(""" "" """) } -class JobStoreServiceSpec extends CromwellTestkitSpec with Matchers with Mockito { +class JobStoreServiceSpec extends CromwellTestKitSpec with Matchers with Mockito { "JobStoreService" should { "work" in { @@ -28,10 +29,10 @@ class JobStoreServiceSpec extends CromwellTestkitSpec with Matchers with Mockito val jobStoreService = system.actorOf(JobStoreActor.props(jobStore)) val workflowId = WorkflowId.randomId() - val successCall = mock[Call] + val successCall = mock[TaskCall] successCall.fullyQualifiedName returns "foo.bar" val mockTask = mock[Task] - mockTask.outputs returns Seq(TaskOutput("baz", WdlStringType, EmptyExpression)) + mockTask.outputs returns Seq(TaskOutput("baz", WdlStringType, EmptyExpression, mock[Ast], Option(mockTask))) successCall.task returns mockTask val successKey = BackendJobDescriptorKey(successCall, None, 1).toJobStoreKey(workflowId) @@ -49,7 +50,7 @@ class JobStoreServiceSpec extends CromwellTestkitSpec with Matchers with Mockito case JobComplete(JobResultSuccess(Some(0), os)) if os == outputs => } - val failureCall = mock[Call] + val failureCall = mock[TaskCall] failureCall.fullyQualifiedName returns "baz.qux" val failureKey = BackendJobDescriptorKey(failureCall, None, 1).toJobStoreKey(workflowId) diff --git a/engine/src/test/scala/cromwell/jobstore/JobStoreWriterSpec.scala b/engine/src/test/scala/cromwell/jobstore/JobStoreWriterSpec.scala index a6e59679aff..eabc86a5e31 100644 --- a/engine/src/test/scala/cromwell/jobstore/JobStoreWriterSpec.scala +++ b/engine/src/test/scala/cromwell/jobstore/JobStoreWriterSpec.scala @@ -1,7 +1,7 @@ package cromwell.jobstore import akka.testkit.TestFSMRef -import cromwell.CromwellTestkitSpec +import cromwell.CromwellTestKitSpec import cromwell.core.WorkflowId import cromwell.jobstore.JobStoreActor.{JobStoreWriteSuccess, RegisterJobCompleted, RegisterWorkflowCompleted} import org.scalatest.{BeforeAndAfter, Matchers} @@ -11,7 +11,7 @@ import scala.concurrent.duration._ import scala.concurrent.{ExecutionContext, Future, Promise} import scala.language.postfixOps -class JobStoreWriterSpec extends CromwellTestkitSpec with Matchers with BeforeAndAfter { +class JobStoreWriterSpec extends CromwellTestKitSpec with Matchers with BeforeAndAfter { var database: WriteCountingJobStore = _ var jobStoreWriter: TestFSMRef[JobStoreWriterState, JobStoreWriterData, JobStoreWriterActor] = _ diff --git a/engine/src/test/scala/cromwell/subworkflowstore/SubWorkflowStoreSpec.scala b/engine/src/test/scala/cromwell/subworkflowstore/SubWorkflowStoreSpec.scala new file mode 100644 index 00000000000..79e41bd705d --- /dev/null +++ b/engine/src/test/scala/cromwell/subworkflowstore/SubWorkflowStoreSpec.scala @@ -0,0 +1,87 @@ +package cromwell.subworkflowstore + +import cromwell.CromwellTestKitSpec +import cromwell.core.{JobKey, WorkflowId, WorkflowSourceFilesWithoutImports} +import 
cromwell.services.SingletonServicesStore +import cromwell.subworkflowstore.SubWorkflowStoreActor._ +import org.scalatest.Matchers +import org.specs2.mock.Mockito +import wdl4s.{TaskCall, WdlExpression} +import cromwell.core.ExecutionIndex._ + +import scala.concurrent.duration._ +import SubWorkflowStoreSpec._ +import akka.testkit.TestProbe +import cromwell.database.sql.tables.SubWorkflowStoreEntry +import cromwell.engine.workflow.workflowstore.WorkflowStoreActor.{SubmitWorkflow, WorkflowSubmittedToStore} +import cromwell.engine.workflow.workflowstore.{SqlWorkflowStore, WorkflowStoreActor} + +import scala.language.postfixOps + +object SubWorkflowStoreSpec { + val MaxWait = 5 seconds + val EmptyExpression = WdlExpression.fromString(""" "" """) +} + +class SubWorkflowStoreSpec extends CromwellTestKitSpec with Matchers with Mockito { + "SubWorkflowStore" should { + "work" in { + lazy val subWorkflowStore = new SqlSubWorkflowStore(SingletonServicesStore.databaseInterface) + val subWorkflowStoreService = system.actorOf(SubWorkflowStoreActor.props(subWorkflowStore)) + + lazy val workflowStore = SqlWorkflowStore(SingletonServicesStore.databaseInterface) + val workflowStoreService = system.actorOf(WorkflowStoreActor.props(workflowStore, TestProbe().ref)) + + val parentWorkflowId = WorkflowId.randomId() + val subWorkflowId = WorkflowId.randomId() + val subSubWorkflowId = WorkflowId.randomId() + val call = mock[TaskCall] + call.fullyQualifiedName returns "foo.bar" + val jobKey = new JobKey { + override def scope = call + override def index: Option[Int] = None + override def attempt: Int = 0 + override def tag: String = "foobar" + } + + workflowStoreService ! SubmitWorkflow(WorkflowSourceFilesWithoutImports("", "{}", "{}")) + val rootWorkflowId = expectMsgType[WorkflowSubmittedToStore](10 seconds).workflowId + + // Query for non existing sub workflow + subWorkflowStoreService ! QuerySubWorkflow(parentWorkflowId, jobKey) + expectMsgType[SubWorkflowNotFound](MaxWait) + + // Register sub workflow + subWorkflowStoreService ! RegisterSubWorkflow(rootWorkflowId, parentWorkflowId, jobKey, subWorkflowId) + expectMsgType[SubWorkflowStoreRegisterSuccess](MaxWait) + + // Query for sub workflow + subWorkflowStoreService ! QuerySubWorkflow(parentWorkflowId, jobKey) + val subWorkflowEntry = SubWorkflowStoreEntry(Option(0), parentWorkflowId.toString, jobKey.scope.fullyQualifiedName, jobKey.index.fromIndex, jobKey.attempt, subWorkflowId.toString, Some(0)) + expectMsg[SubWorkflowFound](SubWorkflowFound(subWorkflowEntry)) + + // Register sub sub workflow + subWorkflowStoreService ! RegisterSubWorkflow(rootWorkflowId, subWorkflowId, jobKey, subSubWorkflowId) + expectMsgType[SubWorkflowStoreRegisterSuccess](MaxWait) + + // Query for sub sub workflow + subWorkflowStoreService ! QuerySubWorkflow(subWorkflowId, jobKey) + val subSubWorkflowEntry = SubWorkflowStoreEntry(Option(0), subWorkflowId.toString, jobKey.scope.fullyQualifiedName, jobKey.index.fromIndex, jobKey.attempt, subSubWorkflowId.toString, Some(1)) + expectMsg[SubWorkflowFound](SubWorkflowFound(subSubWorkflowEntry)) + + // Delete root workflow + subWorkflowStoreService ! WorkflowComplete(rootWorkflowId) + expectMsgType[SubWorkflowStoreCompleteSuccess](MaxWait) + + // Verify that everything is gone + subWorkflowStoreService ! QuerySubWorkflow(rootWorkflowId, jobKey) + expectMsgType[SubWorkflowNotFound](MaxWait) + + subWorkflowStoreService ! QuerySubWorkflow(parentWorkflowId, jobKey) + expectMsgType[SubWorkflowNotFound](MaxWait) + + subWorkflowStoreService ! 
QuerySubWorkflow(subWorkflowId, jobKey) + expectMsgType[SubWorkflowNotFound](MaxWait) + } + } +} diff --git a/engine/src/test/scala/cromwell/webservice/CromwellApiServiceSpec.scala b/engine/src/test/scala/cromwell/webservice/CromwellApiServiceSpec.scala index 9904835dc07..b3fbfee1e8c 100644 --- a/engine/src/test/scala/cromwell/webservice/CromwellApiServiceSpec.scala +++ b/engine/src/test/scala/cromwell/webservice/CromwellApiServiceSpec.scala @@ -7,7 +7,7 @@ import akka.actor.{Actor, Props} import akka.pattern.ask import akka.util.Timeout import com.typesafe.config.ConfigFactory -import cromwell.CromwellTestkitSpec +import cromwell.CromwellTestKitSpec import cromwell.core._ import cromwell.engine.workflow.workflowstore.WorkflowStoreActor import cromwell.engine.workflow.workflowstore.WorkflowStoreActor.{WorkflowAborted => _, _} @@ -15,7 +15,6 @@ import cromwell.server.{CromwellServerActor, CromwellSystem} import cromwell.services.metadata.MetadataService._ import cromwell.services.metadata._ import cromwell.services.metadata.impl.MetadataSummaryRefreshActor.MetadataSummarySuccess -import cromwell.util.SampleWdl.DeclarationsWorkflow._ import cromwell.util.SampleWdl.HelloWorld import org.scalatest.concurrent.{PatienceConfiguration, ScalaFutures} import org.scalatest.{FlatSpec, Matchers} @@ -67,7 +66,7 @@ class CromwellApiServiceSpec extends FlatSpec with CromwellApiService with Scala implicit val defaultTimeout = RouteTestTimeout(30.seconds.dilated) override def actorRefFactory = system - override val serviceRegistryActor = CromwellTestkitSpec.ServiceRegistryActorInstance + override val serviceRegistryActor = CromwellTestKitSpec.ServiceRegistryActorInstance override val workflowStoreActor = actorRefFactory.actorOf(Props(new MockWorkflowStoreActor())) override val workflowManagerActor = actorRefFactory.actorOf(Props.empty) @@ -243,7 +242,8 @@ class CromwellApiServiceSpec extends FlatSpec with CromwellApiService with Scala behavior of "REST API submission endpoint" it should "return 201 for a successful workflow submission " in { - Post(s"/workflows/$version", FormData(Seq("wdlSource" -> HelloWorld.wdlSource(), "workflowInputs" -> HelloWorld.rawInputs.toJson.toString()))) ~> + val bodyParts: Map[String, BodyPart] = Map("wdlSource" -> BodyPart(HelloWorld.wdlSource()), "workflowInputs" -> BodyPart(HelloWorld.rawInputs.toJson.toString())) + Post(s"/workflows/$version", MultipartFormData(bodyParts)) ~> submitRoute ~> check { assertResult( @@ -261,7 +261,7 @@ class CromwellApiServiceSpec extends FlatSpec with CromwellApiService with Scala it should "succesfully merge and override multiple input files" in { val input1 = Map("wf.a1" -> "hello", "wf.a2" -> "world").toJson.toString - val input2 = Map.empty.toJson.toString + val input2 = Map.empty[String, String].toJson.toString val overrideInput1 = Map("wf.a2" -> "universe").toJson.toString val allInputs = mergeMaps(Seq(Option(input1), Option(input2), Option(overrideInput1))) @@ -274,9 +274,9 @@ class CromwellApiServiceSpec extends FlatSpec with CromwellApiService with Scala behavior of "REST API batch submission endpoint" it should "return 200 for a successful workflow submission " in { val inputs = HelloWorld.rawInputs.toJson + val bodyParts = Map("wdlSource" -> BodyPart(HelloWorld.wdlSource()), "workflowInputs" -> BodyPart(s"[$inputs, $inputs]")) - Post(s"/workflows/$version/batch", - FormData(Seq("wdlSource" -> HelloWorld.wdlSource(), "workflowInputs" -> s"[$inputs, $inputs]"))) ~> + Post(s"/workflows/$version/batch", MultipartFormData(bodyParts)) ~> 
submitBatchRoute ~> check { assertResult( diff --git a/engine/src/test/scala/cromwell/webservice/EngineStatsActorSpec.scala b/engine/src/test/scala/cromwell/webservice/EngineStatsActorSpec.scala index 021e4a43c10..b32ceb1c958 100644 --- a/engine/src/test/scala/cromwell/webservice/EngineStatsActorSpec.scala +++ b/engine/src/test/scala/cromwell/webservice/EngineStatsActorSpec.scala @@ -14,22 +14,22 @@ class EngineStatsActorSpec extends TestKitSuite("EngineStatsActor") with FlatSpe behavior of "EngineStatsActor" val replyTo = TestProbe() - val defaultTimeout = 100 millis + val defaultTimeout = 500 millis it should "return double zeros with no WorkflowActors" in { - TestActorRef(EngineStatsActor.props(List.empty[ActorRef], replyTo.ref)) + TestActorRef(EngineStatsActor.props(List.empty[ActorRef], replyTo.ref, timeout = 200 millis)) replyTo.expectMsg(defaultTimeout, EngineStats(0, 0)) } it should "return snakeyes with a single workflow with one job" in { val workflowActors = List(Props(FakeWorkflowActor(1))) map { TestActorRef(_) } - TestActorRef(EngineStatsActor.props(workflowActors, replyTo.ref)) + TestActorRef(EngineStatsActor.props(workflowActors, replyTo.ref, timeout = 200 millis)) replyTo.expectMsg(defaultTimeout, EngineStats(1, 1)) } it should "return an unemployed workflow when that's the world it lives in" in { val workflowActors = List(Props(FakeWorkflowActor(0))) map { TestActorRef(_) } - TestActorRef(EngineStatsActor.props(workflowActors, replyTo.ref)) + TestActorRef(EngineStatsActor.props(workflowActors, replyTo.ref, timeout = 200 millis)) replyTo.expectMsg(defaultTimeout, EngineStats(1, 0)) } @@ -41,7 +41,7 @@ class EngineStatsActorSpec extends TestKitSuite("EngineStatsActor") with FlatSpe it should "return the summation of jobs for all WorkflowActors" in { val workflowActors = List(Props(FakeWorkflowActor(1)), Props(FakeWorkflowActor(2))) map { TestActorRef(_) } - TestActorRef(EngineStatsActor.props(workflowActors, replyTo.ref)) + TestActorRef(EngineStatsActor.props(workflowActors, replyTo.ref, timeout = 200 millis)) replyTo.expectMsg(defaultTimeout, EngineStats(2, 3)) } } diff --git a/engine/src/test/scala/cromwell/webservice/MetadataBuilderActorSpec.scala b/engine/src/test/scala/cromwell/webservice/MetadataBuilderActorSpec.scala index 5ae82221bd4..9ecfeea4292 100644 --- a/engine/src/test/scala/cromwell/webservice/MetadataBuilderActorSpec.scala +++ b/engine/src/test/scala/cromwell/webservice/MetadataBuilderActorSpec.scala @@ -22,15 +22,14 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with FlatSpecLik behavior of "MetadataParser" - val defaultTimeout = 100 millis + val defaultTimeout = 200 millis val mockServiceRegistry = TestProbe() - val parentProbe = TestProbe() - def assertMetadataResponse(action: MetadataServiceAction, queryReply: MetadataQuery, events: Seq[MetadataEvent], expectedRes: String) = { + val parentProbe = TestProbe() val metadataBuilder = TestActorRef(MetadataBuilderActor.props(mockServiceRegistry.ref), parentProbe.ref, s"MetadataActor-${UUID.randomUUID()}") metadataBuilder ! 
action // Ask for everything mockServiceRegistry.expectMsg(defaultTimeout, action) // TestActor runs on CallingThreadDispatcher @@ -96,7 +95,7 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with FlatSpecLik | "id": "$workflowA" |}""".stripMargin - val mdQuery = MetadataQuery(workflowA, None, None, None, None) + val mdQuery = MetadataQuery(workflowA, None, None, None, None, expandSubWorkflows = false) val queryAction = GetMetadataQueryAction(mdQuery) assertMetadataResponse(queryAction, mdQuery, workflowAEvents, expectedRes) } @@ -113,8 +112,8 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with FlatSpecLik val events = eventList map { e => (e._1, MetadataValue(e._2), e._3) } map Function.tupled(makeEvent(workflow)) val expectedRes = s"""{ "calls": {}, $expectedJson, "id":"$workflow" }""" - val mdQuery = MetadataQuery(workflow, None, None, None, None) - val queryAction = GetSingleWorkflowMetadataAction(workflow, None, None) + val mdQuery = MetadataQuery(workflow, None, None, None, None, expandSubWorkflows = false) + val queryAction = GetSingleWorkflowMetadataAction(workflow, None, None, expandSubWorkflows = false) assertMetadataResponse(queryAction, mdQuery, events, expectedRes) } @@ -305,7 +304,7 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with FlatSpecLik | } """.stripMargin - val mdQuery = MetadataQuery(workflowId, None, None, None, None) + val mdQuery = MetadataQuery(workflowId, None, None, None, None, expandSubWorkflows = false) val queryAction = GetMetadataQueryAction(mdQuery) assertMetadataResponse(queryAction, mdQuery, events, expectedResponse) } @@ -326,7 +325,7 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with FlatSpecLik |} """.stripMargin - val mdQuery = MetadataQuery(workflowId, None, None, None, None) + val mdQuery = MetadataQuery(workflowId, None, None, None, None, expandSubWorkflows = false) val queryAction = GetMetadataQueryAction(mdQuery) assertMetadataResponse(queryAction, mdQuery, events, expectedResponse) } @@ -346,14 +345,14 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with FlatSpecLik |} """.stripMargin - val mdQuery = MetadataQuery(workflowId, None, None, None, None) + val mdQuery = MetadataQuery(workflowId, None, None, None, None, expandSubWorkflows = false) val queryAction = GetMetadataQueryAction(mdQuery) assertMetadataResponse(queryAction, mdQuery, events, expectedResponse) } it should "render empty Json" in { val workflowId = WorkflowId.randomId() - val mdQuery = MetadataQuery(workflowId, None, None, None, None) + val mdQuery = MetadataQuery(workflowId, None, None, None, None, expandSubWorkflows = false) val queryAction = GetMetadataQueryAction(mdQuery) val expectedEmptyResponse = """{}""" assertMetadataResponse(queryAction, mdQuery, List.empty, expectedEmptyResponse) @@ -383,7 +382,7 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with FlatSpecLik |} """.stripMargin - val mdQuery = MetadataQuery(workflowId, None, None, None, None) + val mdQuery = MetadataQuery(workflowId, None, None, None, None, expandSubWorkflows = false) val queryAction = GetMetadataQueryAction(mdQuery) assertMetadataResponse(queryAction, mdQuery, emptyEvents, expectedEmptyResponse) @@ -398,4 +397,98 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with FlatSpecLik assertMetadataResponse(queryAction, mdQuery, valueEvents, expectedNonEmptyResponse) } + + it should "expand sub workflow metadata when asked for" in { + val mainWorkflowId = 
WorkflowId.randomId() + val subWorkflowId = WorkflowId.randomId() + + val mainEvents = List( + MetadataEvent(MetadataKey(mainWorkflowId, Option(MetadataJobKey("callA", None, 1)), "subWorkflowId"), MetadataValue(subWorkflowId)) + ) + + val subEvents = List( + MetadataEvent(MetadataKey(mainWorkflowId, None, "some"), MetadataValue("sub workflow info")) + ) + + val mainQuery = MetadataQuery(mainWorkflowId, None, None, None, None, expandSubWorkflows = true) + val mainQueryAction = GetMetadataQueryAction(mainQuery) + + val subQuery = MetadataQuery(subWorkflowId, None, None, None, None, expandSubWorkflows = true) + val subQueryAction = GetMetadataQueryAction(subQuery) + + val parentProbe = TestProbe() + val metadataBuilder = TestActorRef(MetadataBuilderActor.props(mockServiceRegistry.ref), parentProbe.ref, s"MetadataActor-${UUID.randomUUID()}") + metadataBuilder ! mainQueryAction + mockServiceRegistry.expectMsg(defaultTimeout, mainQueryAction) + mockServiceRegistry.reply(MetadataLookupResponse(mainQuery, mainEvents)) + mockServiceRegistry.expectMsg(defaultTimeout, subQueryAction) + mockServiceRegistry.reply(MetadataLookupResponse(subQuery, subEvents)) + + val expandedRes = + s""" + |{ + | "calls": { + | "callA": [ + | { + | "subWorkflowMetadata": { + | "some": "sub workflow info", + | "calls": {}, + | "id": "$subWorkflowId" + | }, + | "attempt": 1, + | "shardIndex": -1 + | } + | ] + | }, + | "id": "$mainWorkflowId" + |} + """.stripMargin + + parentProbe.expectMsgPF(defaultTimeout) { + case response: RequestComplete[(StatusCode, JsObject)] @unchecked => + response.response._1 shouldBe StatusCodes.OK + response.response._2 shouldBe expandedRes.parseJson + } + } + + it should "NOT expand sub workflow metadata when NOT asked for" in { + val mainWorkflowId = WorkflowId.randomId() + val subWorkflowId = WorkflowId.randomId() + + val mainEvents = List( + MetadataEvent(MetadataKey(mainWorkflowId, Option(MetadataJobKey("callA", None, 1)), "subWorkflowId"), MetadataValue(subWorkflowId)) + ) + + val queryNoExpand = MetadataQuery(mainWorkflowId, None, None, None, None, expandSubWorkflows = false) + val queryNoExpandAction = GetMetadataQueryAction(queryNoExpand) + + val parentProbe = TestProbe() + val metadataBuilder = TestActorRef(MetadataBuilderActor.props(mockServiceRegistry.ref), parentProbe.ref, s"MetadataActor-${UUID.randomUUID()}") + metadataBuilder ! 
queryNoExpandAction + mockServiceRegistry.expectMsg(defaultTimeout, queryNoExpandAction) + mockServiceRegistry.reply(MetadataLookupResponse(queryNoExpand, mainEvents)) + + + val nonExpandedRes = + s""" + |{ + | "calls": { + | "callA": [ + | { + | "subWorkflowId": "$subWorkflowId", + | "attempt": 1, + | "shardIndex": -1 + | } + | ] + | }, + | "id": "$mainWorkflowId" + |} + """.stripMargin + + parentProbe.expectMsgPF(defaultTimeout) { + case response: RequestComplete[(StatusCode, JsObject)] @unchecked => + response.response._1 shouldBe StatusCodes.OK + response.response._2 shouldBe nonExpandedRes.parseJson + } + } } diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/ContentTypeOption.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/ContentTypeOption.scala deleted file mode 100644 index e6f83b0e457..00000000000 --- a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/ContentTypeOption.scala +++ /dev/null @@ -1,15 +0,0 @@ -package cromwell.filesystems.gcs - -import java.nio.file.OpenOption - -object ContentTypeOption { - sealed trait ContentType - case object PlainText extends ContentType with OpenOption { - override def toString = "plain/text" - } - case object Json extends ContentType with OpenOption { - override def toString = "application/json" - } -} - - diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileAttributes.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileAttributes.scala deleted file mode 100644 index 5d45641de3b..00000000000 --- a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileAttributes.scala +++ /dev/null @@ -1,23 +0,0 @@ -package cromwell.filesystems.gcs - -import java.nio.file.attribute.{BasicFileAttributes, FileTime} - -import com.google.api.services.storage.Storage -import com.google.api.services.storage.model.StorageObject -import org.apache.commons.codec.digest.DigestUtils - -class GcsFileAttributes(path: NioGcsPath, storageClient: Storage) extends BasicFileAttributes { - override def fileKey(): AnyRef = DigestUtils.md5Hex(path.toString) - override def isRegularFile: Boolean = throw new NotImplementedError("To be implemented when/if needed") - override def isOther: Boolean = throw new NotImplementedError("To be implemented when/if needed") - override def lastModifiedTime(): FileTime = throw new NotImplementedError("To be implemented when/if needed") - override def size(): Long = { - val getObject = storageClient.objects.get(path.bucket, path.objectName) - val storageObject: StorageObject = getObject.execute() - storageObject.getSize.longValue() - } - override def isDirectory: Boolean = path.isDirectory - override def isSymbolicLink: Boolean = false - override def creationTime(): FileTime = throw new NotImplementedError("To be implemented when/if needed") - override def lastAccessTime(): FileTime = throw new NotImplementedError("To be implemented when/if needed") -} diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileSystem.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileSystem.scala deleted file mode 100644 index 215b18935bf..00000000000 --- a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileSystem.scala +++ /dev/null @@ -1,68 +0,0 @@ -package cromwell.filesystems.gcs - -import java.lang.Iterable -import java.nio.file._ -import java.nio.file.attribute.UserPrincipalLookupService -import java.nio.file.spi.FileSystemProvider -import java.util.{Collections, Set => JSet} - - -case class NotAGcsPathException(path: 
String) extends IllegalArgumentException(s"$path is not a valid GCS path.") - -object GcsFileSystem { - val Separator = "/" - private[gcs] val Scheme = "gs" - private[gcs] val Protocol = s"$Scheme://" - private val GsUriRegex = s"""$Protocol(.*)""".r - private val AttributeViews = Collections.singleton("basic") - - def isAbsoluteGcsPath(str: String) = str match { - case GsUriRegex(chunks) => true - case _ => false - } - - def apply(provider: GcsFileSystemProvider) = new GcsFileSystem(provider) -} - -/** - * Implements the java.nio.FileSystem interface for GoogleCloudStorage. - */ -class GcsFileSystem private(val gcsFileSystemProvider: GcsFileSystemProvider) extends FileSystem { - - import GcsFileSystem._ - - override def supportedFileAttributeViews(): JSet[String] = AttributeViews - - override def getSeparator: String = Separator - - override def getRootDirectories: Iterable[Path] = Collections.emptyList[Path] - - override def newWatchService(): WatchService = throw new NotImplementedError("GCS FS does not support Watch Service at this time") - - override def getFileStores: Iterable[FileStore] = Collections.emptyList() - - override def isReadOnly: Boolean = false - - override def provider(): FileSystemProvider = gcsFileSystemProvider - - override def isOpen: Boolean = true - - override def close(): Unit = throw new UnsupportedOperationException("GCS FS cannot be closed") - - override def getPathMatcher(syntaxAndPattern: String): PathMatcher = FileSystems.getDefault.getPathMatcher(syntaxAndPattern) - - override def getUserPrincipalLookupService: UserPrincipalLookupService = throw new UnsupportedOperationException() - - private def buildPath(first: String, more: Seq[String], forceDirectory: Boolean) = { - val directory = forceDirectory || (more.isEmpty && first.endsWith(Separator)) || more.lastOption.exists(_.endsWith(Separator)) - first match { - case GsUriRegex(chunks) => new NioGcsPath(chunks.split(Separator) ++ more.toArray[String], true, directory)(this) - case empty if empty.isEmpty => new NioGcsPath(Array.empty[String] ++ more.toArray[String], false, false)(this) - case _ => throw NotAGcsPathException(s"$first is not a gcs path") - } - } - - override def getPath(first: String, more: String*): Path = buildPath(first, more, forceDirectory = false) - - def getPathAsDirectory(first: String, more: String*): Path = buildPath(first, more, forceDirectory = true) -} diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileSystemProvider.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileSystemProvider.scala deleted file mode 100644 index 845ec29ef53..00000000000 --- a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileSystemProvider.scala +++ /dev/null @@ -1,295 +0,0 @@ -package cromwell.filesystems.gcs - -import java.io.{FileNotFoundException, OutputStream} -import java.net.URI -import java.nio.channels.{Channels, SeekableByteChannel} -import java.nio.file.DirectoryStream.Filter -import java.nio.file._ -import java.nio.file.attribute.{BasicFileAttributes, FileAttribute, FileAttributeView} -import java.nio.file.spi.FileSystemProvider -import java.util -import java.util.Collections -import java.util.concurrent.{AbstractExecutorService, TimeUnit} - -import cats.instances.try_._ -import cats.syntax.functor._ -import com.google.api.client.googleapis.json.GoogleJsonResponseException -import com.google.api.client.googleapis.media.MediaHttpUploader -import com.google.api.services.storage.Storage -import 
com.google.api.services.storage.model.StorageObject -import com.google.cloud.hadoop.gcsio.{GoogleCloudStorageReadChannel, GoogleCloudStorageWriteChannel, ObjectWriteConditions} -import com.google.cloud.hadoop.util.{ApiErrorExtractor, AsyncWriteChannelOptions, ClientRequestHelper} -import com.typesafe.config.{Config, ConfigFactory, ConfigMemorySize} -import net.ceedubs.ficus.Ficus._ -import net.ceedubs.ficus.readers.ValueReader - -import scala.annotation.tailrec -import scala.collection.JavaConverters._ -import scala.concurrent.duration._ -import scala.concurrent.{ExecutionContext, ExecutionContextExecutorService} -import scala.language.postfixOps -import scala.util.{Failure, Success, Try} - -object GcsFileSystemProvider { - def apply(storageClient: Storage)(implicit ec: ExecutionContext) = { - new GcsFileSystemProvider(Success(storageClient), ec) - } - - object AcceptAllFilter extends DirectoryStream.Filter[Path] { - override def accept(entry: Path): Boolean = true - } - - // To choose these numbers I first entered a prolonged period of personal consideration and deep thought. - // Then, at the end of this time, I decided to just pick some numbers arbitrarily. - private val retryInterval = 500 milliseconds - private val retryCount = 3 - - def withRetry[A](f: => A, retries: Int = retryCount): A = Try(f) match { - case Success(res) => res - case Failure(ex: GoogleJsonResponseException) - if retries > 0 && - (ex.getStatusCode == 404 || ex.getStatusCode == 500) => - // FIXME remove this sleep - Thread.sleep(retryInterval.toMillis) - withRetry(f, retries - 1) - case Failure(ex) => throw ex - } - - // TODO refactor as part of Ficus and submit a PR - implicit val configMemorySizeValueReader: ValueReader[ConfigMemorySize] = new ValueReader[ConfigMemorySize] { - override def read(config: Config, path: String): ConfigMemorySize = config.getMemorySize(path) - } -} - -/** - * Converts a Scala ExecutionContext to a Java ExecutorService. - * https://groups.google.com/forum/#!topic/scala-user/ZyHrfzD7eX8 - */ -object ExecutionContextExecutorServiceBridge { - def apply(ec: ExecutionContext): ExecutionContextExecutorService = ec match { - case null => throw new RuntimeException("Execution context cannot be null") - case eces: ExecutionContextExecutorService => eces - case executionContext => new AbstractExecutorService with ExecutionContextExecutorService { - override def prepare(): ExecutionContext = executionContext - override def isShutdown = false - override def isTerminated = false - override def shutdown() = () - override def shutdownNow() = Collections.emptyList[Runnable] - override def execute(runnable: Runnable): Unit = executionContext execute runnable - override def reportFailure(t: Throwable): Unit = executionContext reportFailure t - override def awaitTermination(length: Long,unit: TimeUnit): Boolean = false - } - } -} - -/** - * Implements java.nio.FileSystemProvider for GoogleCloudStorage - * This implementation is not complete and mostly a proof of concept that it's possible to *copy* around files from/to local/gcs. - * Copying is the only functionality that has been successfully tested (same and cross filesystems). - * - * If/when switching to Google's GCS NIO implementation, callers may need to implement various utilities built into - * this implementation, including: - * - * - Minimizing the upload buffer size, assuming the default is also on the order of megabytes of memory per upload - * - Automatically retrying transient errors - * - etc. 
- * - * @param storageClient Google API Storage object - * @param executionContext executionContext, will be used to perform async writes to GCS after being converted to a Java execution service - */ -class GcsFileSystemProvider private[gcs](storageClient: Try[Storage], val executionContext: ExecutionContext) extends FileSystemProvider { - import GcsFileSystemProvider._ - - private[this] lazy val config = ConfigFactory.load() - - // We want to throw an exception here if we try to use this class with a failed gcs interface - lazy val client = storageClient.get - private val executionService = ExecutionContextExecutorServiceBridge(executionContext) - private val errorExtractor = new ApiErrorExtractor() - def notAGcsPath(path: Path) = throw new IllegalArgumentException(s"$path is not a GCS path.") - - lazy val defaultFileSystem: GcsFileSystem = GcsFileSystem(this) - - private def exists(path: Path): Unit = path match { - case gcsPath: NioGcsPath => - val attempt: Try[Any] = Try(withRetry(client.objects.get(gcsPath.bucket, gcsPath.objectName).execute)) recover { - case ex: GoogleJsonResponseException - if ex.getStatusCode == 404 => if (!gcsPath.isDirectory) throw new FileNotFoundException(path.toString) - } - attempt.void.get - case _ => throw new FileNotFoundException(path.toString) - } - - /** - * Note: options and attributes are not honored. - */ - override def newByteChannel(path: Path, options: util.Set[_ <: OpenOption], attrs: FileAttribute[_]*): SeekableByteChannel = { - def createReadChannel(gcsPath: NioGcsPath) = new GoogleCloudStorageReadChannel(client, - gcsPath.bucket, - gcsPath.objectName, - errorExtractor, - new ClientRequestHelper[StorageObject]() - ) - - path match { - case gcsPath: NioGcsPath => withRetry(createReadChannel(gcsPath)) - case _ => notAGcsPath(path) - } - } - - /* - For now, default all upload buffers as small as possible, 256K per upload. Without this default the buffers are 64M. - In the future, we may possibly be able to pass information to the NioGcsPath with the expected... or Google's GCS NIO - implementation will be finished we'll need to revisit this issue again. - - See also: - - com.google.cloud.hadoop.util.AbstractGoogleAsyncWriteChannel.setUploadBufferSize - - com.google.api.client.googleapis.media.MediaHttpUploader.setContentAndHeadersOnCurrentRequest - */ - private[this] lazy val uploadBufferBytes = { - val configBytes = config.as[Option[ConfigMemorySize]]("google.upload-buffer-bytes").map(_.toBytes.toInt) - configBytes.getOrElse(MediaHttpUploader.MINIMUM_CHUNK_SIZE) - } - - /** - * Overrides the default implementation to provide a writable channel (which newByteChannel doesn't). - * NOTE: options are not honored. 
- */ - override def newOutputStream(path: Path, options: OpenOption*): OutputStream = { - val contentType = options collectFirst { - case e: ContentTypeOption.ContentType => e.toString - } getOrElse ContentTypeOption.PlainText.toString - - def initializeOutputStream(gcsPath: NioGcsPath) = { - val channel = new GoogleCloudStorageWriteChannel( - executionService, - client, - new ClientRequestHelper[StorageObject](), - gcsPath.bucket, - gcsPath.objectName, - AsyncWriteChannelOptions.newBuilder().setUploadBufferSize(uploadBufferBytes).build(), - new ObjectWriteConditions(), - Map.empty[String, String].asJava, - contentType) - channel.initialize() - Channels.newOutputStream(channel) - } - - path match { - case gcsPath: NioGcsPath => withRetry(initializeOutputStream(gcsPath)) - case _ => notAGcsPath(path) - } - } - - override def copy(source: Path, target: Path, options: CopyOption*): Unit = { - (source, target) match { - case (s: NioGcsPath, d: NioGcsPath) => - def innerCopy(): Unit = { - val storageObject = client.objects.get(s.bucket, s.objectName).execute - client.objects.copy(s.bucket, s.objectName, d.bucket, d.objectName, storageObject).execute - () - } - - withRetry(innerCopy()) - case _ => throw new UnsupportedOperationException(s"Can only copy from GCS to GCS: $source or $target is not a GCS path") - } - } - - override def delete(path: Path): Unit = { - path match { - case gcs: NioGcsPath => try { - withRetry { - client.objects.delete(gcs.bucket, gcs.objectName).execute() - () - } - } catch { - case ex: GoogleJsonResponseException if ex.getStatusCode == 404 => throw new NoSuchFileException(path.toString) - } - case _ => notAGcsPath(path) - } - } - - override def readAttributes[A <: BasicFileAttributes](path: Path, `type`: Class[A], options: LinkOption*): A = path match { - case gcsPath: NioGcsPath => - exists(path) - new GcsFileAttributes(gcsPath, client).asInstanceOf[A] - case _ => notAGcsPath(path) - } - - override def move(source: Path, target: Path, options: CopyOption*): Unit = { - (source, target) match { - case (s: NioGcsPath, d: NioGcsPath) => - def moveInner(): Unit = { - val storageObject = client.objects.get(s.bucket, s.objectName).execute - client.objects.rewrite(s.bucket, s.objectName, d.bucket, d.objectName, storageObject).execute - () - } - - withRetry(moveInner()) - case _ => throw new UnsupportedOperationException(s"Can only move from GCS to GCS: $source or $target is not a GCS path") - } - } - - def crc32cHash(path: Path) = path match { - case gcsDir: NioGcsPath => withRetry(client.objects().get(gcsDir.bucket, gcsDir.objectName).execute().getCrc32c) - case _ => notAGcsPath(path) - } - - override def checkAccess(path: Path, modes: AccessMode*): Unit = { exists(path); () } - override def createDirectory(dir: Path, attrs: FileAttribute[_]*): Unit = {} - - override def getFileSystem(uri: URI): FileSystem = defaultFileSystem - - override def isHidden(path: Path): Boolean = throw new NotImplementedError() - - private[this] lazy val maxResults = config.as[Option[Int]]("google.list-max-results").getOrElse(1000).toLong - - private def list(gcsDir: NioGcsPath) = { - val listRequest = client.objects().list(gcsDir.bucket).setMaxResults(maxResults) - listRequest.setPrefix(gcsDir.objectName) - - def objectToPath(storageObject: StorageObject): Path = { - NioGcsPath(s"$getScheme${storageObject.getBucket}${GcsFileSystem.Separator}${storageObject.getName}")(gcsDir.getFileSystem.asInstanceOf[GcsFileSystem]) - } - - // Contains a Seq corresponding to the current page of objects, plus a 
token for the next page of objects, if any. - case class ListPageResult(objects: Seq[StorageObject], nextPageToken: Option[String]) - - def requestListPage(pageToken: Option[String]): ListPageResult = { - val objects = withRetry(listRequest.setPageToken(pageToken.orNull).execute()) - ListPageResult(objects.getItems.asScala, Option(objects.getNextPageToken)) - } - - @tailrec - def remainingObjects(pageToken: Option[String], acc: Seq[StorageObject]): Seq[StorageObject] = { - if (pageToken.isEmpty) acc - else { - val page = requestListPage(pageToken) - remainingObjects(page.nextPageToken, acc ++ page.objects) - } - } - - val firstPage = requestListPage(pageToken = None) - val allObjects = remainingObjects(firstPage.nextPageToken, firstPage.objects) - - new DirectoryStream[Path] { - override def iterator(): util.Iterator[Path] = (allObjects map objectToPath).toIterator.asJava - override def close(): Unit = {} - } - } - - override def newDirectoryStream(dir: Path, filter: Filter[_ >: Path]): DirectoryStream[Path] = dir match { - case gcsDir: NioGcsPath => list(gcsDir) - case _ => notAGcsPath(dir) - } - override def setAttribute(path: Path, attribute: String, value: scala.Any, options: LinkOption*): Unit = throw new NotImplementedError() - override def getPath(uri: URI): Path = throw new NotImplementedError() - override def newFileSystem(uri: URI, env: util.Map[String, _]): FileSystem = { - throw new UnsupportedOperationException("GcsFileSystem provider doesn't support creation of new FileSystems at this time. Use getFileSystem instead.") - } - override def readAttributes(path: Path, attributes: String, options: LinkOption*): util.Map[String, AnyRef] = throw new NotImplementedError() - override def isSameFile(path: Path, path2: Path): Boolean = throw new NotImplementedError() - override def getFileAttributeView[V <: FileAttributeView](path: Path, `type`: Class[V], options: LinkOption*): V = throw new NotImplementedError() - override def getFileStore(path: Path): FileStore = throw new NotImplementedError() - override def getScheme: String = GcsFileSystem.Protocol -} diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsPathBuilder.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsPathBuilder.scala new file mode 100644 index 00000000000..0649da9b7b3 --- /dev/null +++ b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsPathBuilder.scala @@ -0,0 +1,100 @@ +package cromwell.filesystems.gcs + +import java.net.URI +import java.nio.file.Path +import java.nio.file.spi.FileSystemProvider + +import akka.actor.ActorSystem +import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport +import com.google.api.client.json.jackson2.JacksonFactory +import com.google.cloud.RetryParams +import com.google.cloud.storage.StorageOptions +import com.google.cloud.storage.contrib.nio.{CloudStorageConfiguration, CloudStorageFileSystem, CloudStoragePath} +import com.google.common.base.Preconditions._ +import cromwell.core.WorkflowOptions +import cromwell.core.path.proxy.{PathProxy, RetryableFileSystemProviderProxy} +import cromwell.core.path.{CustomRetryParams, PathBuilder} +import cromwell.filesystems.gcs.auth.GoogleAuthMode + +import scala.util.{Failure, Try} + +object GcsPathBuilder { + + val JsonFactory = JacksonFactory.getDefaultInstance + val HttpTransport = GoogleNetHttpTransport.newTrustedTransport + + def checkValid(uri: URI) = { + checkNotNull(uri.getScheme, s"%s does not have a gcs scheme", uri) + checkArgument( + 
uri.getScheme.equalsIgnoreCase(CloudStorageFileSystem.URI_SCHEME), + "Cloud Storage URIs must have '%s' scheme: %s", + CloudStorageFileSystem.URI_SCHEME, + uri + ) + checkNotNull(uri.getHost, s"%s does not have a host", uri) + } + + def isValidGcsUrl(str: String): Boolean = { + Try(checkValid(URI.create(str))).isSuccess + } + + def isGcsPath(path: Path): Boolean = { + path.getFileSystem.provider().getScheme == CloudStorageFileSystem.URI_SCHEME + } +} + +class GcsPathBuilder(authMode: GoogleAuthMode, + retryParams: RetryParams, + cloudStorageConfiguration: CloudStorageConfiguration, + options: WorkflowOptions) extends PathBuilder { + authMode.validate(options) + + protected val storageOptions = StorageOptions.builder() + .authCredentials(authMode.authCredentials(options)) + .retryParams(retryParams) + .build() + + // The CloudStorageFileSystemProvider constructor is not public. Currently the only way to obtain one is through a CloudStorageFileSystem + // Moreover at this point we can use the same provider for all operations as we have usable credentials + // In order to avoid recreating a provider with every getPath call, create a dummy FileSystem just to get its provider + protected val _provider = CloudStorageFileSystem.forBucket("dummy", cloudStorageConfiguration, storageOptions).provider() + + protected def provider: FileSystemProvider = _provider + /* + * The StorageService already contains a StorageRpc object that contains a com.google.api.services.storage.Storage object + * However it is not accessible from StorageService. + * com.google.cloud.storage.Storage has some batching capabilities but not for copying. + * In order to support batch copy, we need a com.google.api.services.storage.Storage. + */ + def getHash(path: Path): Try[String] = { + path match { + case gcsPath: CloudStoragePath => Try(storageOptions.service().get(gcsPath.bucket(), gcsPath.toRealPath().toString).crc32c()) + case proxy: PathProxy => + val gcsPath = proxy.unbox(classOf[CloudStoragePath]).get + Try(storageOptions.service().get(gcsPath.bucket(), gcsPath.toRealPath().toString).crc32c()) + case other => Failure(new IllegalArgumentException(s"$other is not a CloudStoragePath")) + } + } + + def build(string: String): Try[Path] = { + Try { + val uri = URI.create(string) + GcsPathBuilder.checkValid(uri) + provider.getPath(uri) + } + } + + override def name: String = "Gcs" +} + +class RetryableGcsPathBuilder(authMode: GoogleAuthMode, + googleRetryParams: RetryParams, + customRetryParams: CustomRetryParams, + cloudStorageConfiguration: CloudStorageConfiguration, + options: WorkflowOptions)(implicit actorSystem: ActorSystem) + extends GcsPathBuilder(authMode, googleRetryParams, cloudStorageConfiguration, options) { + + override protected def provider = new RetryableFileSystemProviderProxy(_provider, customRetryParams) + + override def getHash(path: Path) = provider.withRetry(() => super.getHash(path)) +} diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsPathBuilderFactory.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsPathBuilderFactory.scala new file mode 100644 index 00000000000..83aad3ce850 --- /dev/null +++ b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsPathBuilderFactory.scala @@ -0,0 +1,48 @@ +package cromwell.filesystems.gcs + +import akka.actor.ActorSystem +import com.google.api.client.googleapis.media.MediaHttpUploader +import com.google.cloud.RetryParams +import com.google.cloud.storage.contrib.nio.CloudStorageConfiguration +import 
com.typesafe.config.ConfigFactory +import cromwell.core.WorkflowOptions +import cromwell.core.path.{CustomRetryParams, PathBuilderFactory} +import cromwell.filesystems.gcs.auth.GoogleAuthMode +import net.ceedubs.ficus.Ficus._ + +object GcsPathBuilderFactory { + + private[this] lazy val UploadBufferBytes = { + ConfigFactory.load().as[Option[Int]]("google.upload-buffer-bytes").getOrElse(MediaHttpUploader.MINIMUM_CHUNK_SIZE) + } + + val DefaultRetryParams = RetryParams.defaultInstance() + val DefaultCloudStorageConfiguration = { + CloudStorageConfiguration.builder() + .blockSize(UploadBufferBytes) + .permitEmptyPathComponents(true) + .stripPrefixSlash(true) + .usePseudoDirectories(true) + .build() + } +} + +case class GcsPathBuilderFactory(authMode: GoogleAuthMode, + retryParams: RetryParams = GcsPathBuilderFactory.DefaultRetryParams, + cloudStorageConfiguration: CloudStorageConfiguration = GcsPathBuilderFactory.DefaultCloudStorageConfiguration) + + extends PathBuilderFactory { + + def withOptions(options: WorkflowOptions)(implicit actorSystem: ActorSystem) = new GcsPathBuilder(authMode, retryParams, cloudStorageConfiguration, options) +} + +case class RetryableGcsPathBuilderFactory(authMode: GoogleAuthMode, + googleRetryParams: RetryParams = GcsPathBuilderFactory.DefaultRetryParams, + customRetryParams: CustomRetryParams = CustomRetryParams.Default, + cloudStorageConfiguration: CloudStorageConfiguration = GcsPathBuilderFactory.DefaultCloudStorageConfiguration) + + + extends PathBuilderFactory { + + def withOptions(options: WorkflowOptions)(implicit actorSystem: ActorSystem) = new RetryableGcsPathBuilder(authMode, googleRetryParams, customRetryParams, cloudStorageConfiguration, options) +} diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GoogleAuthMode.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GoogleAuthMode.scala deleted file mode 100644 index 2930cc91149..00000000000 --- a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GoogleAuthMode.scala +++ /dev/null @@ -1,186 +0,0 @@ -package cromwell.filesystems.gcs - -import java.io.{FileNotFoundException, IOException, InputStreamReader} -import java.nio.file.{Files, Paths} - -import com.google.api.client.auth.oauth2.Credential -import com.google.api.client.extensions.java6.auth.oauth2.AuthorizationCodeInstalledApp -import com.google.api.client.googleapis.auth.oauth2.{GoogleAuthorizationCodeFlow, GoogleClientSecrets, GoogleCredential} -import com.google.api.client.googleapis.extensions.java6.auth.oauth2.GooglePromptReceiver -import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport -import com.google.api.client.json.JsonFactory -import com.google.api.client.json.jackson2.JacksonFactory -import com.google.api.client.util.store.FileDataStoreFactory -import com.google.api.services.storage.{Storage, StorageScopes} -import cromwell.filesystems.gcs.GoogleAuthMode.{GcsScopes, GoogleAuthOptions} -import org.slf4j.LoggerFactory - -import scala.collection.JavaConverters._ -import scala.util.{Failure, Success, Try} - -object GoogleAuthMode { - - lazy val jsonFactory = JacksonFactory.getDefaultInstance - lazy val httpTransport = GoogleNetHttpTransport.newTrustedTransport - val RefreshTokenOptionKey = "refresh_token" - - /** - * Before it returns the raw credential, checks if the token will expire within 60 seconds. - * - * TODO: Needs more design / testing around thread safety. - * For example, the credential returned is mutable, and may be modified by another thread. 
- * - * Most Google clients have the ability to refresh tokens automatically, as they use the standard Google - * HttpTransport that automatically triggers credential refreshing via Credential.handleResponse. Since Cromwell - * contacts https://gcr.io directly via HTTP requests using spray-client, we need to keep the token fresh ourselves. - * - * @see Credential#handleResponse(HttpRequest, HttpResponse, boolean) - */ - implicit class EnhancedCredentials(val credential: Credential) extends AnyVal { - def freshCredential: Try[Credential] = { - val stillValid = Option(credential.getExpiresInSeconds).exists(_ > 60) - if (stillValid || credential.refreshToken()) { - Success(credential) - } else { - Failure(new Exception("Unable to refresh token")) - } - } - } - - def buildStorage(credential: Credential, applicationName: String) = { - new Storage.Builder( - httpTransport, - jsonFactory, - credential).setApplicationName(applicationName).build() - } - - trait GoogleAuthOptions { - def get(key: String): Try[String] - } - - val GcsScopes = List( - StorageScopes.DEVSTORAGE_FULL_CONTROL, - StorageScopes.DEVSTORAGE_READ_WRITE - ) -} - - -sealed trait GoogleAuthMode { - def credential(options: GoogleAuthOptions): Credential - - def assertWorkflowOptions(options: GoogleAuthOptions): Unit = () - - def name: String - - def requiresAuthFile: Boolean = false - - protected lazy val log = LoggerFactory.getLogger(getClass.getSimpleName) - - protected def validateCredentials(credential: Credential) = { - Try(credential.refreshToken()) match { - case Failure(ex) => throw new RuntimeException(s"Google credentials are invalid: ${ex.getMessage}") - case Success(_) => credential - } - } - - def buildStorage(options: GoogleAuthOptions, applicationName: String): Storage = { - GoogleAuthMode.buildStorage(credential(options), applicationName) - } -} - -final case class ServiceAccountMode(override val name: String, accountId: String, pemPath: String, scopes: List[String] = GcsScopes) extends GoogleAuthMode { - import GoogleAuthMode._ - - private lazy val credentials: Credential = { - val pemFile = Paths.get(pemPath).toAbsolutePath - if (!Files.exists(pemFile)) { - throw new FileNotFoundException(s"PEM file $pemFile does not exist") - } - validateCredentials( - new GoogleCredential.Builder().setTransport(httpTransport) - .setJsonFactory(jsonFactory) - .setServiceAccountId(accountId) - .setServiceAccountScopes(scopes.asJava) - .setServiceAccountPrivateKeyFromPemFile(pemFile.toFile) - .build() - ) - } - - override def credential(options: GoogleAuthOptions) = credentials -} - -final case class UserMode(override val name: String, user: String, secretsFile: String, datastoreDir: String, scopes: List[String] = GcsScopes) extends GoogleAuthMode { - import GoogleAuthMode._ - - private def filePathToSecrets(secrets: String, jsonFactory: JsonFactory) = { - val secretsPath = Paths.get(secrets).toAbsolutePath - if(!Files.isReadable(secretsPath)) { - log.warn("Secrets file does not exist or is not readable.") - } - val secretStream = new InputStreamReader(Files.newInputStream(secretsPath)) - - GoogleClientSecrets.load(jsonFactory, secretStream) - } - - private lazy val credentials: Credential = { - val clientSecrets = filePathToSecrets(secretsFile, jsonFactory) - val dataStore = Paths.get(datastoreDir).toAbsolutePath - val dataStoreFactory = new FileDataStoreFactory(dataStore.toFile) - val flow = new GoogleAuthorizationCodeFlow.Builder(httpTransport, - jsonFactory, - clientSecrets, - 
scopes.asJava).setDataStoreFactory(dataStoreFactory).build - validateCredentials(new AuthorizationCodeInstalledApp(flow, new GooglePromptReceiver).authorize(user)) - } - - override def credential(options: GoogleAuthOptions) = credentials -} - -// It would be goofy to have multiple auths that are application_default, but Cromwell won't prevent it. -final case class ApplicationDefaultMode(override val name: String, scopes: List[String] = GcsScopes) extends GoogleAuthMode { - import GoogleAuthMode._ - - private lazy val credentials: Credential = { - try { - validateCredentials(GoogleCredential.getApplicationDefault().createScoped(scopes.asJava)) - } catch { - case e: IOException => - log.warn("Failed to get application default credentials", e) - throw e - } - } - - override def credential(options: GoogleAuthOptions) = credentials -} - -final case class RefreshTokenMode(name: String, clientId: String, clientSecret: String) extends GoogleAuthMode with ClientSecrets { - import GoogleAuthMode._ - - override def requiresAuthFile = true - - /** - * Throws if the refresh token is not specified. - */ - override def assertWorkflowOptions(options: GoogleAuthOptions): Unit = { getToken(options); () } - - private def getToken(options: GoogleAuthOptions): String = { - options.get(RefreshTokenOptionKey).getOrElse(throw new IllegalArgumentException(s"Missing parameters in workflow options: $RefreshTokenOptionKey")) - } - - override def credential(options: GoogleAuthOptions): Credential = { - validateCredentials( - new GoogleCredential.Builder().setTransport(httpTransport) - .setJsonFactory(jsonFactory) - .setClientSecrets(clientId, clientSecret) - .build() - .setRefreshToken(getToken(options)) - ) - } -} - -trait ClientSecrets { - val clientId: String - val clientSecret: String -} - -final case class SimpleClientSecrets(clientId: String, clientSecret: String) extends ClientSecrets diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GoogleConfiguration.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GoogleConfiguration.scala index 9c557983921..8fa93b61da8 100644 --- a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GoogleConfiguration.scala +++ b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GoogleConfiguration.scala @@ -7,14 +7,12 @@ import cats.syntax.traverse._ import cats.syntax.validated._ import com.google.api.services.storage.StorageScopes import com.typesafe.config.Config +import cromwell.filesystems.gcs.auth._ import lenthall.config.ConfigValidationException import lenthall.config.ValidatedConfig._ import cromwell.core.ErrorOr._ import org.slf4j.LoggerFactory -import scala.collection.JavaConverters._ - - final case class GoogleConfiguration private (applicationName: String, authsByName: Map[String, GoogleAuthMode]) { def auth(name: String): ErrorOr[GoogleAuthMode] = { @@ -28,15 +26,15 @@ final case class GoogleConfiguration private (applicationName: String, authsByNa } object GoogleConfiguration { - + import scala.collection.JavaConverters._ private val log = LoggerFactory.getLogger("GoogleConfiguration") - private val GoogleScopes = List( + val GoogleScopes = List( StorageScopes.DEVSTORAGE_FULL_CONTROL, StorageScopes.DEVSTORAGE_READ_WRITE, "https://www.googleapis.com/auth/genomics", "https://www.googleapis.com/auth/compute" - ) + ).asJava def apply(config: Config): GoogleConfiguration = { @@ -55,10 +53,10 @@ object GoogleConfiguration { } def refreshTokenAuth(authConfig: Config, name: String) = authConfig validateAny { - cfg => 
RefreshTokenMode(name, cfg.getString("client-id"), cfg.getString("client-secret")) + cfg => RefreshTokenMode(name, cfg.getString("client-id"), cfg.getString("client-secret"), GoogleScopes) } - def applicationDefaultAuth(name: String): ErrorOr[GoogleAuthMode] = ApplicationDefaultMode(name, GoogleScopes).validNel + def applicationDefaultAuth(name: String): ErrorOr[GoogleAuthMode] = ApplicationDefaultMode(name).validNel val name = authConfig.getString("name") val scheme = authConfig.getString("scheme") diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/NioGcsPath.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/NioGcsPath.scala deleted file mode 100644 index 65e148f7787..00000000000 --- a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/NioGcsPath.scala +++ /dev/null @@ -1,191 +0,0 @@ -package cromwell.filesystems.gcs - -import java.io.File -import java.net.URI -import java.nio.file.WatchEvent.{Kind, Modifier} -import java.nio.file._ -import java.util - -import scala.collection.JavaConverters._ -import scala.language.postfixOps -import scala.util.Try - -object NioGcsPath { - def apply(path: String)(implicit gcsFileSystem: GcsFileSystem) = gcsFileSystem.getPath(path) - - implicit class PathEnhanced(val path: Path) extends AnyVal { - def asGcsPath(implicit gcsFileSystem: GcsFileSystem) = path match { - case gcsPath: NioGcsPath => gcsPath - case otherPath: Path => getSoftPath(otherPath.toString).asInstanceOf[NioGcsPath] - case _ => throw new IllegalArgumentException("Only GcsPaths are supported.") - } - } - - /** Allow instantiation of a relative gcs path. - * Relative GCS paths can only be created via NioGcsPath methods (eg: subpath, getName...) but not through the GcsFileSystem.getPath method - * in order to avoid floating paths without root. It also ensures that a relative local path cannot mistakenly be parsed as a GCS path. - * */ - private def getSoftPath(first: String, more: String*)(implicit gcsFs: GcsFileSystem): Path = Try(gcsFs.getPath(first, more: _*)) recover { - case e: NotAGcsPathException => new NioGcsPath(first.split(GcsFileSystem.Separator) ++ more.toArray[String], false, first.endsWith(GcsFileSystem.Separator))(gcsFs) - } get - - val Protocol = GcsFileSystem.Protocol -} - -/** - * NOTE: Currently called NioGcsPath so it can exist alongside the current GcsPath class. - * If this approach was to be validated the current GcsPath class would be replaced by this one. - * This class proposes an implementation of the java.nio.Path interface for GoogleCloudStorage. - * The following methods are yet to be implemented: - * relativize - * compareTo - * @param chunks array containing all parts of the path in between separators - except the protocol (gs://) - * eg: gs://path/to/resource.txt -> chunks = [path, to, resource.txt] - * @param absolute true if this path is to be considered absolute. - * Only absolute GCS paths can be used to actually locate resources. - * Calling methods on an absolute path can return a relative paths (eg subpath). 
- * @param gcsFileSystem the gcsFileSystem to be used when performing operations on this path - */ -class NioGcsPath(private val chunks: Array[String], absolute: Boolean, val isDirectory: Boolean)(implicit gcsFileSystem: GcsFileSystem) extends Path { - import NioGcsPath._ - - private val separator = GcsFileSystem.Separator - - private val objectChunks = chunks match { - case values if isAbsolute && values.nonEmpty => values.tail - case _ => chunks - } - - private val fullPath = chunksToString(chunks) - - lazy val bucket: String = chunks match { - case values if values.isEmpty && isAbsolute => throw new IllegalStateException("An absolute gcs path cannot be empty") - case _ => if(isAbsolute) chunks.head else { - throw new UnsupportedOperationException("Attached gcs filesystem has no root and is not Absolute. The corresponding bucket is unknown.") - } - } - - val objectName = chunksToString(objectChunks) - - private def chunksToString(chunksArray: Array[String]): String = chunksArray.mkString(separator) - - override def subpath(beginIndex: Int, endIndex: Int): Path = { - val directory = if (endIndex == chunks.length - 1) isDirectory else true - new NioGcsPath(chunks.slice(beginIndex, endIndex), isAbsolute && beginIndex == 0, directory) - } - - override def toFile: File = throw new UnsupportedOperationException("A GCS path cannot be converted to a File.") - - override def resolveSibling(other: Path): Path = { - val otherPath = other.asGcsPath - new NioGcsPath(getParent.asGcsPath.chunks ++ otherPath.chunks, isAbsolute, otherPath.isDirectory) - } - - override def resolveSibling(other: String): Path = { - val otherPath = getSoftPath(other).asGcsPath - new NioGcsPath(getParent.asGcsPath.chunks ++ getSoftPath(other).asGcsPath.chunks, isAbsolute, otherPath.isDirectory) - } - - override def getFileSystem: FileSystem = gcsFileSystem - - override def getName(index: Int): Path = { - val directory = if (index == chunks.length - 1) isDirectory else true - new NioGcsPath(Array(chunks(index)), isAbsolute && index == 0, directory) - } - - override def getParent: Path = chunks match { - case values if values.isEmpty || values.length == 1 => null - case values => new NioGcsPath(values.init, isAbsolute, true) - } - - override def toAbsolutePath: Path = if (isAbsolute) this else { - throw new UnsupportedOperationException(s"Attached gcs filesystem has no root. 
path $toString can't be converted to an absolute path.") - } - - override def relativize(other: Path): Path = other match { - case gcs: NioGcsPath => new NioGcsPath(gcs.chunks.diff(this.chunks), false, gcs.isDirectory) - case _ => throw new IllegalArgumentException(s"$other is not a GCS path.") - } - - override def getNameCount: Int = chunks.length - - override def toUri: URI = new URI(GcsFileSystem.Scheme, bucket, s"/$objectName", null) - - override def compareTo(other: Path): Int = throw new NotImplementedError() - - override def register(watcher: WatchService, events: Array[Kind[_]], modifiers: Modifier*): WatchKey = throw new UnsupportedOperationException() - - override def register(watcher: WatchService, events: Kind[_]*): WatchKey = throw new UnsupportedOperationException() - - override def getFileName: Path = chunks match { - case values if values.isEmpty => null - case _ => new NioGcsPath(Array(chunks.last), isAbsolute && chunks.length == 1, isDirectory) - } - - override def getRoot: Path = new NioGcsPath(Array(bucket), true, true) - - override def iterator(): util.Iterator[Path] = { - if (chunks.isEmpty) chunks.map(_.asInstanceOf[Path]).iterator.asJava else { - val init = chunks.init map { elt => new NioGcsPath(Array(elt), false, true).asInstanceOf[Path] } - val fullIterator = init :+ new NioGcsPath(Array(chunks.last), false, isDirectory).asInstanceOf[Path] - fullIterator.iterator.asJava - } - } - - override def normalize(): Path = if (isAbsolute) this else throw new UnsupportedOperationException("Cannot normalize a relative GCS path.") - - override def endsWith(other: Path): Boolean = { - other match { - case rel: NioGcsPath if !isAbsolute && rel.isAbsolute => false - case _: NioGcsPath => chunks.endsWith(other.asGcsPath.chunks) - case _ => false - } - } - - override def endsWith(other: String): Boolean = { - Try(getSoftPath(other)) map { - case rel: NioGcsPath if !isAbsolute && rel.isAbsolute => false - case path@(_: NioGcsPath) => chunks.endsWith(path.asGcsPath.chunks) - case _ => false - } getOrElse false - } - - override def resolve(other: Path): Path = { - if (other.isAbsolute) other - else { - val otherGcs = other.asGcsPath - new NioGcsPath(chunks ++ otherGcs.chunks, isAbsolute, otherGcs.isDirectory) - } - } - - override def resolve(other: String): Path = { - val otherPath = getSoftPath(other).asGcsPath - if (otherPath.isAbsolute) otherPath - else new NioGcsPath(chunks ++ otherPath.asGcsPath.chunks, isAbsolute, otherPath.isDirectory) - } - - override def toRealPath(options: LinkOption*): Path = this - - override def startsWith(other: Path): Boolean = { - other match { - case rel: NioGcsPath if !isAbsolute && rel.isAbsolute => false - case _: NioGcsPath => chunks.startsWith(other.asGcsPath.chunks) - case _ => false - } - } - - override def startsWith(other: String): Boolean = { - Try(getSoftPath(other)) map { - case rel: NioGcsPath if !isAbsolute && rel.isAbsolute => false - case path@(_: NioGcsPath) => chunks.startsWith(path.asGcsPath.chunks) - case _ => false - } getOrElse false - } - - override def toString: String = { - if (absolute) s"$Protocol$fullPath" - else fullPath - } - - override def isAbsolute: Boolean = absolute -} diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/auth/GoogleAuthMode.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/auth/GoogleAuthMode.scala new file mode 100644 index 00000000000..26d18833dc3 --- /dev/null +++ b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/auth/GoogleAuthMode.scala @@ -0,0 +1,187 @@ 
+package cromwell.filesystems.gcs.auth + +import java.io.{FileNotFoundException, InputStreamReader} +import java.nio.file.Paths + +import better.files._ +import com.google.api.client.auth.oauth2.Credential +import com.google.api.client.extensions.java6.auth.oauth2.AuthorizationCodeInstalledApp +import com.google.api.client.googleapis.auth.oauth2.{GoogleAuthorizationCodeFlow, GoogleClientSecrets, GoogleCredential} +import com.google.api.client.googleapis.extensions.java6.auth.oauth2.GooglePromptReceiver +import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport +import com.google.api.client.googleapis.testing.auth.oauth2.MockGoogleCredential +import com.google.api.client.json.jackson2.JacksonFactory +import com.google.api.client.util.store.FileDataStoreFactory +import com.google.api.services.storage.StorageScopes +import com.google.auth.oauth2.{ClientId, ServiceAccountCredentials} +import com.google.cloud.AuthCredentials +import cromwell.core.WorkflowOptions +import cromwell.filesystems.gcs.auth.GoogleAuthMode._ +import org.slf4j.LoggerFactory + +import scala.collection.JavaConverters._ +import scala.util.{Failure, Success, Try} + +object GoogleAuthMode { + + lazy val jsonFactory = JacksonFactory.getDefaultInstance + lazy val httpTransport = GoogleNetHttpTransport.newTrustedTransport + + val RefreshTokenOptionKey = "refresh_token" + val GcsScopes = List( + StorageScopes.DEVSTORAGE_FULL_CONTROL, + StorageScopes.DEVSTORAGE_READ_WRITE + ).asJava + + def checkReadable(file: File) = { + if (!file.isReadable) throw new FileNotFoundException(s"File $file does not exist or is not readable") + } + + case object NoAuthMode extends GoogleAuthMode { + override def name = "no_auth" + + override def authCredentials(options: WorkflowOptions): AuthCredentials = AuthCredentials.noAuth() + override def credential(options: WorkflowOptions): Credential = new MockGoogleCredential.Builder().build() + } +} + + +sealed trait GoogleAuthMode { + protected lazy val log = LoggerFactory.getLogger(getClass.getSimpleName) + + /** + * Validate the auth mode against provided options + */ + def validate(options: WorkflowOptions): Unit = {()} + + def name: String + // Create an AuthCredentials object from the google-cloud library (https://github.com/GoogleCloudPlatform/google-cloud-java using https://github.com/google/google-auth-library-java under the hood) + def authCredentials(options: WorkflowOptions): AuthCredentials + // Create a Credential object from the google.api.client.auth library (https://github.com/google/google-api-java-client) + def credential(options: WorkflowOptions): Credential + + def requiresAuthFile: Boolean = false + + protected def validateAuthCredentials(authCredentials: AuthCredentials, scopes: java.util.Collection[String]): AuthCredentials = validate(authCredentials, authCredentials.credentials().createScoped(scopes).refresh) + + protected def validateCredential(credential: Credential) = validate(credential, credential.refreshToken) + + private def validate[T](credential: T, validation: () => Any): T = { + Try(validation()) match { + case Failure(ex) => throw new RuntimeException(s"Google credentials are invalid: ${ex.getMessage}") + case Success(_) => credential + } + } +} + +final case class ServiceAccountMode(override val name: String, + accountId: String, + pemPath: String, + scopes: java.util.List[String]) extends GoogleAuthMode { + private val pemFile = File(pemPath) + checkReadable(pemFile) + + private lazy val _authCredentials: AuthCredentials = { + val saCredentials = 
ServiceAccountCredentials.fromPkcs8(accountId, accountId, pemFile.contentAsString, null, scopes) + validateAuthCredentials(AuthCredentials.createFor(saCredentials.getClientId, saCredentials.getPrivateKey), scopes) + } + + private lazy val _credential: Credential = { + validateCredential( + new GoogleCredential.Builder().setTransport(httpTransport) + .setJsonFactory(jsonFactory) + .setServiceAccountId(accountId) + .setServiceAccountScopes(scopes) + .setServiceAccountPrivateKeyFromPemFile(pemFile.toJava) + .build() + ) + } + + override def authCredentials(options: WorkflowOptions) = _authCredentials + + override def credential(options: WorkflowOptions): Credential = _credential +} + +final case class UserMode(override val name: String, + user: String, + val secretsPath: String, + datastoreDir: String, + scopes: java.util.List[String]) extends GoogleAuthMode { + + private lazy val secrets = { + val secretsFile = File(secretsPath) + checkReadable(secretsFile) + + val secretStream = new InputStreamReader(secretsFile.newInputStream) + + GoogleClientSecrets.load(jsonFactory, secretStream) + } + + private lazy val _credential: Credential = { + val dataStore = Paths.get(datastoreDir).toAbsolutePath + val dataStoreFactory = new FileDataStoreFactory(dataStore.toFile) + val flow = new GoogleAuthorizationCodeFlow.Builder(httpTransport, jsonFactory, secrets, scopes).setDataStoreFactory(dataStoreFactory).build + validateCredential(new AuthorizationCodeInstalledApp(flow, new GooglePromptReceiver).authorize(user)) + } + + private lazy val _authCredentials: AuthCredentials = { + new RefreshableOAuth2Credentials(_credential.getRefreshToken, new ClientId(secrets.getDetails.getClientId, secrets.getDetails.getClientSecret)) + } + + override def credential(options: WorkflowOptions) = _credential + + override def authCredentials(options: WorkflowOptions) = _authCredentials +} + +private object ApplicationDefault { + private [auth] lazy val _AuthCredentials = AuthCredentials.createApplicationDefaults() + private [auth] lazy val _Credential: Credential = GoogleCredential.getApplicationDefault() +} + +final case class ApplicationDefaultMode(name: String) extends GoogleAuthMode { + override def authCredentials(options: WorkflowOptions) = ApplicationDefault._AuthCredentials + override def credential(options: WorkflowOptions) = ApplicationDefault._Credential +} + +final case class RefreshTokenMode(name: String, + clientId: String, + clientSecret: String, + scopes: java.util.List[String]) extends GoogleAuthMode with ClientSecrets { + import GoogleAuthMode._ + override def requiresAuthFile = true + + private def extractRefreshToken(options: WorkflowOptions): String = { + options.get(RefreshTokenOptionKey) getOrElse { + throw new IllegalArgumentException(s"Missing parameters in workflow options: $RefreshTokenOptionKey") + } + } + + override def validate(options: WorkflowOptions) = { + extractRefreshToken(options) + + () + } + + override def authCredentials(options: WorkflowOptions): AuthCredentials = { + val refreshToken = extractRefreshToken(options) + validateAuthCredentials(new RefreshableOAuth2Credentials(refreshToken, new ClientId(clientId, clientSecret)), scopes) + } + + override def credential(options: WorkflowOptions): Credential = { + val refreshToken = extractRefreshToken(options) + validateCredential( + new GoogleCredential.Builder().setTransport(httpTransport) + .setJsonFactory(jsonFactory) + .setClientSecrets(clientId, clientSecret) + .build() + .setRefreshToken(refreshToken) + ) + } +} + +trait ClientSecrets 
{ + val clientId: String + val clientSecret: String +} + +final case class SimpleClientSecrets(clientId: String, clientSecret: String) extends ClientSecrets diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/auth/RefreshableOAuth2Credentials.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/auth/RefreshableOAuth2Credentials.scala new file mode 100644 index 00000000000..ae1e32ef526 --- /dev/null +++ b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/auth/RefreshableOAuth2Credentials.scala @@ -0,0 +1,31 @@ +package cromwell.filesystems.gcs.auth + +import java.io.Serializable +import java.util.Objects + +import com.google.auth.oauth2.{ClientId, GoogleCredentials, UserCredentials} +import com.google.cloud.{AuthCredentials, RestorableState} + +class RefreshableOAuth2Credentials(refreshToken: String, clientId: ClientId) extends AuthCredentials { + private val _credentials: GoogleCredentials = new UserCredentials(clientId.getClientId, clientId.getClientSecret, refreshToken) + + private class RefreshableOAuth2CredentialsState(val refreshToken: String, val clientId: ClientId) extends RestorableState[AuthCredentials] with Serializable { + + override def restore: AuthCredentials = new RefreshableOAuth2Credentials(refreshToken, clientId) + + override def hashCode: Int = Objects.hash(refreshToken, clientId.getClientId, clientId.getClientSecret) + + override def equals(obj: Any): Boolean = { + obj.isInstanceOf[RefreshableOAuth2CredentialsState] && { + val other = obj.asInstanceOf[RefreshableOAuth2CredentialsState] + Objects.equals(refreshToken, other.refreshToken) && + Objects.equals(clientId.getClientId, other.clientId.getClientId) && + Objects.equals(clientId.getClientSecret, other.clientId.getClientSecret) + } + } + } + + override def credentials: GoogleCredentials = _credentials + + def capture: RestorableState[AuthCredentials] = new RefreshableOAuth2CredentialsState(refreshToken, clientId) +} diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/package.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/package.scala deleted file mode 100644 index 0ec2c03168c..00000000000 --- a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/package.scala +++ /dev/null @@ -1,6 +0,0 @@ -package cromwell.filesystems - - -package object gcs { - type RefreshToken = String -} diff --git a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GcsIntegrationTest.scala b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GcsIntegrationTest.scala deleted file mode 100644 index 3c6a287348b..00000000000 --- a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GcsIntegrationTest.scala +++ /dev/null @@ -1,5 +0,0 @@ -package cromwell.filesystems.gcs - -import org.scalatest.Tag - -object GcsIntegrationTest extends Tag("GcsIntegrationTest") diff --git a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GcsPathBuilderSpec.scala b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GcsPathBuilderSpec.scala new file mode 100644 index 00000000000..598cb546173 --- /dev/null +++ b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GcsPathBuilderSpec.scala @@ -0,0 +1,31 @@ +package cromwell.filesystems.gcs + +import com.google.cloud.RetryParams +import com.google.cloud.storage.contrib.nio.CloudStorageConfiguration +import cromwell.core.path.CustomRetryParams +import cromwell.core.path.proxy.RetryableFileSystemProviderProxy +import cromwell.core.{TestKitSuite, WorkflowOptions} +import cromwell.filesystems.gcs.auth.GoogleAuthMode +import 
org.scalatest.{FlatSpecLike, Matchers} + +class GcsPathBuilderSpec extends TestKitSuite with FlatSpecLike with Matchers { + + implicit val as = system + + behavior of "GcsPathBuilderSpec" + + it should "create a path with a retryable provider" in { + val retryablePathBuilder = new RetryableGcsPathBuilder( + GoogleAuthMode.NoAuthMode, + RetryParams.defaultInstance(), + CustomRetryParams.Default, + CloudStorageConfiguration.DEFAULT, + WorkflowOptions.empty + ) + + val path = retryablePathBuilder.build("gs://bucket/object") + path.isSuccess shouldBe true + path.get.getFileSystem.provider() shouldBe a[RetryableFileSystemProviderProxy[_]] + } + +} diff --git a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GoogleConfigurationSpec.scala b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GoogleConfigurationSpec.scala index 3eeeaf56817..cc1c9fd6c36 100644 --- a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GoogleConfigurationSpec.scala +++ b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GoogleConfigurationSpec.scala @@ -1,6 +1,8 @@ package cromwell.filesystems.gcs +import better.files.File import com.typesafe.config.{ConfigException, ConfigFactory} +import cromwell.filesystems.gcs.auth.{ApplicationDefaultMode, RefreshTokenMode, ServiceAccountMode, UserMode} import lenthall.config.ConfigValidationException import org.scalatest.{FlatSpec, Matchers} @@ -10,8 +12,10 @@ class GoogleConfigurationSpec extends FlatSpec with Matchers { behavior of "GoogleConfiguration" it should "parse all manner of well-formed auths" in { + val mockFile = File.newTemporaryFile() + val righteousGoogleConfig = - """ + s""" |google { | application-name = "cromwell" | @@ -30,14 +34,14 @@ class GoogleConfigurationSpec extends FlatSpec with Matchers { | name = "name-user" | scheme = "user_account" | user = "me" - | secrets-file = "/very/secret/file.txt" + | secrets-file = "${mockFile.pathAsString}" | data-store-dir = "/where/the/data/at" | }, | { | name = "name-service" | scheme = "service_account" | service-account-id = "my-google-account" - | pem-file = "/yonder/file.pem" + | pem-file = "${mockFile.pathAsString}" | } | ] |} @@ -61,13 +65,15 @@ class GoogleConfigurationSpec extends FlatSpec with Matchers { val user = (auths collectFirst { case a: UserMode => a }).get user.name shouldBe "name-user" - user.secretsFile shouldBe "/very/secret/file.txt" + user.secretsPath shouldBe mockFile.pathAsString user.datastoreDir shouldBe "/where/the/data/at" val service = (auths collectFirst { case a: ServiceAccountMode => a }).get service.name shouldBe "name-service" service.accountId shouldBe "my-google-account" - service.pemPath shouldBe "/yonder/file.pem" + service.pemPath shouldBe mockFile.pathAsString + + mockFile.delete(true) } diff --git a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GoogleCredentialFactorySpec.scala b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GoogleCredentialFactorySpec.scala deleted file mode 100644 index 541dc75eb8a..00000000000 --- a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GoogleCredentialFactorySpec.scala +++ /dev/null @@ -1,158 +0,0 @@ -package cromwell.filesystems.gcs - -import java.nio.file.Paths - -import com.google.api.client.auth.oauth2.Credential -import com.google.api.client.googleapis.auth.oauth2.GoogleCredential -import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport -import com.google.api.client.json.jackson2.JacksonFactory -import com.typesafe.config.ConfigFactory -import 
cromwell.filesystems.gcs.GoogleAuthMode.EnhancedCredentials -import org.scalatest.{FlatSpec, Matchers} - -import scala.util.Try - -class GoogleCredentialFactorySpec extends FlatSpec with Matchers { - import GoogleCredentialFactorySpec._ - - behavior of "GoogleCredentialFactory" - - it should "refresh a token using user credentials" taggedAs GcsIntegrationTest in { - val credential = UserMode( - name = "user", - user = secretConf("user"), - secretsFile = secretConf("secrets-file"), - datastoreDir = secretConf("data-store-dir")).credential(emptyOptions) - - val firstCredentialTry: Try[Credential] = credential.freshCredential - assert(firstCredentialTry.isSuccess) - val firstCredential = firstCredentialTry.get - firstCredential.getAccessToken shouldNot be(empty) - - firstCredential.setExpiresInSeconds(59L) - - val secondCredentialTry: Try[Credential] = firstCredential.freshCredential - assert(secondCredentialTry.isSuccess) - - val secondCredential = secondCredentialTry.get - secondCredential.getAccessToken shouldNot be(empty) - secondCredential.getExpiresInSeconds shouldNot be(null) - secondCredential.getExpiresInSeconds.longValue should be > 60L - } - - it should "refresh a token using a service account" taggedAs GcsIntegrationTest in { - val credential = ServiceAccountMode( - name = "service", - accountId = secretConf("service-account-id"), - pemPath = secretConf("pem-file")).credential(emptyOptions) - - val firstCredentialTry: Try[Credential] = credential.freshCredential - assert(firstCredentialTry.isSuccess) - val firstCredential = firstCredentialTry.get - firstCredential.getAccessToken shouldNot be(empty) - - firstCredential.setExpiresInSeconds(59L) - - val secondCredentialTry: Try[Credential] = firstCredential.freshCredential - assert(secondCredentialTry.isSuccess) - - val secondCredential = secondCredentialTry.get - secondCredential.getAccessToken shouldNot be(empty) - secondCredential.getExpiresInSeconds shouldNot be(null) - secondCredential.getExpiresInSeconds.longValue should be > 60L - } - - it should "refresh a token using a refresh token" taggedAs GcsIntegrationTest in { - val opts = GoogleOptionsMap(Map("refresh_token" -> secretConf("refresh_token"))) - - val credential = RefreshTokenMode(name = "refresh", - clientId = secretConf("client-id"), - clientSecret = secretConf("client-secret")).credential(opts) - - val firstUserCredentialsTry = credential.freshCredential - - assert(firstUserCredentialsTry.isSuccess) - val firstUserCredentials = firstUserCredentialsTry.get - - val firstRefreshedUserCredentialsTry: Try[Credential] = firstUserCredentials.freshCredential - assert(firstRefreshedUserCredentialsTry.isSuccess) - val firstRefreshedUserCredentials = firstRefreshedUserCredentialsTry.get - firstRefreshedUserCredentials.getAccessToken shouldNot be(empty) - - firstRefreshedUserCredentials.setExpiresInSeconds(59L) - - val secondRefreshedUserCredentialsTry: Try[Credential] = firstRefreshedUserCredentials.freshCredential - assert(secondRefreshedUserCredentialsTry.isSuccess) - - val secondRefreshedUserCredentials = secondRefreshedUserCredentialsTry.get - secondRefreshedUserCredentials.getAccessToken shouldNot be(empty) - secondRefreshedUserCredentials.getExpiresInSeconds shouldNot be(null) - secondRefreshedUserCredentials.getExpiresInSeconds.longValue should be > 60L - } - - it should "not refresh an empty token" in { - - val wrongCredentials = new GoogleCredential.Builder() - .setTransport(GoogleNetHttpTransport.newTrustedTransport) - .setJsonFactory(JacksonFactory.getDefaultInstance) 
- .setClientSecrets("fakeId", "fakeSecret") - .build() - - val exception = wrongCredentials.freshCredential.failed.get - - exception.getMessage should be("Unable to refresh token") - } - - it should "refresh a token using application default credentials" taggedAs GcsIntegrationTest in { - val credential = applicationDefaultCredential - - val firstCredentialTry: Try[Credential] = credential.freshCredential - assert(firstCredentialTry.isSuccess) - val firstCredential = firstCredentialTry.get - firstCredential.getAccessToken shouldNot be(empty) - - firstCredential.setExpiresInSeconds(59L) - - val secondCredentialTry: Try[Credential] = firstCredential.freshCredential - assert(secondCredentialTry.isSuccess) - - val secondCredential = secondCredentialTry.get - secondCredential.getAccessToken shouldNot be(empty) - secondCredential.getExpiresInSeconds shouldNot be(null) - secondCredential.getExpiresInSeconds.longValue should be > 60L - } -} - -object GoogleCredentialFactorySpec { - /* - - To run this integration spec, your cromwell-credentials.conf file should have the following keys for the listed tests: - - // For testing UserMode - user = "" - secrets-file = "" - data-store-dir = "" - - // For testing ServiceAccountMode - service-account-id = "" - pem-file = "" - - // For testing RefreshTokenMode - client-id = "" - client-secret = "" - refresh_token = "" - - */ - - private lazy val credentialsConfig = ConfigFactory.parseFile(Paths.get("cromwell-credentials.conf").toFile) - - private def secretConf(path: String) = credentialsConfig.getString(path) - - private val emptyOptions = GoogleOptionsMap(Map.empty) - - def applicationDefaultCredential = ApplicationDefaultMode(name = "default").credential(emptyOptions) -} - -case class GoogleOptionsMap(map: Map[String, String]) extends GoogleAuthMode.GoogleAuthOptions { - override def get(key: String): Try[String] = Try { map(key) } -} diff --git a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/MockGcsFileSystemBuilder.scala b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/MockGcsFileSystemBuilder.scala deleted file mode 100644 index af569d7f0c1..00000000000 --- a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/MockGcsFileSystemBuilder.scala +++ /dev/null @@ -1,9 +0,0 @@ -package cromwell.filesystems.gcs - -import scala.util.Failure - -object MockGcsFileSystemBuilder { - val mockGcsFileSystem = new GcsFileSystemProvider( - Failure(new Exception("No Storage object available")), - scala.concurrent.ExecutionContext.global).defaultFileSystem -} diff --git a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/NioGcsPathSpec.scala b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/NioGcsPathSpec.scala deleted file mode 100644 index ccb35efa622..00000000000 --- a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/NioGcsPathSpec.scala +++ /dev/null @@ -1,291 +0,0 @@ -package cromwell.filesystems.gcs - -import java.nio.file.Path - -import org.scalatest.mockito.MockitoSugar -import org.scalatest.prop.TableDrivenPropertyChecks._ -import org.scalatest.prop.Tables.Table -import org.scalatest.{FlatSpec, Matchers} - -class NioGcsPathSpec extends FlatSpec with Matchers with MockitoSugar { - - behavior of "NioGcsPath" - - implicit val GCSFs = MockGcsFileSystemBuilder.mockGcsFileSystem - - it should "implement toString" in { - val absPath1 = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val relPath1 = new NioGcsPath(Array("some", "relative", "path"), false, true) - - absPath1.toString shouldBe 
"gs://absolute/path/to/somewhere" - relPath1.toString shouldBe "some/relative/path" - } - - it should "implement subpath" in { - val absPath1 = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val relPath1 = new NioGcsPath(Array("some", "relative", "path"), false, true) - - val absSub1 = absPath1.subpath(0, 2) - absSub1.isAbsolute shouldBe true - absSub1.toString shouldBe "gs://absolute/path" - - val absSub2 = absPath1.subpath(1, 2) - absSub2.isAbsolute shouldBe false - absSub2.toString shouldBe "path" - - val relSub1 = relPath1.subpath(0, 2) - relSub1.isAbsolute shouldBe false - relSub1.toString shouldBe "some/relative" - } - - it should "implement resolveSibling" in { - val absPath1 = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val relPath1 = new NioGcsPath(Array("some", "relative", "path"), false, true) - val relPath2 = new NioGcsPath(Array("another", "relative", "resource", "path"), false, true) - - val absSibling = absPath1.resolveSibling("somewhere else") - absSibling.isAbsolute shouldBe true - absSibling.toString shouldBe "gs://absolute/path/to/somewhere else" - - val absSiblingPath = absPath1.resolveSibling(relPath1) - absSiblingPath.isAbsolute shouldBe true - absSiblingPath.toString shouldBe "gs://absolute/path/to/some/relative/path" - - val absRel = relPath1.resolveSibling("other path") - absRel.isAbsolute shouldBe false - absRel.toString shouldBe "some/relative/other path" - - val absRelPath = relPath1.resolveSibling(relPath2) - absRelPath.isAbsolute shouldBe false - absRelPath.toString shouldBe "some/relative/another/relative/resource/path" - } - - it should "implement resolve" in { - val absPath1 = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val absPath2 = new NioGcsPath(Array("absolute", "location"), true, true) - val relPath1 = new NioGcsPath(Array("some", "relative", "path"), false, true) - val relPath2 = new NioGcsPath(Array("another", "relative", "resource", "path"), false, true) - - val absToRel = absPath1.resolve(relPath1) - absToRel.isAbsolute shouldBe true - absToRel.toString shouldBe "gs://absolute/path/to/somewhere/some/relative/path" - - val absToAbs = absPath1.resolve(absPath2) - absToAbs.isAbsolute shouldBe true - absToAbs.toString shouldBe "gs://absolute/location" - - val relToAbs = relPath1.resolve(absPath1) - relToAbs.isAbsolute shouldBe true - relToAbs.toString shouldBe "gs://absolute/path/to/somewhere" - - val relToRel = relPath1.resolve(relPath2) - relToRel.isAbsolute shouldBe false - relToRel.toString shouldBe "some/relative/path/another/relative/resource/path" - } - - it should "implement getName" in { - val absPath1 = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val relPath1 = new NioGcsPath(Array("some", "relative", "path"), false, true) - - val nameAbs1 = absPath1.getName(0) - nameAbs1.isAbsolute shouldBe true - nameAbs1.toString shouldBe "gs://absolute" - - val nameAbs2 = absPath1.getName(1) - nameAbs2.isAbsolute shouldBe false - nameAbs2.toString shouldBe "path" - - val nameRel1 = relPath1.getName(0) - nameRel1.isAbsolute shouldBe false - nameRel1.toString shouldBe "some" - - val nameRel2 = relPath1.getName(1) - nameRel2.isAbsolute shouldBe false - nameRel2.toString shouldBe "relative" - } - - it should "implement getParent" in { - val empty = new NioGcsPath(Array.empty[String], true, true) - val singleton = new NioGcsPath(Array("singleton"), true, true) - val absPath1 = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), 
true, true) - val relPath1 = new NioGcsPath(Array("some", "relative", "path"), false, true) - - val parentAbs1 = absPath1.getParent - parentAbs1.isAbsolute shouldBe true - parentAbs1.toString shouldBe "gs://absolute/path/to" - - empty.getParent shouldBe null - singleton.getParent shouldBe null - - val nameRel1 = relPath1.getParent - nameRel1.isAbsolute shouldBe false - nameRel1.toString shouldBe "some/relative" - } - - it should "implement toAbsolutePath" in { - val absPath1 = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val relPath1 = new NioGcsPath(Array("some", "relative", "path"), false, true) - - val abs = absPath1.toAbsolutePath - abs.isAbsolute shouldBe true - abs.toString shouldBe "gs://absolute/path/to/somewhere" - - an[Exception] shouldBe thrownBy(relPath1.toAbsolutePath) - } - - it should "implement getNameCount" in { - val empty = new NioGcsPath(Array.empty[String], true, true) - val singleton = new NioGcsPath(Array("singleton"), true, true) - val absPath1 = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val relPath1 = new NioGcsPath(Array("some", "relative", "path"), false, true) - - absPath1.getNameCount shouldBe 4 - relPath1.getNameCount shouldBe 3 - empty.getNameCount shouldBe 0 - singleton.getNameCount shouldBe 1 - } - - it should "implement getFileName" in { - val empty = new NioGcsPath(Array.empty[String], true, true) - val singletonAbs = new NioGcsPath(Array("singleton"), true, true) - val singletonRel = new NioGcsPath(Array("singleton"), false, true) - val absPath1 = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val relPath1 = new NioGcsPath(Array("some", "relative", "path"), false, true) - - val emptyFileName = empty.getFileName - emptyFileName shouldBe null - - val singletonAbsFileName = singletonAbs.getFileName - singletonAbsFileName.isAbsolute shouldBe true - singletonAbsFileName.toString shouldBe "gs://singleton" - - val singletonRelFileName = singletonRel.getFileName - singletonRelFileName.isAbsolute shouldBe false - singletonRelFileName.toString shouldBe "singleton" - - val relFileName = relPath1.getFileName - relFileName.isAbsolute shouldBe false - relFileName.toString shouldBe "path" - - val absFileName = absPath1.getFileName - absFileName.isAbsolute shouldBe false - absFileName.toString shouldBe "somewhere" - } - - it should "implement getIterator" in { - val empty = new NioGcsPath(Array.empty[String], true, true) - val singletonAbs = new NioGcsPath(Array("singleton"), true, true) - val singletonRel = new NioGcsPath(Array("singleton"), false, true) - val absPath1 = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val relPath1 = new NioGcsPath(Array("some", "relative", "path"), false, true) - - empty.iterator().hasNext shouldBe false - - val singletonAbsIterator = singletonAbs.iterator() - val nextAbsSingleton: Path = singletonAbsIterator.next() - nextAbsSingleton.isAbsolute shouldBe false - nextAbsSingleton.toString shouldBe "singleton" - singletonAbsIterator.hasNext shouldBe false - - val singletonRelIterator = singletonRel.iterator() - val nextRelSingleton: Path = singletonRelIterator.next() - nextRelSingleton.isAbsolute shouldBe false - nextRelSingleton.toString shouldBe "singleton" - singletonRelIterator.hasNext shouldBe false - - val relIterator = relPath1.iterator() - val nextRel: Path = relIterator.next() - nextRel.isAbsolute shouldBe false - nextRel.toString shouldBe "some" - relIterator.next().toString shouldBe "relative" - 
relIterator.next().toString shouldBe "path" - relIterator.hasNext shouldBe false - - val absIterator = absPath1.iterator() - val absRel: Path = absIterator.next() - absRel.isAbsolute shouldBe false - absRel.toString shouldBe "absolute" - absIterator.next().toString shouldBe "path" - absIterator.next().toString shouldBe "to" - absIterator.next().toString shouldBe "somewhere" - absIterator.hasNext shouldBe false - } - - it should "implement startsWith" in { - val empty = new NioGcsPath(Array.empty[String], false, true) - val singletonAbs = new NioGcsPath(Array("absolute"), true, true) - - val absPath = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val startsWithAbsPath = new NioGcsPath(Array("absolute", "path", "to"), true, true) - val doesntStartsWithAbsPath = new NioGcsPath(Array("absolute", "path", "to", "another", "place"), true, true) - val absPathStartingLikeRel = new NioGcsPath(Array("some", "relative", "path"), true, true) - - val relPath = new NioGcsPath(Array("some", "relative", "path"), false, true) - val startsWithRelPath = new NioGcsPath(Array("some", "relative"), false, true) - val doesntStartsWithRelPath = new NioGcsPath(Array("some", "relative", "other", "path"), false, true) - val relPathStartingLikeAbs = new NioGcsPath(Array("absolute", "path", "to"), false, true) - - val paths = Table( - ("path1", "path2", "result"), - (empty, empty, true), - (empty, absPath, false), - (singletonAbs, singletonAbs, true), - (absPath, startsWithAbsPath, true), - (absPath, doesntStartsWithAbsPath, false), - (absPath, relPathStartingLikeAbs, true), - (absPath, relPath, false), - (relPath, startsWithRelPath, true), - (relPath, doesntStartsWithRelPath, false), - (relPath, absPathStartingLikeRel, false), - (relPath, absPath, false) - ) - - forAll(paths) { (p1, p2, res) => - val startsWith: Boolean = p1.startsWith(p2) - startsWith shouldBe res - val startsWith1: Boolean = p1.startsWith(p2.toString) - startsWith1 shouldBe res - } - } - - it should "implement endsWith" in { - val empty = new NioGcsPath(Array.empty[String], false, true) - val singletonAbs = new NioGcsPath(Array("absolute"), true, true) - - val absPath = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val doesntEndWithAbsPath = new NioGcsPath(Array("absolute", "path", "to", "another", "place"), true, true) - val absPathEndingLikeRel = new NioGcsPath(Array("relative", "path"), true, true) - - val relPath = new NioGcsPath(Array("some", "relative", "path"), false, true) - val endsWithRelPath = new NioGcsPath(Array("relative", "path"), false, true) - val doesntStartsWithRelPath = new NioGcsPath(Array("relative", "other", "path"), false, true) - val relPathEndingLikeAbs = new NioGcsPath(Array("path", "to", "somewhere"), false, true) - - val paths = Table( - ("path1", "path2", "result"), - (empty, empty, true), - (empty, absPath, false), - (singletonAbs, singletonAbs, true), - (absPath, absPath, true), - (absPath, doesntEndWithAbsPath, false), - (absPath, relPathEndingLikeAbs, true), - (absPath, relPath, false), - (relPath, endsWithRelPath, true), - (relPath, doesntStartsWithRelPath, false), - (relPath, absPathEndingLikeRel, false), - (relPath, absPath, false) - ) - - forAll(paths) { (p1, p2, res) => - p1.endsWith(p2) shouldBe res - p1.endsWith(p2.toString) shouldBe res - } - } - - it should "implement toUri" in { - val file = new NioGcsPath(Array("some", "file"), true, false) - val uri = file.toUri - uri.toString shouldBe "gs://some/file" - } - -} diff --git 
a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/RefreshTokenModeSpec.scala b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/RefreshTokenModeSpec.scala deleted file mode 100644 index f959dcad544..00000000000 --- a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/RefreshTokenModeSpec.scala +++ /dev/null @@ -1,26 +0,0 @@ -package cromwell.filesystems.gcs - -import org.scalatest.{FlatSpec, Matchers} - -class RefreshTokenModeSpec extends FlatSpec with Matchers { - - val refreshToken = RefreshTokenMode(name = "bar", clientId = "secret-id", clientSecret = "secret-secret") - - behavior of "RefreshTokenMode" - - it should "assert good workflow options" in { - val goodOptions = GoogleOptionsMap(Map("refresh_token" -> "token")) - refreshToken.assertWorkflowOptions(goodOptions) - } - - it should "fail to assert bad workflow options" in { - val badOptions = GoogleOptionsMap(Map("fresh_tokin" -> "broken")) - val noOptions = GoogleOptionsMap(Map.empty[String, String]) - - List(badOptions, noOptions).foreach { option => - the[IllegalArgumentException] thrownBy { - refreshToken.assertWorkflowOptions(option) - } should have message s"Missing parameters in workflow options: refresh_token" - } - } -} diff --git a/project/Dependencies.scala b/project/Dependencies.scala index a902e1b2a20..ee130f095d8 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -2,7 +2,7 @@ import sbt._ object Dependencies { lazy val lenthallV = "0.19" - lazy val wdl4sV = "0.6" + lazy val wdl4sV = "0.7" lazy val sprayV = "1.3.3" /* spray-json is an independent project from the "spray suite" @@ -12,22 +12,37 @@ object Dependencies { - http://doc.akka.io/docs/akka/2.4/scala/http/common/json-support.html#akka-http-spray-json */ lazy val sprayJsonV = "1.3.2" - lazy val akkaV = "2.4.9" + lazy val akkaV = "2.4.12" lazy val slickV = "3.1.1" - lazy val googleClientApiV = "1.20.0" + lazy val googleClientApiV = "1.22.0" + lazy val googleGenomicsServicesApiV = "1.20.0" lazy val betterFilesV = "2.16.0" lazy val catsV = "0.7.2" // Internal collections of dependencies + private val catsDependencies = List( + "org.typelevel" %% "cats" % "0.7.2", + "com.github.benhutchison" %% "mouse" % "0.5" + ) map (_ + /* + Exclude test framework cats-laws and its transitive dependency scalacheck. + If sbt detects scalacheck, it tries to run it. + Explicitly excluding the two problematic artifacts instead of including the three (or four?). 
+ https://github.com/typelevel/cats/tree/v0.7.2#getting-started + Re "_2.11", see also: https://github.com/sbt/sbt/issues/1518 + */ + exclude("org.typelevel", "cats-laws_2.11") + exclude("org.typelevel", "cats-kernel-laws_2.11") + ) + private val baseDependencies = List( "org.broadinstitute" %% "lenthall" % lenthallV, - "org.typelevel" %% "cats" % catsV, - "com.github.benhutchison" %% "mouse" % "0.5", "com.iheart" %% "ficus" % "1.3.0", "org.scalatest" %% "scalatest" % "3.0.0" % Test, - "org.specs2" %% "specs2" % "3.7" % Test - ) + "org.pegdown" % "pegdown" % "1.6.0" % Test, + "org.specs2" %% "specs2-mock" % "3.8.5" % Test + ) ++ catsDependencies private val slf4jBindingDependencies = List( // http://logback.qos.ch/dependencies.html @@ -65,10 +80,10 @@ object Dependencies { ) private val googleCloudDependencies = List( - "com.google.gcloud" % "gcloud-java" % "0.0.9", - "com.google.oauth-client" % "google-oauth-client" % googleClientApiV, - "com.google.cloud.bigdataoss" % "gcsio" % "1.4.4", - "com.google.apis" % "google-api-services-genomics" % ("v1alpha2-rev14-" + googleClientApiV) + "com.google.apis" % "google-api-services-genomics" % ("v1alpha2-rev14-" + googleGenomicsServicesApiV), + "com.google.cloud" % "google-cloud-nio" % "0.3.0" + exclude("com.google.api.grpc", "grpc-google-common-protos") + exclude("com.google.cloud.datastore", "datastore-v1-protos") ) private val dbmsDependencies = List( @@ -86,15 +101,12 @@ object Dependencies { // Sub-project dependencies, added in addition to any dependencies inherited from .dependsOn(). - val gcsFileSystemDependencies = baseDependencies ++ googleApiClientDependencies ++ googleCloudDependencies + val gcsFileSystemDependencies = baseDependencies ++ googleApiClientDependencies ++ googleCloudDependencies ++ List ( + "com.github.pathikrit" %% "better-files" % betterFilesV + ) val databaseSqlDependencies = baseDependencies ++ slickDependencies ++ dbmsDependencies - val databaseMigrationDependencies = List( - "org.broadinstitute" %% "wdl4s" % wdl4sV, // Used in migration scripts - "com.github.pathikrit" %% "better-files" % betterFilesV % Test - ) ++ baseDependencies ++ liquibaseDependencies ++ dbmsDependencies - val coreDependencies = List( "com.typesafe.scala-logging" %% "scala-logging" % "3.4.0", "org.broadinstitute" %% "wdl4s" % wdl4sV, @@ -108,6 +120,10 @@ object Dependencies { // TODO: We're not using the "F" in slf4j. Core only supports logback, specifically the WorkflowLogger. 
slf4jBindingDependencies + val databaseMigrationDependencies = List( + "com.github.pathikrit" %% "better-files" % betterFilesV % Test + ) ++ liquibaseDependencies ++ dbmsDependencies + val htCondorBackendDependencies = List( "com.twitter" %% "chill" % "0.8.0", "org.mongodb" %% "casbah" % "3.0.0" @@ -118,12 +134,9 @@ object Dependencies { ) ++ sprayServerDependencies val engineDependencies = List( - "com.typesafe.scala-logging" %% "scala-logging" % "3.4.0", "org.webjars" % "swagger-ui" % "2.1.1", "commons-codec" % "commons-codec" % "1.10", "commons-io" % "commons-io" % "2.5", - "org.typelevel" %% "cats" % catsV, - "com.github.pathikrit" %% "better-files" % betterFilesV, "io.swagger" % "swagger-parser" % "1.0.22" % Test, "org.yaml" % "snakeyaml" % "1.17" % Test ) ++ sprayServerDependencies diff --git a/project/Merging.scala b/project/Merging.scala index 0591a09f9e2..2c941a76e65 100644 --- a/project/Merging.scala +++ b/project/Merging.scala @@ -25,10 +25,15 @@ object Merging { MergeStrategy.filterDistinctLines case ("spring.schemas" :: Nil) | ("spring.handlers" :: Nil) => MergeStrategy.filterDistinctLines + case "io.netty.versions.properties" :: Nil => + MergeStrategy.first + case "maven" :: "com.google.guava" :: xs => + MergeStrategy.first case _ => MergeStrategy.deduplicate } case "asm-license.txt" | "overview.html" | "cobertura.properties" => MergeStrategy.discard + case _ => MergeStrategy.deduplicate } } \ No newline at end of file diff --git a/project/Settings.scala b/project/Settings.scala index 4a092ce6802..f59d102e840 100644 --- a/project/Settings.scala +++ b/project/Settings.scala @@ -6,8 +6,8 @@ import Version._ import sbt.Keys._ import sbt._ import sbtassembly.AssemblyPlugin.autoImport._ -import sbtrelease.ReleasePlugin import sbtdocker.DockerPlugin.autoImport._ +import sbtrelease.ReleasePlugin object Settings { @@ -48,7 +48,13 @@ object Settings { "-Ywarn-numeric-widen", "-Ywarn-value-discard", "-Ywarn-unused", - "-Ywarn-unused-import" + "-Ywarn-unused-import", + "-Xfatal-warnings" + ) + + val docSettings = List( + // http://stackoverflow.com/questions/31488335/scaladoc-2-11-6-fails-on-throws-tag-with-unable-to-find-any-member-to-link#31497874 + "-no-link-warnings" ) lazy val assemblySettings = Seq( @@ -58,14 +64,17 @@ object Settings { logLevel in assembly := Level.Info, assemblyMergeStrategy in assembly := customMergeStrategy ) - + lazy val dockerSettings = Seq( imageNames in docker := Seq( - ImageName( - namespace = Option("broadinstitute"), - repository = name.value, - tag = Some(s"${version.value}") - ) + ImageName( + namespace = Option("broadinstitute"), + repository = name.value, + tag = Option(cromwellVersion)), + ImageName( + namespace = Option("broadinstitute"), + repository = name.value, + tag = Option(version.value)) ), dockerfile in docker := { // The assembly task generates a fat JAR file @@ -77,7 +86,7 @@ object Settings { expose(8000) add(artifact, artifactTargetPath) runRaw(s"ln -s $artifactTargetPath /app/cromwell.jar") - + // If you use the 'exec' form for an entry point, shell processing is not performed and // environment variable substitution does not occur. 
Thus we have to /bin/bash here // and pass along any subsequent command line arguments @@ -89,8 +98,7 @@ object Settings { cache = false, removeIntermediateContainers = BuildOptions.Remove.Always ) - ) - + ) val commonSettings = ReleasePlugin.projectSettings ++ testSettings ++ assemblySettings ++ dockerSettings ++ cromwellVersionWithGit ++ publishingSettings ++ List( @@ -98,6 +106,7 @@ object Settings { scalaVersion := "2.11.8", resolvers ++= commonResolvers, scalacOptions ++= compilerSettings, + scalacOptions in (Compile, doc) ++= docSettings, parallelExecution := false ) diff --git a/project/Testing.scala b/project/Testing.scala index 82df707d504..46ca8f15575 100644 --- a/project/Testing.scala +++ b/project/Testing.scala @@ -10,23 +10,25 @@ object Testing { lazy val DbmsTest = config("dbms") extend Test lazy val DockerTestTag = "DockerTest" - lazy val UseDockerTaggedTests = Tests.Argument("-n", DockerTestTag) - lazy val DontUseDockerTaggedTests = Tests.Argument("-l", DockerTestTag) + lazy val UseDockerTaggedTests = Tests.Argument(TestFrameworks.ScalaTest, "-n", DockerTestTag) + lazy val DontUseDockerTaggedTests = Tests.Argument(TestFrameworks.ScalaTest, "-l", DockerTestTag) lazy val CromwellIntegrationTestTag = "CromwellIntegrationTest" - lazy val UseCromwellIntegrationTaggedTests = Tests.Argument("-n", CromwellIntegrationTestTag) - lazy val DontUseCromwellIntegrationTaggedTests = Tests.Argument("-l", CromwellIntegrationTestTag) + lazy val UseCromwellIntegrationTaggedTests = + Tests.Argument(TestFrameworks.ScalaTest, "-n", CromwellIntegrationTestTag) + lazy val DontUseCromwellIntegrationTaggedTests = + Tests.Argument(TestFrameworks.ScalaTest, "-l", CromwellIntegrationTestTag) lazy val GcsIntegrationTestTag = "GcsIntegrationTest" - lazy val UseGcsIntegrationTaggedTests = Tests.Argument("-n", GcsIntegrationTestTag) - lazy val DontUseGcsIntegrationTaggedTests = Tests.Argument("-l", GcsIntegrationTestTag) + lazy val UseGcsIntegrationTaggedTests = Tests.Argument(TestFrameworks.ScalaTest, "-n", GcsIntegrationTestTag) + lazy val DontUseGcsIntegrationTaggedTests = Tests.Argument(TestFrameworks.ScalaTest, "-l", GcsIntegrationTestTag) lazy val DbmsTestTag = "DbmsTest" - lazy val UseDbmsTaggedTests = Tests.Argument("-n", DbmsTestTag) - lazy val DontUseDbmsTaggedTests = Tests.Argument("-l", DbmsTestTag) + lazy val UseDbmsTaggedTests = Tests.Argument(TestFrameworks.ScalaTest, "-n", DbmsTestTag) + lazy val DontUseDbmsTaggedTests = Tests.Argument(TestFrameworks.ScalaTest, "-l", DbmsTestTag) lazy val PostMVPTag = "PostMVP" - lazy val DontUsePostMVPTaggedTests = Tests.Argument("-l", PostMVPTag) + lazy val DontUsePostMVPTaggedTests = Tests.Argument(TestFrameworks.ScalaTest, "-l", PostMVPTag) lazy val TestReportArgs = Tests.Argument(TestFrameworks.ScalaTest, "-oDSI", "-h", "target/test-reports") diff --git a/project/Version.scala b/project/Version.scala index e6de0a5cdee..70adedb03b8 100644 --- a/project/Version.scala +++ b/project/Version.scala @@ -3,8 +3,8 @@ import sbt.Keys._ import sbt._ object Version { - // Upcoming release, or current if we're on the master branch - val cromwellVersion = "0.22" + // Upcoming release, or current if we're on a master / hotfix branch + val cromwellVersion = "23" // Adapted from SbtGit.versionWithGit def cromwellVersionWithGit: Seq[Setting[_]] = @@ -39,6 +39,7 @@ object Version { // The project isSnapshot string passed in via command line settings, if desired. 
val isSnapshot = sys.props.get("project.isSnapshot").forall(_.toBoolean) - if (isSnapshot) s"$version-SNAPSHOT" else version + // For now, obfuscate SNAPSHOTs from sbt's developers: https://github.com/sbt/sbt/issues/2687#issuecomment-236586241 + if (isSnapshot) s"$version-SNAP" else version } } diff --git a/services/src/main/scala/cromwell/services/metadata/CallMetadataKeys.scala b/services/src/main/scala/cromwell/services/metadata/CallMetadataKeys.scala index 5284a9a8e2e..bf1df98d9ca 100644 --- a/services/src/main/scala/cromwell/services/metadata/CallMetadataKeys.scala +++ b/services/src/main/scala/cromwell/services/metadata/CallMetadataKeys.scala @@ -17,4 +17,6 @@ object CallMetadataKeys { val BackendLogsPrefix = "backendLogs" val JobId = "jobId" val CallRoot = "callRoot" + val SubWorkflowId = "subWorkflowId" + val SubWorkflowMetadata = "subWorkflowMetadata" } diff --git a/services/src/main/scala/cromwell/services/metadata/MetadataQuery.scala b/services/src/main/scala/cromwell/services/metadata/MetadataQuery.scala index 374169d0555..6542c2082a7 100644 --- a/services/src/main/scala/cromwell/services/metadata/MetadataQuery.scala +++ b/services/src/main/scala/cromwell/services/metadata/MetadataQuery.scala @@ -36,7 +36,7 @@ case object MetadataBoolean extends MetadataType { override val typeName = "bool object MetadataValue { def apply(value: Any) = { - value match { + Option(value).getOrElse("") match { case WdlInteger(i) => new MetadataValue(i.toString, MetadataInt) case WdlFloat(f) => new MetadataValue(f.toString, MetadataNumber) case WdlBoolean(b) => new MetadataValue(b.toString, MetadataBoolean) @@ -75,16 +75,17 @@ object MetadataQueryJobKey { case class MetadataQuery(workflowId: WorkflowId, jobKey: Option[MetadataQueryJobKey], key: Option[String], includeKeysOption: Option[NonEmptyList[String]], - excludeKeysOption: Option[NonEmptyList[String]]) + excludeKeysOption: Option[NonEmptyList[String]], + expandSubWorkflows: Boolean) object MetadataQuery { - def forWorkflow(workflowId: WorkflowId) = MetadataQuery(workflowId, None, None, None, None) + def forWorkflow(workflowId: WorkflowId) = MetadataQuery(workflowId, None, None, None, None, expandSubWorkflows = false) def forJob(workflowId: WorkflowId, jobKey: MetadataJobKey) = { - MetadataQuery(workflowId, Option(MetadataQueryJobKey.forMetadataJobKey(jobKey)), None, None, None) + MetadataQuery(workflowId, Option(MetadataQueryJobKey.forMetadataJobKey(jobKey)), None, None, None, expandSubWorkflows = false) } def forKey(key: MetadataKey) = { - MetadataQuery(key.workflowId, key.jobKey map MetadataQueryJobKey.forMetadataJobKey, Option(key.key), None, None) + MetadataQuery(key.workflowId, key.jobKey map MetadataQueryJobKey.forMetadataJobKey, Option(key.key), None, None, expandSubWorkflows = false) } } diff --git a/services/src/main/scala/cromwell/services/metadata/MetadataService.scala b/services/src/main/scala/cromwell/services/metadata/MetadataService.scala index 672f68580d1..e2a62c784c3 100644 --- a/services/src/main/scala/cromwell/services/metadata/MetadataService.scala +++ b/services/src/main/scala/cromwell/services/metadata/MetadataService.scala @@ -50,7 +50,8 @@ object MetadataService { case class PutMetadataAction(events: Iterable[MetadataEvent]) extends MetadataServiceAction case class GetSingleWorkflowMetadataAction(workflowId: WorkflowId, includeKeysOption: Option[NonEmptyList[String]], - excludeKeysOption: Option[NonEmptyList[String]]) + excludeKeysOption: Option[NonEmptyList[String]], + expandSubWorkflows: Boolean) extends ReadAction 
case class GetMetadataQueryAction(key: MetadataQuery) extends ReadAction case class GetStatus(workflowId: WorkflowId) extends ReadAction diff --git a/services/src/main/scala/cromwell/services/metadata/impl/MetadataDatabaseAccess.scala b/services/src/main/scala/cromwell/services/metadata/impl/MetadataDatabaseAccess.scala index 76476d3fddd..5250107b63a 100644 --- a/services/src/main/scala/cromwell/services/metadata/impl/MetadataDatabaseAccess.scala +++ b/services/src/main/scala/cromwell/services/metadata/impl/MetadataDatabaseAccess.scala @@ -107,19 +107,19 @@ trait MetadataDatabaseAccess { val uuid = query.workflowId.id.toString val futureMetadata: Future[Seq[MetadataEntry]] = query match { - case MetadataQuery(_, None, None, None, None) => databaseInterface.queryMetadataEntries(uuid) - case MetadataQuery(_, None, Some(key), None, None) => databaseInterface.queryMetadataEntries(uuid, key) - case MetadataQuery(_, Some(jobKey), None, None, None) => + case MetadataQuery(_, None, None, None, None, _) => databaseInterface.queryMetadataEntries(uuid) + case MetadataQuery(_, None, Some(key), None, None, _) => databaseInterface.queryMetadataEntries(uuid, key) + case MetadataQuery(_, Some(jobKey), None, None, None, _) => databaseInterface.queryMetadataEntries(uuid, jobKey.callFqn, jobKey.index, jobKey.attempt) - case MetadataQuery(_, Some(jobKey), Some(key), None, None) => + case MetadataQuery(_, Some(jobKey), Some(key), None, None, _) => databaseInterface.queryMetadataEntries(uuid, key, jobKey.callFqn, jobKey.index, jobKey.attempt) - case MetadataQuery(_, None, None, Some(includeKeys), None) => + case MetadataQuery(_, None, None, Some(includeKeys), None, _) => databaseInterface. queryMetadataEntriesLikeMetadataKeys(uuid, includeKeys.map(_ + "%"), requireEmptyJobKey = false) - case MetadataQuery(_, None, None, None, Some(excludeKeys)) => + case MetadataQuery(_, None, None, None, Some(excludeKeys), _) => databaseInterface. 
queryMetadataEntryNotLikeMetadataKeys(uuid, excludeKeys.map(_ + "%"), requireEmptyJobKey = false) - case MetadataQuery(_, None, None, Some(includeKeys), Some(excludeKeys)) => Future.failed( + case MetadataQuery(_, None, None, Some(includeKeys), Some(excludeKeys), _) => Future.failed( new IllegalArgumentException( s"Include/Exclude keys may not be mixed: include = $includeKeys, exclude = $excludeKeys")) case invalidQuery => Future.failed(new IllegalArgumentException( diff --git a/services/src/main/scala/cromwell/services/metadata/impl/ReadMetadataActor.scala b/services/src/main/scala/cromwell/services/metadata/impl/ReadMetadataActor.scala index ebdc500de0f..5308d69f255 100644 --- a/services/src/main/scala/cromwell/services/metadata/impl/ReadMetadataActor.scala +++ b/services/src/main/scala/cromwell/services/metadata/impl/ReadMetadataActor.scala @@ -5,7 +5,7 @@ import cromwell.core.Dispatcher.ApiDispatcher import cromwell.core.{WorkflowId, WorkflowSubmitted} import cromwell.services.SingletonServicesStore import cromwell.services.metadata.MetadataService._ -import cromwell.services.metadata.{MetadataQuery, WorkflowQueryParameters} +import cromwell.services.metadata.{CallMetadataKeys, MetadataQuery, WorkflowQueryParameters} import scala.concurrent.Future import scala.util.{Failure, Success, Try} @@ -19,9 +19,12 @@ class ReadMetadataActor extends Actor with ActorLogging with MetadataDatabaseAcc implicit val ec = context.dispatcher def receive = { - case GetSingleWorkflowMetadataAction(workflowId, includeKeysOption, excludeKeysOption) => - queryAndRespond(MetadataQuery(workflowId, None, None, includeKeysOption, excludeKeysOption)) - case GetMetadataQueryAction(query@MetadataQuery(_, _, _, _, _)) => queryAndRespond(query) + case GetSingleWorkflowMetadataAction(workflowId, includeKeysOption, excludeKeysOption, expandSubWorkflows) => + val includeKeys = if (expandSubWorkflows) { + includeKeysOption map { _.::(CallMetadataKeys.SubWorkflowId) } + } else includeKeysOption + queryAndRespond(MetadataQuery(workflowId, None, None, includeKeys, excludeKeysOption, expandSubWorkflows)) + case GetMetadataQueryAction(query@MetadataQuery(_, _, _, _, _, _)) => queryAndRespond(query) case GetStatus(workflowId) => queryStatusAndRespond(workflowId) case GetLogs(workflowId) => queryLogsAndRespond(workflowId) case query: WorkflowQuery[_] => queryWorkflowsAndRespond(query.uri, query.parameters) diff --git a/services/src/test/scala/cromwell/services/ServicesStoreSpec.scala b/services/src/test/scala/cromwell/services/ServicesStoreSpec.scala index 291b4198836..7558a398882 100644 --- a/services/src/test/scala/cromwell/services/ServicesStoreSpec.scala +++ b/services/src/test/scala/cromwell/services/ServicesStoreSpec.scala @@ -160,6 +160,7 @@ object ServicesStoreSpec { s""" |db.url = "jdbc:hsqldb:mem:$${uniqueSchema};shutdown=false;hsqldb.tx=mvcc" |db.driver = "org.hsqldb.jdbcDriver" + |db.connectionTimeout = 3000 |driver = "slick.driver.HsqldbDriver$$" |liquibase.updateSchema = false |""".stripMargin) diff --git a/src/bin/travis/afterSuccess.sh b/src/bin/travis/afterSuccess.sh new file mode 100755 index 00000000000..e8bd24aae0a --- /dev/null +++ b/src/bin/travis/afterSuccess.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +set -e + +echo "BUILD_TYPE='$BUILD_TYPE'" +echo "TRAVIS_BRANCH='$TRAVIS_BRANCH'" +echo "TRAVIS_PULL_REQUEST='$TRAVIS_PULL_REQUEST'" + +if [ "$BUILD_TYPE" == "sbt" ] && [ "$TRAVIS_PULL_REQUEST" == "false" ]; then + + if [ "$TRAVIS_BRANCH" == "develop" ]; then + sbt 'set test in Test := {}' publish + + elif [[ 
"$TRAVIS_BRANCH" =~ ^[0-9\.]+_hotfix$ ]]; then + docker login -u="$DOCKER_USERNAME" -p="$DOCKER_PASSWORD" + sbt 'set test in Test := {}' -Dproject.isSnapshot=false dockerBuildAndPush + + fi + +fi diff --git a/src/bin/travis/publishSnapshot.sh b/src/bin/travis/publishSnapshot.sh deleted file mode 100755 index 9c18a97fdea..00000000000 --- a/src/bin/travis/publishSnapshot.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash - -set -e - -echo "BUILD_TYPE='$BUILD_TYPE'" -echo "TRAVIS_BRANCH='$TRAVIS_BRANCH'" -echo "TRAVIS_PULL_REQUEST='$TRAVIS_PULL_REQUEST'" - -if [ "$BUILD_TYPE" == "sbt" ] && [ "$TRAVIS_BRANCH" == "develop" ] && [ "$TRAVIS_PULL_REQUEST" == "false" ]; then - sbt 'set test in Test := {}' publish -fi diff --git a/src/bin/travis/resources/centaur.inputs b/src/bin/travis/resources/centaur.inputs index c378c0f71a0..544aa8524ff 100644 --- a/src/bin/travis/resources/centaur.inputs +++ b/src/bin/travis/resources/centaur.inputs @@ -1,7 +1,7 @@ { - "centaur.centaur.cromwell_jar":"gs://cloud-cromwell-dev/travis-centaur/CROMWELL_JAR", - "centaur.centaur.cromwell_branch":"BRANCH", - "centaur.centaur.conf":"gs://cloud-cromwell-dev/travis-centaur/multiBackend.conf", - "centaur.centaur.pem":"gs://cloud-cromwell-dev/travis-centaur/cromwell-account.pem", - "centaur.centaur.token": "gs://cloud-cromwell-dev/travis-centaur/token.txt" + "centaur_workflow.centaur.cromwell_jar":"gs://cloud-cromwell-dev/travis-centaur/CROMWELL_JAR", + "centaur_workflow.centaur.centaur_branch":"CENTAUR_BRANCH", + "centaur_workflow.centaur.conf":"gs://cloud-cromwell-dev/travis-centaur/multiBackend.conf", + "centaur_workflow.centaur.pem":"gs://cloud-cromwell-dev/travis-centaur/cromwell-account.pem", + "centaur_workflow.centaur.token": "gs://cloud-cromwell-dev/travis-centaur/token.txt" } diff --git a/src/bin/travis/resources/centaur.wdl b/src/bin/travis/resources/centaur.wdl index 62e13c98c25..230b271aa41 100644 --- a/src/bin/travis/resources/centaur.wdl +++ b/src/bin/travis/resources/centaur.wdl @@ -1,5 +1,5 @@ task centaur { - String cromwell_branch + String centaur_branch File conf File pem File cromwell_jar @@ -11,7 +11,9 @@ task centaur { export SBT_OPTS=-Dsbt.ivy.home=/cromwell_root/tmp/.ivy2 git clone https://github.com/broadinstitute/centaur.git cd centaur - ./test_cromwell.sh -j${cromwell_jar} -c${conf} -r/cromwell_root -t ${secret} -elocaldockertest + git checkout ${centaur_branch} + cd .. 
+ centaur/test_cromwell.sh -j${cromwell_jar} -c${conf} -r/cromwell_root -t ${secret} -elocaldockertest >>> output { @@ -26,6 +28,6 @@ task centaur { failOnStderr: false } } -workflow centaur { +workflow centaur_workflow { call centaur } diff --git a/src/bin/travis/resources/local_centaur.conf b/src/bin/travis/resources/local_centaur.conf index 68ba866bfe0..ef6df397522 100644 --- a/src/bin/travis/resources/local_centaur.conf +++ b/src/bin/travis/resources/local_centaur.conf @@ -16,3 +16,6 @@ spray.can { call-caching { enabled = true } + +backend.providers.Local.config.filesystems.local.localization = ["soft-link", "copy"] +backend.providers.Local.config.concurrent-job-limit = 20 diff --git a/src/bin/travis/test.sh b/src/bin/travis/test.sh index 8730db3af23..9fd024c6924 100755 --- a/src/bin/travis/test.sh +++ b/src/bin/travis/test.sh @@ -11,6 +11,8 @@ elif [ "$BUILD_TYPE" = "centaurLocal" ]; then "${SCRIPT_DIR}"/testCentaurLocal.sh elif [ "$BUILD_TYPE" = "sbt" ]; then "${SCRIPT_DIR}"/testSbt.sh +elif [ "$BUILD_TYPE" = "checkPublish" ]; then + "${SCRIPT_DIR}"/testCheckPublish.sh else echo "Unknown BUILD_TYPE: '$BUILD_TYPE'" exit 1 diff --git a/src/bin/travis/testCentaurJes.sh b/src/bin/travis/testCentaurJes.sh index 04813698d4b..96e5e8e8bf1 100755 --- a/src/bin/travis/testCentaurJes.sh +++ b/src/bin/travis/testCentaurJes.sh @@ -54,7 +54,7 @@ echo "RUNNING TRAVIS CENTAUR" sbt assembly # Update the inputs file with stuff specific to this run -sed -i "s/BRANCH/${TRAVIS_BRANCH}/g" src/bin/travis/resources/centaur.inputs +sed -i "s/CENTAUR_BRANCH/${CENTAUR_BRANCH}/g" src/bin/travis/resources/centaur.inputs CROMWELL_JAR=cromwell_${TRAVIS_BUILD_ID}.jar sed -i "s/CROMWELL_JAR/${CROMWELL_JAR}/g" src/bin/travis/resources/centaur.inputs @@ -69,9 +69,9 @@ EXIT_CODE="${PIPESTATUS[0]}" export WORKFLOW_ID=`grep "SingleWorkflowRunnerActor: Workflow submitted " log.txt | perl -pe 's/\e\[?.*?[\@-~]//g' | cut -f7 -d" "` # Grab the Centaur log from GCS and cat it so we see it in the main travis log. -export CENTAUR_LOG_PATH="gs://cloud-cromwell-dev/cromwell_execution/travis/centaur/${WORKFLOW_ID}/call-centaur//cromwell_root/logs/centaur.log" +export CENTAUR_LOG_PATH="gs://cloud-cromwell-dev/cromwell_execution/travis/centaur_workflow/${WORKFLOW_ID}/call-centaur/cromwell_root/logs/centaur.log" gsutil cp ${CENTAUR_LOG_PATH} centaur.log cat centaur.log -echo "More logs for this run are available at https://console.cloud.google.com/storage/browser/cloud-cromwell-dev/cromwell_execution/travis/centaur/${WORKFLOW_ID}/call-centaur/" +echo "More logs for this run are available at https://console.cloud.google.com/storage/browser/cloud-cromwell-dev/cromwell_execution/travis/centaur_workflow/${WORKFLOW_ID}/call-centaur/" exit "${EXIT_CODE}" diff --git a/src/bin/travis/testCentaurLocal.sh b/src/bin/travis/testCentaurLocal.sh index fe77a134718..337c302bff1 100755 --- a/src/bin/travis/testCentaurLocal.sh +++ b/src/bin/travis/testCentaurLocal.sh @@ -34,4 +34,6 @@ CROMWELL_JAR=$(find "$(pwd)/target/scala-2.11" -name "cromwell-*.jar") LOCAL_CONF="$(pwd)/src/bin/travis/resources/local_centaur.conf" git clone https://github.com/broadinstitute/centaur.git cd centaur -./test_cromwell.sh -j"${CROMWELL_JAR}" -c${LOCAL_CONF} +git checkout ${CENTAUR_BRANCH} +cd .. 
+centaur/test_cromwell.sh -j"${CROMWELL_JAR}" -c${LOCAL_CONF} diff --git a/src/bin/travis/testCheckPublish.sh b/src/bin/travis/testCheckPublish.sh new file mode 100755 index 00000000000..541b553850a --- /dev/null +++ b/src/bin/travis/testCheckPublish.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +set -e +set -x + +sbt clean assembly doc diff --git a/src/main/scala/cromwell/CromwellCommandLine.scala b/src/main/scala/cromwell/CromwellCommandLine.scala index c52ebbd66de..230045559b6 100644 --- a/src/main/scala/cromwell/CromwellCommandLine.scala +++ b/src/main/scala/cromwell/CromwellCommandLine.scala @@ -6,7 +6,7 @@ import better.files._ import cats.data.Validated._ import cats.syntax.cartesian._ import cats.syntax.validated._ -import cromwell.core.WorkflowSourceFiles +import cromwell.core.{WorkflowSourceFilesWithoutImports, WorkflowSourceFilesCollection, WorkflowSourceFilesWithDependenciesZip} import cromwell.util.FileUtil._ import lenthall.exception.MessageAggregation import cromwell.core.ErrorOr._ @@ -17,7 +17,7 @@ sealed abstract class CromwellCommandLine case object UsageAndExit extends CromwellCommandLine case object RunServer extends CromwellCommandLine final case class RunSingle(wdlPath: Path, - sourceFiles: WorkflowSourceFiles, + sourceFiles: WorkflowSourceFilesCollection, inputsPath: Option[Path], optionsPath: Option[Path], metadataPath: Option[Path]) extends CromwellCommandLine @@ -26,7 +26,7 @@ object CromwellCommandLine { def apply(args: Seq[String]): CromwellCommandLine = { args.headOption match { case Some("server") if args.size == 1 => RunServer - case Some("run") if args.size >= 2 && args.size <= 5 => RunSingle(args.tail) + case Some("run") if args.size >= 2 && args.size <= 6 => RunSingle(args.tail) case _ => UsageAndExit } } @@ -38,15 +38,19 @@ object RunSingle { val inputsPath = argPath(args, 1, Option(".inputs"), checkDefaultExists = false) val optionsPath = argPath(args, 2, Option(".options"), checkDefaultExists = true) val metadataPath = argPath(args, 3, None) + val importPath = argPath(args, 4, None) val wdl = readContent("WDL file", wdlPath) val inputsJson = readJson("Inputs", inputsPath) val optionsJson = readJson("Workflow Options", optionsPath) - val sourceFiles = (wdl |@| inputsJson |@| optionsJson) map { WorkflowSourceFiles.apply } + val sourceFileCollection = importPath match { + case Some(p) => (wdl |@| inputsJson |@| optionsJson) map { (w, i, o) => WorkflowSourceFilesWithDependenciesZip.apply(w, i, o, Files.readAllBytes(p)) } + case None => (wdl |@| inputsJson |@| optionsJson) map WorkflowSourceFilesWithoutImports.apply + } val runSingle = for { - sources <- sourceFiles + sources <- sourceFileCollection _ <- writeableMetadataPath(metadataPath) } yield RunSingle(wdlPath, sources, inputsPath, optionsPath, metadataPath) diff --git a/src/main/scala/cromwell/Main.scala b/src/main/scala/cromwell/Main.scala index 74a5405c119..c5287557242 100644 --- a/src/main/scala/cromwell/Main.scala +++ b/src/main/scala/cromwell/Main.scala @@ -74,11 +74,9 @@ object Main extends App { import PromiseActor.EnhancedActorRef - val promise = runner.askNoTimeout(RunWorkflow) - waitAndExit(promise, CromwellSystem) + waitAndExit(runner.askNoTimeout(RunWorkflow), CromwellSystem) } - private def waitAndExit(futureResult: Future[Any], workflowManagerSystem: CromwellSystem): Unit = { Await.ready(futureResult, Duration.Inf) @@ -100,17 +98,21 @@ object Main extends App { |java -jar cromwell.jar | |Actions: - |run [ [ - | []]] + |run [] [] + | [] [] | | Given a WDL file and JSON file containing the 
value of the | workflow inputs, this will run the workflow locally and | print out the outputs in JSON format. The workflow | options file specifies some runtime configuration for the | workflow (see README for details). The workflow metadata - | output is an optional file path to output the metadata. + | output is an optional file path to output the metadata. The + | directory of WDL files is optional. However, it is required + | if the primary workflow imports workflows that are outside + | of the root directory of the Cromwell project. + | | Use a single dash ("-") to skip optional files. Ex: - | run noinputs.wdl - - metadata.json + | run noinputs.wdl - - metadata.json - | |server | diff --git a/src/test/scala/cromwell/CromwellCommandLineSpec.scala b/src/test/scala/cromwell/CromwellCommandLineSpec.scala index 42f03fdfcb4..66c8ee29ce6 100644 --- a/src/test/scala/cromwell/CromwellCommandLineSpec.scala +++ b/src/test/scala/cromwell/CromwellCommandLineSpec.scala @@ -1,9 +1,9 @@ package cromwell import better.files._ -import cromwell.core.PathFactory._ +import cromwell.core.path.PathImplicits._ import cromwell.util.SampleWdl -import cromwell.util.SampleWdl.ThreeStep +import cromwell.util.SampleWdl.{FileClobber, FilePassingWorkflow, ThreeStep} import org.scalatest.{FlatSpec, Matchers} import scala.util.Try @@ -30,7 +30,7 @@ class CromwellCommandLineSpec extends FlatSpec with Matchers { } it should "fail with too many arguments to run" in { - CromwellCommandLine(List("run", "bork", "bork", "bork", "bork", "bork")) + CromwellCommandLine(List("run", "bork", "bork", "bork", "bork", "bork", "blerg")) } it should "RunSingle when supplying wdl and inputs" in { @@ -76,6 +76,23 @@ class CromwellCommandLineSpec extends FlatSpec with Matchers { ccl.isFailure shouldBe true ccl.failed.get.getMessage should include("Unable to write to metadata directory:") } + + it should "run if imports directory is a .zip file" in { + val wdlDir = File.newTemporaryDirectory("wdlDirectory") + + val filePassing = File.newTemporaryFile("filePassing", ".wdl", Option(wdlDir)) + val fileClobber = File.newTemporaryFile("fileClobber", ".wdl", Option(wdlDir)) + filePassing write FilePassingWorkflow.wdlSource() + fileClobber write FileClobber.wdlSource() + + val zippedDir = wdlDir.zip() + val zippedPath = zippedDir.pathAsString + + val ccl = Try(CromwellCommandLine(List("run", filePassing.pathAsString, "-", "-", "-", zippedPath))) + ccl.isFailure shouldBe false + + zippedDir.delete(swallowIOExceptions = true) + } } object CromwellCommandLineSpec { diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorBackendFactory.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorBackendFactory.scala index 75f2f779a51..3c8e48fd49d 100644 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorBackendFactory.scala +++ b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorBackendFactory.scala @@ -5,10 +5,10 @@ import com.typesafe.config.Config import com.typesafe.scalalogging.StrictLogging import cromwell.backend._ import cromwell.backend.impl.htcondor.caching.CacheActorFactory -import cromwell.backend.io.JobPaths +import cromwell.backend.io.JobPathsWithDocker import cromwell.backend.sfs.SharedFileSystemExpressionFunctions import cromwell.core.{CallContext, WorkflowOptions} -import wdl4s.Call +import wdl4s.TaskCall import wdl4s.expression.WdlStandardLibraryFunctions import scala.util.{Failure, Success, Try} @@ 
-17,7 +17,7 @@ case class HtCondorBackendFactory(name: String, configurationDescriptor: Backend extends BackendLifecycleActorFactory with StrictLogging { override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], + calls: Set[TaskCall], serviceRegistryActor: ActorRef): Option[Props] = { Option(HtCondorInitializationActor.props(workflowDescriptor, calls, configurationDescriptor, serviceRegistryActor)) } @@ -32,14 +32,14 @@ case class HtCondorBackendFactory(name: String, configurationDescriptor: Backend override def expressionLanguageFunctions(workflowDescriptor: BackendWorkflowDescriptor, jobKey: BackendJobDescriptorKey, initializationData: Option[BackendInitializationData]): WdlStandardLibraryFunctions = { - val jobPaths = new JobPaths(workflowDescriptor, configurationDescriptor.backendConfig, jobKey) + val jobPaths = new JobPathsWithDocker(jobKey, workflowDescriptor, configurationDescriptor.backendConfig) val callContext = CallContext( jobPaths.callExecutionRoot, jobPaths.stdout.toAbsolutePath.toString, jobPaths.stderr.toAbsolutePath.toString ) - new SharedFileSystemExpressionFunctions(HtCondorJobExecutionActor.fileSystems, callContext) + new SharedFileSystemExpressionFunctions(HtCondorJobExecutionActor.pathBuilders, callContext) } private def resolveCacheProviderProps(workflowOptions: WorkflowOptions) = { diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorInitializationActor.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorInitializationActor.scala index b0fdc75b2b8..1212876b677 100644 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorInitializationActor.scala +++ b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorInitializationActor.scala @@ -7,9 +7,9 @@ import cromwell.backend.validation.RuntimeAttributesDefault import cromwell.backend.validation.RuntimeAttributesKeys._ import cromwell.backend.{BackendConfigurationDescriptor, BackendInitializationData, BackendWorkflowDescriptor, BackendWorkflowInitializationActor} import cromwell.core.WorkflowOptions +import wdl4s.TaskCall import wdl4s.types.{WdlBooleanType, WdlIntegerType, WdlStringType} import wdl4s.values.WdlValue -import wdl4s.Call import scala.concurrent.Future import scala.util.Try @@ -19,14 +19,14 @@ object HtCondorInitializationActor { ContinueOnReturnCodeKey, CpuKey, MemoryKey, DiskKey) def props(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], + calls: Set[TaskCall], configurationDescriptor: BackendConfigurationDescriptor, serviceRegistryActor: ActorRef): Props = Props(new HtCondorInitializationActor(workflowDescriptor, calls, configurationDescriptor, serviceRegistryActor)) } class HtCondorInitializationActor(override val workflowDescriptor: BackendWorkflowDescriptor, - override val calls: Seq[Call], + override val calls: Set[TaskCall], override val configurationDescriptor: BackendConfigurationDescriptor, override val serviceRegistryActor: ActorRef) extends BackendWorkflowInitializationActor { diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorJobExecutionActor.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorJobExecutionActor.scala index 400c6a55f03..d005016ca3c 100644 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorJobExecutionActor.scala +++ 
b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorJobExecutionActor.scala @@ -1,20 +1,22 @@ package cromwell.backend.impl.htcondor -import java.nio.file.FileSystems import java.nio.file.attribute.PosixFilePermission import java.util.UUID import akka.actor.{ActorRef, Props} -import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, FailedNonRetryableResponse, SucceededResponse} +import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, JobFailedNonRetryableResponse, JobSucceededResponse} import cromwell.backend._ import cromwell.backend.impl.htcondor.caching.CacheActor._ import cromwell.backend.impl.htcondor.caching.localization.CachedResultLocalization -import cromwell.backend.io.JobPaths +import cromwell.backend.io.JobPathsWithDocker import cromwell.backend.sfs.{SharedFileSystem, SharedFileSystemExpressionFunctions} +import cromwell.backend.wdl.Command +import cromwell.core.path.JavaWriterImplicits._ +import cromwell.core.path.{DefaultPathBuilder, PathBuilder} import cromwell.services.keyvalue.KeyValueServiceActor._ import cromwell.services.metadata.CallMetadataKeys import org.apache.commons.codec.digest.DigestUtils -import wdl4s._ +import wdl4s.EvaluatedTaskInputs import wdl4s.parser.MemoryUnit import wdl4s.types.{WdlArrayType, WdlFileType} import wdl4s.util.TryUtil @@ -27,7 +29,7 @@ import scala.util.{Failure, Success, Try} object HtCondorJobExecutionActor { val HtCondorJobIdKey = "htCondor_job_id" - val fileSystems = List(FileSystems.getDefault) + val pathBuilders = List(DefaultPathBuilder) def props(jobDescriptor: BackendJobDescriptor, configurationDescriptor: BackendConfigurationDescriptor, serviceRegistryActor: ActorRef, cacheActorProps: Option[Props]): Props = Props(new HtCondorJobExecutionActor(jobDescriptor, configurationDescriptor, serviceRegistryActor, cacheActorProps)) @@ -41,9 +43,9 @@ class HtCondorJobExecutionActor(override val jobDescriptor: BackendJobDescriptor import HtCondorJobExecutionActor._ import better.files._ - import cromwell.core.PathFactory._ private val tag = s"CondorJobExecutionActor-${jobDescriptor.call.fullyQualifiedName}:" + override val pathBuilders: List[PathBuilder] = HtCondorJobExecutionActor.pathBuilders implicit val executionContext = context.dispatcher @@ -53,7 +55,7 @@ class HtCondorJobExecutionActor(override val jobDescriptor: BackendJobDescriptor private val fileSystemsConfig = configurationDescriptor.backendConfig.getConfig("filesystems") override val sharedFileSystemConfig = fileSystemsConfig.getConfig("local") private val workflowDescriptor = jobDescriptor.workflowDescriptor - private val jobPaths = new JobPaths(workflowDescriptor, configurationDescriptor.backendConfig, jobDescriptor.key) + private val jobPaths = new JobPathsWithDocker(jobDescriptor.key, workflowDescriptor, configurationDescriptor.backendConfig) // Files private val executionDir = jobPaths.callExecutionRoot @@ -72,9 +74,9 @@ class HtCondorJobExecutionActor(override val jobDescriptor: BackendJobDescriptor private lazy val stderrWriter = extProcess.tailedWriter(100, submitFileStderr) private val call = jobDescriptor.key.call - private val callEngineFunction = SharedFileSystemExpressionFunctions(jobPaths, fileSystems) + private val callEngineFunction = SharedFileSystemExpressionFunctions(jobPaths, pathBuilders) - private val lookup = jobDescriptor.inputs.apply _ + private val lookup = jobDescriptor.fullyQualifiedInputs.apply _ private val runtimeAttributes = { val evaluateAttrs = 
call.task.runtimeAttributes.attrs mapValues (_.evaluate(lookup, callEngineFunction)) @@ -202,18 +204,18 @@ class HtCondorJobExecutionActor(override val jobDescriptor: BackendJobDescriptor condorJobId = Option(overallJobIdentifier) self ! TrackTaskStatus(overallJobIdentifier) - case _ => self ! JobExecutionResponse(FailedNonRetryableResponse(jobDescriptor.key, + case _ => self ! JobExecutionResponse(JobFailedNonRetryableResponse(jobDescriptor.key, new IllegalStateException("Failed to retrieve job(id) and cluster id"), Option(condorReturnCode))) } case 0 => log.error(s"Unexpected! Received return code for condor submission as 0, although stderr file is non-empty: {}", File(submitFileStderr).lines) - self ! JobExecutionResponse(FailedNonRetryableResponse(jobDescriptor.key, + self ! JobExecutionResponse(JobFailedNonRetryableResponse(jobDescriptor.key, new IllegalStateException(s"Execution process failed. HtCondor returned zero status code but non empty stderr file: $condorReturnCode"), Option(condorReturnCode))) case nonZeroExitCode: Int => - self ! JobExecutionResponse(FailedNonRetryableResponse(jobDescriptor.key, + self ! JobExecutionResponse(JobFailedNonRetryableResponse(jobDescriptor.key, new IllegalStateException(s"Execution process failed. HtCondor returned non zero status code: $condorReturnCode"), Option(condorReturnCode))) } } @@ -229,16 +231,16 @@ class HtCondorJobExecutionActor(override val jobDescriptor: BackendJobDescriptor context.system.scheduler.scheduleOnce(pollingInterval.seconds, self, TrackTaskStatus(jobIdentifier)) () case Success(Some(rc)) if runtimeAttributes.continueOnReturnCode.continueFor(rc) => self ! JobExecutionResponse(processSuccess(rc)) - case Success(Some(rc)) => self ! JobExecutionResponse(FailedNonRetryableResponse(jobDescriptor.key, + case Success(Some(rc)) => self ! JobExecutionResponse(JobFailedNonRetryableResponse(jobDescriptor.key, new IllegalStateException("Job exited with invalid return code: " + rc), Option(rc))) - case Failure(error) => self ! JobExecutionResponse(FailedNonRetryableResponse(jobDescriptor.key, error, None)) + case Failure(error) => self ! JobExecutionResponse(JobFailedNonRetryableResponse(jobDescriptor.key, error, None)) } } private def processSuccess(rc: Int): BackendJobExecutionResponse = { evaluateOutputs(callEngineFunction, outputMapper(jobPaths)) match { case Success(outputs) => - val succeededResponse = SucceededResponse(jobDescriptor.key, Some(rc), outputs, None, Seq.empty) + val succeededResponse = JobSucceededResponse(jobDescriptor.key, Some(rc), outputs, None, Seq.empty) log.debug("{} Storing data into cache for hash {}.", tag, jobHash) // If cache fails to store data for any reason it should not stop the workflow/task execution but log the issue. cacheActor foreach { _ ! 
StoreExecutionResult(jobHash, succeededResponse) } @@ -247,12 +249,12 @@ class HtCondorJobExecutionActor(override val jobDescriptor: BackendJobDescriptor val message = Option(e.getMessage) map { ": " + _ } getOrElse "" - FailedNonRetryableResponse(jobDescriptor.key, new Throwable("Failed post processing of outputs" + message, e), Option(rc)) + JobFailedNonRetryableResponse(jobDescriptor.key, new Throwable("Failed post processing of outputs" + message, e), Option(rc)) } } private def calculateHash: String = { - val cmd = call.task.instantiateCommand(jobDescriptor.inputs, callEngineFunction, identity) match { + val cmd = Command.instantiate(jobDescriptor, callEngineFunction) match { case Success(command) => command case Failure(ex) => val errMsg = s"$tag Cannot instantiate job command for caching purposes due to ${ex.getMessage}." @@ -276,7 +278,7 @@ class HtCondorJobExecutionActor(override val jobDescriptor: BackendJobDescriptor executionDir.toString.toFile.createIfNotExists(asDirectory = true, createParents = true) log.debug("{} Resolving job command", tag) - val command = localizeInputs(jobPaths.callInputsRoot, runtimeAttributes.dockerImage.isDefined, fileSystems, jobDescriptor.inputs) flatMap { + val command = localizeInputs(jobPaths.callInputsRoot, runtimeAttributes.dockerImage.isDefined)(jobDescriptor.inputDeclarations) flatMap { localizedInputs => resolveJobCommand(localizedInputs) } @@ -296,7 +298,7 @@ class HtCondorJobExecutionActor(override val jobDescriptor: BackendJobDescriptor HtCondorRuntimeKeys.Disk -> runtimeAttributes.disk.to(MemoryUnit.KB).amount.toLong ) - cmds.generateSubmitFile(submitFilePath, attributes) // This writes the condor submit file + cmds.generateSubmitFile(submitFilePath, attributes, runtimeAttributes.nativeSpecs) // This writes the condor submit file () } catch { @@ -306,7 +308,7 @@ class HtCondorJobExecutionActor(override val jobDescriptor: BackendJobDescriptor } } - private def resolveJobCommand(localizedInputs: CallInputs): Try[String] = { + private def resolveJobCommand(localizedInputs: EvaluatedTaskInputs): Try[String] = { val command = if (runtimeAttributes.dockerImage.isDefined) { modifyCommandForDocker(call.task.instantiateCommand(localizedInputs, callEngineFunction, identity), localizedInputs) } else { @@ -329,7 +331,7 @@ class HtCondorJobExecutionActor(override val jobDescriptor: BackendJobDescriptor serviceRegistryActor.putMetadata(jobDescriptor.workflowDescriptor.id, Option(jobDescriptor.key), metadataKeyValues) } - private def modifyCommandForDocker(jobCmd: Try[String], localizedInputs: CallInputs): Try[String] = { + private def modifyCommandForDocker(jobCmd: Try[String], localizedInputs: EvaluatedTaskInputs): Try[String] = { Try { val dockerInputDataVol = localizedInputs.values.collect { case file if file.wdlType == WdlFileType => @@ -362,16 +364,16 @@ class HtCondorJobExecutionActor(override val jobDescriptor: BackendJobDescriptor createExecutionFolderAndScript() executeTask() } catch { - case e: Exception => self ! JobExecutionResponse(FailedNonRetryableResponse(jobDescriptor.key, e, None)) + case e: Exception => self ! 
JobExecutionResponse(JobFailedNonRetryableResponse(jobDescriptor.key, e, None)) } } - private def localizeCachedResponse(succeededResponse: SucceededResponse): BackendJobExecutionResponse = { + private def localizeCachedResponse(succeededResponse: JobSucceededResponse): BackendJobExecutionResponse = { Try(localizeCachedOutputs(executionDir, succeededResponse.jobOutputs)) match { case Success(outputs) => executionDir.toString.toFile.createIfNotExists(asDirectory = true, createParents = true) - SucceededResponse(jobDescriptor.key, succeededResponse.returnCode, outputs, None, Seq.empty) - case Failure(exception) => FailedNonRetryableResponse(jobDescriptor.key, exception, None) + JobSucceededResponse(jobDescriptor.key, succeededResponse.returnCode, outputs, None, Seq.empty) + case Failure(exception) => JobFailedNonRetryableResponse(jobDescriptor.key, exception, None) } } } diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorRuntimeAttributes.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorRuntimeAttributes.scala index f8dd9a59595..758d2dbb4ac 100644 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorRuntimeAttributes.scala +++ b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorRuntimeAttributes.scala @@ -12,8 +12,8 @@ import cromwell.backend.validation.RuntimeAttributesValidation._ import cromwell.core._ import cromwell.core.ErrorOr._ import lenthall.exception.MessageAggregation -import wdl4s.types.{WdlIntegerType, WdlStringType, WdlBooleanType, WdlType} -import wdl4s.values.{WdlString, WdlBoolean, WdlInteger, WdlValue} +import wdl4s.types._ +import wdl4s.values.{WdlArray, WdlBoolean, WdlInteger, WdlString, WdlValue} object HtCondorRuntimeAttributes { @@ -26,6 +26,7 @@ object HtCondorRuntimeAttributes { val DockerWorkingDirKey = "dockerWorkingDir" val DockerOutputDirKey = "dockerOutputDir" val DiskKey = "disk" + val NativeSpecsKey = "nativeSpecs" val staticDefaults = Map( FailOnStderrKey -> WdlBoolean(FailOnStderrDefaultValue), @@ -43,7 +44,8 @@ object HtCondorRuntimeAttributes { DockerOutputDirKey -> Set(WdlStringType), CpuKey -> Set(WdlIntegerType), MemoryKey -> Set(WdlStringType), - DiskKey -> Set(WdlStringType) + DiskKey -> Set(WdlStringType), + NativeSpecsKey -> Set(WdlArrayType(WdlStringType)) ) def apply(attrs: Map[String, WdlValue], options: WorkflowOptions): HtCondorRuntimeAttributes = { @@ -59,9 +61,10 @@ object HtCondorRuntimeAttributes { val cpu = validateCpu(withDefaultValues.get(CpuKey), noValueFoundFor(CpuKey)) val memory = validateMemory(withDefaultValues.get(MemoryKey), noValueFoundFor(MemoryKey)) val disk = validateDisk(withDefaultValues.get(DiskKey), noValueFoundFor(DiskKey)) + val nativeSpecs = validateNativeSpecs(withDefaultValues.get(NativeSpecsKey), None.validNel) - (continueOnReturnCode |@| docker |@| dockerWorkingDir |@| dockerOutputDir |@| failOnStderr |@| cpu |@| memory |@| disk) map { - new HtCondorRuntimeAttributes(_, _, _, _, _, _, _, _) + (continueOnReturnCode |@| docker |@| dockerWorkingDir |@| dockerOutputDir |@| failOnStderr |@| cpu |@| memory |@| disk |@| nativeSpecs) map { + new HtCondorRuntimeAttributes(_, _, _, _, _, _, _, _, _) } match { case Valid(x) => x case Invalid(nel) => throw new RuntimeException with MessageAggregation { @@ -97,6 +100,17 @@ object HtCondorRuntimeAttributes { case None => onMissingKey } } + + private def validateNativeSpecs(value: Option[WdlValue], onMissingKey: => 
ErrorOr[Option[Array[String]]]): ErrorOr[Option[Array[String]]] = { + val nativeSpecsWrongFormatMsg = s"Expecting $NativeSpecsKey runtime attribute to be an Array of Strings. Exception: %s" + value match { + case Some(ns: WdlArray) if ns.wdlType.memberType.equals(WdlStringType) => + val nsa = ns.value.map { value => value.valueString }.toArray + Option(nsa).validNel + case Some(_) => String.format(nativeSpecsWrongFormatMsg, "Not supported WDL type value").invalidNel + case None => onMissingKey + } + } } case class HtCondorRuntimeAttributes(continueOnReturnCode: ContinueOnReturnCode, @@ -106,4 +120,5 @@ case class HtCondorRuntimeAttributes(continueOnReturnCode: ContinueOnReturnCode, failOnStderr: Boolean, cpu: Int, memory: MemorySize, - disk: MemorySize) + disk: MemorySize, + nativeSpecs: Option[Array[String]]) diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorWrapper.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorWrapper.scala index 76a81c08a3b..35af6dd1d1b 100644 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorWrapper.scala +++ b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorWrapper.scala @@ -5,8 +5,9 @@ import java.nio.file.{Files, Path} import better.files._ import com.typesafe.scalalogging.StrictLogging import cromwell.backend.impl.htcondor -import cromwell.core.PathFactory.{EnhancedPath, FlushingAndClosingWriter} -import cromwell.core.{TailedWriter, UntailedWriter} +import cromwell.core.path.{TailedWriter, UntailedWriter} +import cromwell.core.path.PathImplicits._ +import cromwell.core.path.JavaWriterImplicits._ import scala.sys.process._ @@ -55,22 +56,28 @@ class HtCondorCommands extends StrictLogging { */ def writeScript(instantiatedCommand: String, filePath: Path, containerRoot: Path): Unit = { logger.debug(s"Writing bash script for execution. Command: $instantiatedCommand.") - File(filePath).write( - s"""#!/bin/sh - |cd $containerRoot - |$instantiatedCommand - |echo $$? > rc - |""".stripMargin) + val scriptBody = s""" + +#!/bin/sh +cd $containerRoot +$instantiatedCommand +echo $$? > rc + +""".trim + "\n" + File(filePath).write(scriptBody) () } - def generateSubmitFile(path: Path, attributes: Map[String, Any]): String = { + def generateSubmitFile(path: Path, attributes: Map[String, Any], nativeSpecs: Option[Array[String]]): String = { def htCondorSubmitCommand(filePath: Path) = { s"${HtCondorCommands.Submit} ${filePath.toString}" } val submitFileWriter = path.untailed - attributes.foreach(attribute => submitFileWriter.writeWithNewline(s"${attribute._1}=${attribute._2}")) + attributes.foreach { attribute => submitFileWriter.writeWithNewline(s"${attribute._1}=${attribute._2}") } + //Native specs is intended for attaching HtCondor native configuration such as 'requirements' and 'rank' definition + //directly to the submit file. 
+ nativeSpecs foreach { _.foreach { submitFileWriter.writeWithNewline } } submitFileWriter.writeWithNewline(HtCondorRuntimeKeys.Queue) submitFileWriter.writer.flushAndClose() logger.debug(s"submit file name is : $path") diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/CacheActor.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/CacheActor.scala index 70799bd677a..74fb3ade536 100644 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/CacheActor.scala +++ b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/CacheActor.scala @@ -1,7 +1,7 @@ package cromwell.backend.impl.htcondor.caching import akka.actor.{Actor, ActorLogging} -import cromwell.backend.BackendJobExecutionActor.SucceededResponse +import cromwell.backend.BackendJobExecutionActor.JobSucceededResponse import cromwell.backend.impl.htcondor.caching.CacheActor._ import cromwell.backend.impl.htcondor.caching.exception.{CachedResultAlreadyExistException, CachedResultNotFoundException} import cromwell.backend.impl.htcondor.caching.model.CachedExecutionResult @@ -10,10 +10,10 @@ object CacheActor { trait CacheActorCommand case class ReadExecutionResult(hash: String) extends CacheActorCommand - case class StoreExecutionResult(hash: String, succeededResponse: SucceededResponse) extends CacheActorCommand + case class StoreExecutionResult(hash: String, succeededResponse: JobSucceededResponse) extends CacheActorCommand trait CacheActorResponse - case class ExecutionResultFound(succeededResponse: SucceededResponse) extends CacheActorResponse + case class ExecutionResultFound(succeededResponse: JobSucceededResponse) extends CacheActorResponse case object ExecutionResultNotFound extends CacheActorResponse case class ExecutionResultStored(hash: String) extends CacheActorResponse case object ExecutionResultAlreadyExist extends CacheActorResponse diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/localization/CachedResultLocalization.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/localization/CachedResultLocalization.scala index 2f8254e5c2e..127141c5bc6 100644 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/localization/CachedResultLocalization.scala +++ b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/localization/CachedResultLocalization.scala @@ -23,7 +23,7 @@ trait CachedResultLocalization { WdlSingleFile(slPath.toString) } - def localizeCachedOutputs(executionPath: Path, outputs: JobOutputs): JobOutputs = { + def localizeCachedOutputs(executionPath: Path, outputs: CallOutputs): CallOutputs = { outputs map { case (lqn, jobOutput) => jobOutput.wdlValue.wdlType match { case WdlFileType => (lqn -> JobOutput(localizeCachedFile(executionPath, jobOutput.wdlValue))) diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/model/CachedExecutionResult.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/model/CachedExecutionResult.scala index fdff708041e..1b023fd410a 100644 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/model/CachedExecutionResult.scala +++ b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/model/CachedExecutionResult.scala @@ -1,6 +1,6 @@ package cromwell.backend.impl.htcondor.caching.model 
-import cromwell.backend.BackendJobExecutionActor.SucceededResponse +import cromwell.backend.BackendJobExecutionActor.JobSucceededResponse -case class CachedExecutionResult(hash: String, succeededResponse: SucceededResponse) +case class CachedExecutionResult(hash: String, succeededResponse: JobSucceededResponse) diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/MongoCacheActor.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/MongoCacheActor.scala index 7f99e1565f2..cf6cf11511c 100644 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/MongoCacheActor.scala +++ b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/MongoCacheActor.scala @@ -4,7 +4,7 @@ import com.mongodb.DBObject import com.mongodb.casbah.MongoCollection import com.mongodb.casbah.commons.{MongoDBObject, TypeImports} import com.mongodb.util.JSON -import cromwell.backend.BackendJobExecutionActor.SucceededResponse +import cromwell.backend.BackendJobExecutionActor.JobSucceededResponse import cromwell.backend.impl.htcondor.caching.CacheActor import cromwell.backend.impl.htcondor.caching.exception.{CachedResultAlreadyExistException, CachedResultNotFoundException} import cromwell.backend.impl.htcondor.caching.model.CachedExecutionResult @@ -55,7 +55,7 @@ class MongoCacheActor(collection: MongoCollection, private def deserializeSucceededResponse(mongoDbObject: TypeImports.DBObject): CachedExecutionResult = { val cachedResult = JsonParser(mongoDbObject.toString).convertTo[MongoCachedExecutionResult] - val succeededResponse = deserialize(cachedResult.succeededResponse.byteArray, classOf[SucceededResponse]) + val succeededResponse = deserialize(cachedResult.succeededResponse.byteArray, classOf[JobSucceededResponse]) CachedExecutionResult(cachedResult.hash, succeededResponse) } diff --git a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorCommandSpec.scala b/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorCommandSpec.scala index ce1097f4e79..9a03fecca26 100644 --- a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorCommandSpec.scala +++ b/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorCommandSpec.scala @@ -5,17 +5,17 @@ import better.files._ import org.scalatest.{Matchers, WordSpecLike} class HtCondorCommandSpec extends WordSpecLike with Matchers { - val attributes = Map("executable" -> "test.sh", "input" -> "/temp/test", "error"->"stderr") - val resultAttributes = List("executable=test.sh","input=/temp/test","error=stderr", "queue") - val htCondorCommands = new HtCondorCommands + private val attributes = Map("executable" -> "test.sh", "input" -> "/temp/test", "error"->"stderr") + private val resultAttributes = List("executable=test.sh","input=/temp/test","error=stderr", "spec1", "spec2", "queue") + private val htCondorCommands = new HtCondorCommands + private val nativeSpecs = Option(Array("spec1", "spec2")) "submitCommand method" should { "return submit file with content passed to it" in { - val dir = File.newTemporaryFile() - val command = htCondorCommands.generateSubmitFile(dir.path,attributes) - val file = dir - resultAttributes shouldEqual dir.lines.toList - dir.delete() + val file = File.newTemporaryFile() + val command = htCondorCommands.generateSubmitFile(file.path, attributes, 
nativeSpecs) + resultAttributes shouldEqual file.lines.toList + file.delete() command shouldEqual s"condor_submit ${file.path}" } } diff --git a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorInitializationActorSpec.scala b/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorInitializationActorSpec.scala index 9e71d1a1497..29d19d11779 100644 --- a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorInitializationActorSpec.scala +++ b/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorInitializationActorSpec.scala @@ -5,7 +5,7 @@ import cromwell.backend.BackendWorkflowInitializationActor.Initialize import cromwell.backend.{BackendConfigurationDescriptor, BackendSpec, BackendWorkflowDescriptor} import cromwell.core.TestKitSuite import org.scalatest.{Matchers, WordSpecLike} -import wdl4s.Call +import wdl4s.TaskCall import scala.concurrent.duration._ @@ -29,12 +29,12 @@ class HtCondorInitializationActorSpec extends TestKitSuite("HtCondorInitializati | RUNTIME |} | - |workflow hello { + |workflow wf_hello { | call hello |} """.stripMargin - private def getHtCondorBackend(workflowDescriptor: BackendWorkflowDescriptor, calls: Seq[Call], conf: BackendConfigurationDescriptor) = { + private def getHtCondorBackend(workflowDescriptor: BackendWorkflowDescriptor, calls: Set[TaskCall], conf: BackendConfigurationDescriptor) = { system.actorOf(HtCondorInitializationActor.props(workflowDescriptor, calls, conf, emptyActor)) } @@ -43,7 +43,7 @@ class HtCondorInitializationActorSpec extends TestKitSuite("HtCondorInitializati within(Timeout) { EventFilter.warning(message = s"Key/s [proc] is/are not supported by HtCondorBackend. Unsupported attributes will not be part of jobs executions.", occurrences = 1) intercept { val workflowDescriptor = buildWorkflowDescriptor(HelloWorld, runtime = """runtime { proc: 1 }""") - val backend = getHtCondorBackend(workflowDescriptor, workflowDescriptor.workflowNamespace.workflow.calls, + val backend = getHtCondorBackend(workflowDescriptor, workflowDescriptor.workflow.taskCalls, emptyBackendConfig) backend ! 
Initialize } diff --git a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorJobExecutionActorSpec.scala b/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorJobExecutionActorSpec.scala index 589554e94d3..33bc5c71c3e 100644 --- a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorJobExecutionActorSpec.scala +++ b/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorJobExecutionActorSpec.scala @@ -7,15 +7,15 @@ import akka.actor.{Actor, Props} import akka.testkit.{ImplicitSender, TestActorRef} import better.files._ import com.typesafe.config.ConfigFactory -import cromwell.backend.BackendJobExecutionActor.{FailedNonRetryableResponse, SucceededResponse} +import cromwell.backend.BackendJobExecutionActor.{JobFailedNonRetryableResponse, JobSucceededResponse} import cromwell.backend.impl.htcondor.caching.CacheActor import cromwell.backend.impl.htcondor.caching.exception.CachedResultNotFoundException import cromwell.backend.impl.htcondor.caching.model.CachedExecutionResult -import cromwell.backend.io.JobPaths +import cromwell.backend.io.JobPathsWithDocker import cromwell.backend.{BackendConfigurationDescriptor, BackendJobDescriptor, BackendSpec} import cromwell.core._ +import cromwell.core.path.{PathWriter, TailedWriter, UntailedWriter} import cromwell.services.keyvalue.KeyValueServiceActor.{KvGet, KvPair, KvPut} -import org.mockito.Matchers._ import org.mockito.Mockito import org.mockito.Mockito._ import org.scalatest.concurrent.PatienceConfiguration.Timeout @@ -54,7 +54,7 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc | RUNTIME |} | - |workflow hello { + |workflow wf_hello { | call hello |} """.stripMargin @@ -73,7 +73,7 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc | RUNTIME |} | - |workflow hello { + |workflow wf_hello { | call hello |} """.stripMargin @@ -92,7 +92,7 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc | RUNTIME |} | - |workflow hello { + |workflow wf_hello { | call hello |} """.stripMargin @@ -102,7 +102,7 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc | root = "local-cromwell-executions" | | docker { - | cmd = "docker run -w %s %s %s %s --rm %s %s" + | cmd = "docker run -w %s %s %s %s --rm %s /bin/bash -c \\"%s\\"" | defaultWorkingDir = "/workingDir/" | defaultOutputDir = "/output/" | } @@ -147,7 +147,7 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc }).underlyingActor whenReady(backend.execute, timeout) { response => - response shouldBe a[SucceededResponse] + response shouldBe a[JobSucceededResponse] verify(htCondorProcess, times(1)).externalProcess(any[Seq[String]], any[ProcessLogger]) verify(htCondorProcess, times(1)).tailedWriter(any[Int], any[Path]) verify(htCondorProcess, times(1)).untailedWriter(any[Path]) @@ -179,7 +179,7 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc }).underlyingActor whenReady(backend.recover, timeout) { response => - response shouldBe a[SucceededResponse] + response shouldBe a[JobSucceededResponse] } cleanUpJob(jobPaths) @@ -207,7 +207,7 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc }).underlyingActor whenReady(backend.execute, timeout) { response => - response shouldBe a[SucceededResponse] + response shouldBe a[JobSucceededResponse] verify(htCondorProcess, 
times(1)).externalProcess(any[Seq[String]], any[ProcessLogger]) verify(htCondorProcess, times(1)).tailedWriter(any[Int], any[Path]) verify(htCondorProcess, times(1)).untailedWriter(any[Path]) @@ -237,8 +237,8 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc when(htCondorProcess.jobReturnCode(any[String], any[Path])).thenReturn(Option(-1)) whenReady(backend.execute, timeout) { response => - response shouldBe a[FailedNonRetryableResponse] - assert(response.asInstanceOf[FailedNonRetryableResponse].throwable.getMessage.contains("Job exited with invalid return code")) + response shouldBe a[JobFailedNonRetryableResponse] + assert(response.asInstanceOf[JobFailedNonRetryableResponse].throwable.getMessage.contains("Job exited with invalid return code")) } cleanUpJob(jobPaths) @@ -271,7 +271,7 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc when(htCondorProcess.jobReturnCode(any[String], any[Path])).thenReturn(Option(911)) whenReady(backend.execute, timeout) { response => - response shouldBe a[SucceededResponse] + response shouldBe a[JobSucceededResponse] } cleanUpJob(jobPaths) @@ -288,7 +288,7 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc """.stripMargin val jsonInputFile = createCannedFile("testFile", "some content").pathAsString val inputs = Map( - "inputFile" -> WdlFile(jsonInputFile) + "wf_hello.hello.inputFile" -> WdlFile(jsonInputFile) ) val jobDescriptor = prepareJob(helloWorldWdlWithFileInput, runtime, Option(inputs)) val (job, jobPaths, backendConfigDesc) = (jobDescriptor.jobDescriptor, jobDescriptor.jobPaths, jobDescriptor.backendConfigurationDescriptor) @@ -310,7 +310,7 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc when(htCondorProcess.jobReturnCode(any[String], any[Path])).thenReturn(Option(0)) whenReady(backend.execute) { response => - response shouldBe a[SucceededResponse] + response shouldBe a[JobSucceededResponse] } val bashScript = Source.fromFile(jobPaths.script.toFile).getLines.mkString @@ -318,7 +318,7 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc assert(bashScript.contains("docker run -w /workingDir/ -v")) assert(bashScript.contains(":/workingDir/")) assert(bashScript.contains(":ro")) - assert(bashScript.contains("/call-hello/execution:/outputDir/ --rm ubuntu/latest echo")) + assert(bashScript.contains("/call-hello/execution:/outputDir/ --rm ubuntu/latest /bin/bash -c \"echo")) cleanUpJob(jobPaths) } @@ -346,8 +346,8 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc when(htCondorProcess.jobReturnCode(any[String], any[Path])).thenReturn(Option(-1)) whenReady(backend.execute, timeout) { response => - response shouldBe a[FailedNonRetryableResponse] - assert(response.asInstanceOf[FailedNonRetryableResponse].throwable.getMessage.contains("Could not write the file.")) + response shouldBe a[JobFailedNonRetryableResponse] + assert(response.asInstanceOf[JobFailedNonRetryableResponse].throwable.getMessage.contains("Could not write the file.")) } cleanUpJob(jobPaths) @@ -372,7 +372,7 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc createCannedFile(prefix = "testFile2", contents = "some other content", dir = Some(tempDir2)).pathAsString val inputs = Map( - "inputFiles" -> WdlArray(WdlArrayType(WdlFileType), Seq(WdlFile(jsonInputFile), WdlFile(jsonInputFile2))) + "wf_hello.hello.inputFiles" -> WdlArray(WdlArrayType(WdlFileType), 
Seq(WdlFile(jsonInputFile), WdlFile(jsonInputFile2))) ) val jobDescriptor = prepareJob(helloWorldWdlWithFileArrayInput, runtime, Option(inputs)) val (job, jobPaths, backendConfigDesc) = (jobDescriptor.jobDescriptor, jobDescriptor.jobPaths, jobDescriptor.backendConfigurationDescriptor) @@ -394,7 +394,7 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc when(htCondorProcess.jobReturnCode(any[String], any[Path])).thenReturn(Option(0)) whenReady(backend.execute) { response => - response shouldBe a[SucceededResponse] + response shouldBe a[JobSucceededResponse] } val bashScript = Source.fromFile(jobPaths.script.toFile).getLines.mkString @@ -403,12 +403,12 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc assert(bashScript.contains(":/workingDir/")) assert(bashScript.contains(tempDir1.toAbsolutePath.toString)) assert(bashScript.contains(tempDir2.toAbsolutePath.toString)) - assert(bashScript.contains("/call-hello/execution:/outputDir/ --rm ubuntu/latest echo")) + assert(bashScript.contains("/call-hello/execution:/outputDir/ --rm ubuntu/latest /bin/bash -c \"echo")) cleanUpJob(jobPaths) } - private def cleanUpJob(jobPaths: JobPaths): Unit = { + private def cleanUpJob(jobPaths: JobPathsWithDocker): Unit = { File(jobPaths.workflowRoot).delete(true) () } @@ -425,7 +425,7 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc val backendWorkflowDescriptor = buildWorkflowDescriptor(wdl = source, inputs = inputFiles.getOrElse(Map.empty), runtime = runtimeString) val backendConfigurationDescriptor = BackendConfigurationDescriptor(backendConfig, ConfigFactory.load) val jobDesc = jobDescriptorFromSingleCallWorkflow(backendWorkflowDescriptor, inputFiles.getOrElse(Map.empty), emptyWorkflowOptions, Set.empty) - val jobPaths = new JobPaths(backendWorkflowDescriptor, backendConfig, jobDesc.key) + val jobPaths = new JobPathsWithDocker(jobDesc.key, backendWorkflowDescriptor, backendConfig) val executionDir = File(jobPaths.callExecutionRoot) val stdout = File(executionDir.pathAsString, "stdout") stdout.createIfNotExists(asDirectory = false, createParents = true) @@ -440,7 +440,7 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc TestJobDescriptor(jobDesc, jobPaths, backendConfigurationDescriptor) } - private case class TestJobDescriptor(jobDescriptor: BackendJobDescriptor, jobPaths: JobPaths, backendConfigurationDescriptor: BackendConfigurationDescriptor) + private case class TestJobDescriptor(jobDescriptor: BackendJobDescriptor, jobPaths: JobPathsWithDocker, backendConfigurationDescriptor: BackendConfigurationDescriptor) trait MockWriter extends Writer { var closed = false diff --git a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorRuntimeAttributesSpec.scala b/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorRuntimeAttributesSpec.scala index db95a999e09..659d700fa42 100644 --- a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorRuntimeAttributesSpec.scala +++ b/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorRuntimeAttributesSpec.scala @@ -30,7 +30,7 @@ class HtCondorRuntimeAttributesSpec extends WordSpecLike with Matchers { | RUNTIME |} | - |workflow hello { + |workflow wf_hello { | call hello |} """.stripMargin @@ -39,7 +39,8 @@ class HtCondorRuntimeAttributesSpec extends WordSpecLike with Matchers { val memorySize = MemorySize.parse("0.512 GB").get val 
diskSize = MemorySize.parse("1.024 GB").get - val staticDefaults = new HtCondorRuntimeAttributes(ContinueOnReturnCodeSet(Set(0)), None, None, None, false, 1, memorySize, diskSize) + val staticDefaults = new HtCondorRuntimeAttributes(ContinueOnReturnCodeSet(Set(0)), None, None, None, false, 1, + memorySize, diskSize, None) def workflowOptionsWithDefaultRA(defaults: Map[String, JsValue]) = { WorkflowOptions(JsObject(Map( @@ -219,11 +220,16 @@ class HtCondorRuntimeAttributesSpec extends WordSpecLike with Matchers { assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes, shouldBeIgnored, expectedRuntimeAttributes) } - "throw an exception when tries to validate an invalid disk entry" in { + "throw an exception when tries to validate an invalid String disk entry" in { val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { docker: "ubuntu:latest" disk: "value" }""").head assertHtCondorRuntimeAttributesFailedCreation(runtimeAttributes, "Expecting memory runtime attribute to be an Integer or String with format '8 GB'") } + "throw an exception when tries to validate an invalid Integer array disk entry" in { + val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { docker: "ubuntu:latest" disk: [1] }""").head + assertHtCondorRuntimeAttributesFailedCreation(runtimeAttributes, "Expecting disk runtime attribute to be an Integer or String with format '8 GB'") + } + "use workflow options as default if disk key is missing" in { val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { }""").head val expectedRuntimeAttributes = staticDefaults.copy(disk = MemorySize.parse("65 GB").get) @@ -237,37 +243,59 @@ class HtCondorRuntimeAttributesSpec extends WordSpecLike with Matchers { val shouldBeIgnored = workflowOptionsWithDefaultRA(Map()) assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes, shouldBeIgnored, expectedRuntimeAttributes) } + + "return an instance of itself when tries to validate a valid native specs entry" in { + val expectedRuntimeAttributes = staticDefaults.copy(nativeSpecs = Option(Array("spec1", "spec2"))) + val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { nativeSpecs: ["spec1", "spec2"] }""").head + assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes, emptyWorkflowOptions, expectedRuntimeAttributes) + } + + "throw an exception when tries to validate an invalid native specs entry" in { + val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { nativeSpecs: [1, 2] }""").head + assertHtCondorRuntimeAttributesFailedCreation(runtimeAttributes, "Expecting nativeSpecs runtime attribute to be an Array of Strings.") + } } - private def assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes: Map[String, WdlValue], workflowOptions: WorkflowOptions, expectedRuntimeAttributes: HtCondorRuntimeAttributes): Unit = { + private def assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes: Map[String, WdlValue], + workflowOptions: WorkflowOptions, + expectedRuntimeAttributes: HtCondorRuntimeAttributes) = { try { - assert(HtCondorRuntimeAttributes(runtimeAttributes, workflowOptions) == expectedRuntimeAttributes) + val actualRuntimeAttr = HtCondorRuntimeAttributes(runtimeAttributes, workflowOptions) + assert(actualRuntimeAttr.cpu == expectedRuntimeAttributes.cpu) + assert(actualRuntimeAttr.disk == expectedRuntimeAttributes.disk) + assert(actualRuntimeAttr.memory == expectedRuntimeAttributes.memory) + assert(actualRuntimeAttr.continueOnReturnCode == 
expectedRuntimeAttributes.continueOnReturnCode) + assert(actualRuntimeAttr.failOnStderr == expectedRuntimeAttributes.failOnStderr) + assert(actualRuntimeAttr.dockerWorkingDir == expectedRuntimeAttributes.dockerWorkingDir) + assert(actualRuntimeAttr.dockerImage == expectedRuntimeAttributes.dockerImage) + assert(actualRuntimeAttr.dockerOutputDir == expectedRuntimeAttributes.dockerOutputDir) + expectedRuntimeAttributes.nativeSpecs match { + case Some(ns) => assert(ns.deep == expectedRuntimeAttributes.nativeSpecs.get.deep) + case None => assert(expectedRuntimeAttributes.nativeSpecs.isEmpty) + } } catch { case ex: RuntimeException => fail(s"Exception was not expected but received: ${ex.getMessage}") } - () } - private def assertHtCondorRuntimeAttributesFailedCreation(runtimeAttributes: Map[String, WdlValue], exMsg: String): Unit = { + private def assertHtCondorRuntimeAttributesFailedCreation(runtimeAttributes: Map[String, WdlValue], exMsg: String) = { try { HtCondorRuntimeAttributes(runtimeAttributes, emptyWorkflowOptions) fail("A RuntimeException was expected.") } catch { case ex: RuntimeException => assert(ex.getMessage.contains(exMsg)) } - () } private def createRuntimeAttributes(wdlSource: WdlSource, runtimeAttributes: String): Seq[Map[String, WdlValue]] = { val workflowDescriptor = buildWorkflowDescriptor(wdlSource, runtime = runtimeAttributes) def createLookup(call: Call): ScopedLookupFunction = { - val declarations = workflowDescriptor.workflowNamespace.workflow.declarations ++ call.task.declarations val knownInputs = workflowDescriptor.inputs - WdlExpression.standardLookupFunction(knownInputs, declarations, NoFunctions) + call.lookupFunction(knownInputs, NoFunctions) } - workflowDescriptor.workflowNamespace.workflow.calls map { + workflowDescriptor.workflow.taskCalls.toSeq map { call => val ra = call.task.runtimeAttributes.attrs mapValues { _.evaluate(createLookup(call), NoFunctions) } TryUtil.sequenceMap(ra, "Runtime attributes evaluation").get diff --git a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/caching/localization/CachedResultLocalizationSpec.scala b/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/caching/localization/CachedResultLocalizationSpec.scala index c2c4d101ff1..e0e847e567d 100644 --- a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/caching/localization/CachedResultLocalizationSpec.scala +++ b/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/caching/localization/CachedResultLocalizationSpec.scala @@ -2,7 +2,7 @@ package cromwell.backend.impl.htcondor.caching.localization import java.nio.file.Files -import cromwell.core.{JobOutput, JobOutputs} +import cromwell.core.{JobOutput, CallOutputs} import org.scalatest.{BeforeAndAfterAll, Matchers, WordSpecLike} import wdl4s.types.{WdlArrayType, WdlFileType} import wdl4s.values.{WdlArray, WdlSingleFile, WdlString} @@ -33,7 +33,7 @@ class CachedResultLocalizationSpec extends WordSpecLike with Matchers with Befor } "localize cached job outputs which are WDL files using symbolic link" in { - val outputs: JobOutputs = Map("File1" -> JobOutput(WdlSingleFile(defaultCachedFile.toAbsolutePath.toString))) + val outputs: CallOutputs = Map("File1" -> JobOutput(WdlSingleFile(defaultCachedFile.toAbsolutePath.toString))) val newJobOutputs = cachedResults.localizeCachedOutputs(newTmpDir, outputs) newJobOutputs foreach { case (lqn, jobOutput) => assert(jobOutput.wdlValue.valueString == newTmpFile.toString) diff --git 
a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/MongoCacheActorSpec.scala b/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/MongoCacheActorSpec.scala index fa675e758aa..d638b82d94e 100644 --- a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/MongoCacheActorSpec.scala +++ b/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/MongoCacheActorSpec.scala @@ -8,7 +8,7 @@ import com.mongodb.util.JSON import com.mongodb.{DBObject, WriteResult} import com.typesafe.config.{Config, ConfigFactory} import cromwell.backend.{MemorySize, BackendJobDescriptorKey} -import cromwell.backend.BackendJobExecutionActor.SucceededResponse +import cromwell.backend.BackendJobExecutionActor.JobSucceededResponse import cromwell.backend.impl.htcondor.HtCondorRuntimeAttributes import cromwell.backend.impl.htcondor.caching.CacheActor._ import cromwell.backend.impl.htcondor.caching.exception.CachedResultNotFoundException @@ -20,7 +20,7 @@ import org.mockito.Mockito import org.mockito.Mockito._ import org.scalatest.mockito.MockitoSugar import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, MustMatchers, WordSpecLike} -import wdl4s.Call +import wdl4s.TaskCall import wdl4s.values.WdlString class MongoCacheActorSpec extends TestKit(ActorSystem("MongoCacheProviderActorSpecSystem")) with WordSpecLike with MustMatchers @@ -33,10 +33,10 @@ class MongoCacheActorSpec extends TestKit(ActorSystem("MongoCacheProviderActorSp val mongoDbCollectionMock = mock[MongoCollection] val memorySize = MemorySize.parse("0.512 GB").get val diskSize = MemorySize.parse("1.024 GB").get - val runtimeConfig = HtCondorRuntimeAttributes(ContinueOnReturnCodeSet(Set(0)), Some("tool-name"), Some("/workingDir"), Some("/outputDir"), true, 1, memorySize, diskSize) + val runtimeConfig = HtCondorRuntimeAttributes(ContinueOnReturnCodeSet(Set(0)), Some("tool-name"), Some("/workingDir"), Some("/outputDir"), true, 1, memorySize, diskSize, None) val jobHash = "88dde49db10f1551299fb9937f313c10" val taskStatus = "done" - val succeededResponseMock = SucceededResponse(BackendJobDescriptorKey(Call(None, "TestJob", null, null, null, None), None, 0), None, Map("test" -> JobOutput(WdlString("Test"))), None, Seq.empty) + val succeededResponseMock = JobSucceededResponse(BackendJobDescriptorKey(TaskCall(Option("taskName"), null, null, null), None, 0), None, Map("test" -> JobOutput(WdlString("Test"))), None, Seq.empty) val serSucceededRespMock = KryoSerializedObject(serialize(succeededResponseMock)) val cachedExecutionResult = MongoCachedExecutionResult(jobHash, serSucceededRespMock) val cachedExecutionResultDbObject = JSON.parse(cachedExecutionResult.toJson.toString).asInstanceOf[DBObject] diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/GenomicsFactory.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/GenomicsFactory.scala index 7285171b596..574e581533a 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/GenomicsFactory.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/GenomicsFactory.scala @@ -2,22 +2,22 @@ package cromwell.backend.impl.jes import java.net.URL -import com.google.api.client.auth.oauth2.Credential -import com.google.api.client.http.HttpTransport -import com.google.api.client.json.JsonFactory import com.google.api.services.genomics.Genomics +import 
cromwell.core.WorkflowOptions +import cromwell.filesystems.gcs.auth.GoogleAuthMode -object GenomicsFactory { +case class GenomicsFactory(applicationName: String, authMode: GoogleAuthMode, endpointUrl: URL) { - def apply(applicationName: String, credential: Credential, endpointUrl: URL): Genomics = { - GoogleGenomics.from(applicationName, endpointUrl, credential, credential.getJsonFactory, credential.getTransport) - } + def withOptions(options: WorkflowOptions) = { + val credential = authMode.credential(options) - // Wrapper object around Google's Genomics class providing a convenience 'from' "method" - object GoogleGenomics { - def from(applicationName: String, endpointUrl: URL, credential: Credential, jsonFactory: JsonFactory, httpTransport: HttpTransport): Genomics = { - new Genomics.Builder(httpTransport, jsonFactory, credential).setApplicationName(applicationName).setRootUrl(endpointUrl.toString).build - } + new Genomics.Builder( + credential.getTransport, + credential.getJsonFactory, + credential) + .setApplicationName(applicationName) + .setRootUrl(endpointUrl.toString) + .build } } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesAsyncBackendJobExecutionActor.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesAsyncBackendJobExecutionActor.scala index a08be8df959..84da1efbc13 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesAsyncBackendJobExecutionActor.scala @@ -9,29 +9,26 @@ import better.files._ import cats.instances.future._ import cats.syntax.functor._ import com.google.api.client.googleapis.json.GoogleJsonResponseException +import com.google.cloud.storage.contrib.nio.CloudStoragePath import cromwell.backend.BackendJobExecutionActor.{AbortedResponse, BackendJobExecutionResponse} import cromwell.backend.BackendLifecycleActor.AbortJobCommand import cromwell.backend.async.AsyncBackendJobExecutionActor.{ExecutionMode, JobId} import cromwell.backend.async.{AbortedExecutionHandle, AsyncBackendJobExecutionActor, ExecutionHandle, FailedNonRetryableExecutionHandle, FailedRetryableExecutionHandle, NonRetryableExecution, SuccessfulExecutionHandle} -import cromwell.backend.impl.jes.JesImplicits.PathString import cromwell.backend.impl.jes.JesJobExecutionActor.JesOperationIdKey import cromwell.backend.impl.jes.RunStatus.TerminalRunStatus import cromwell.backend.impl.jes.io._ import cromwell.backend.impl.jes.statuspolling.JesPollingActorClient -import cromwell.backend.{AttemptedLookupResult, BackendJobDescriptor, BackendWorkflowDescriptor, PreemptedException} +import cromwell.backend.wdl.OutputEvaluator +import cromwell.backend.{BackendJobDescriptor, BackendWorkflowDescriptor, PreemptedException} import cromwell.core.Dispatcher.BackendDispatcher import cromwell.core._ import cromwell.core.logging.JobLogging +import cromwell.core.path.proxy.PathProxy import cromwell.core.retry.{Retry, SimpleExponentialBackoff} -import cromwell.filesystems.gcs.NioGcsPath import cromwell.services.keyvalue.KeyValueServiceActor._ import cromwell.services.metadata._ -import wdl4s.AstTools._ -import wdl4s.WdlExpression.ScopedLookupFunction import wdl4s._ -import wdl4s.command.ParameterCommandPart import wdl4s.expression.NoFunctions -import wdl4s.util.TryUtil import wdl4s.values._ import scala.concurrent.duration._ @@ -58,6 +55,7 @@ object JesAsyncBackendJobExecutionActor { object WorkflowOptionKeys { val MonitoringScript = 
"monitoring_script" val GoogleProject = "google_project" + val GoogleComputeServiceAccount = "google_compute_service_account" } @@ -68,7 +66,7 @@ object JesAsyncBackendJobExecutionActor { * ask them for results. */ case class JesPendingExecutionHandle(jobDescriptor: BackendJobDescriptor, - jesOutputs: Seq[JesFileOutput], + jesOutputs: Set[JesFileOutput], run: Run, previousStatus: Option[RunStatus]) extends ExecutionHandle { override val isDone = false @@ -92,20 +90,20 @@ class JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDes override val pollingActor = jesBackendSingletonActor override lazy val pollBackOff = SimpleExponentialBackoff( - initialInterval = 30 seconds, maxInterval = 10 minutes, multiplier = 1.1) + initialInterval = 30 seconds, maxInterval = jesAttributes.maxPollingInterval seconds, multiplier = 1.1) override lazy val executeOrRecoverBackOff = SimpleExponentialBackoff( initialInterval = 3 seconds, maxInterval = 20 seconds, multiplier = 1.1) - private lazy val workflowDescriptor = jobDescriptor.workflowDescriptor + override lazy val workflowDescriptor = jobDescriptor.workflowDescriptor private lazy val call = jobDescriptor.key.call override lazy val retryable = jobDescriptor.key.attempt <= runtimeAttributes.preemptible private lazy val cmdInput = - JesFileInput(ExecParamName, jesCallPaths.gcsExecPath.toString, Paths.get(jesCallPaths.gcsExecFilename), workingDisk) + JesFileInput(ExecParamName, jesCallPaths.script.toUri.toString, Paths.get(jesCallPaths.scriptFilename), workingDisk) private lazy val jesCommandLine = s"/bin/bash ${cmdInput.containerPath}" - private lazy val rcJesOutput = JesFileOutput(returnCodeFilename, returnCodeGcsPath.toString, Paths.get(returnCodeFilename), workingDisk) + private lazy val rcJesOutput = JesFileOutput(returnCodeFilename, returnCodeGcsPath.toUri.toString, Paths.get(returnCodeFilename), workingDisk) private lazy val standardParameters = Seq(rcJesOutput) private lazy val returnCodeContents = Try(File(returnCodeGcsPath).contentAsString) @@ -129,26 +127,19 @@ class JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDes override def receive: Receive = pollingActorClientReceive orElse jesReceiveBehavior orElse super.receive - private def globOutputPath(glob: String) = callRootPath.resolve(s"glob-${glob.md5Sum}/") - private def gcsAuthParameter: Option[JesInput] = { - if (jesAttributes.gcsFilesystemAuth.requiresAuthFile || dockerConfiguration.isDefined) - Option(JesLiteralInput(ExtraConfigParamName, jesCallPaths.gcsAuthFilePath.toString)) + if (jesAttributes.auths.gcs.requiresAuthFile || dockerConfiguration.isDefined) + Option(JesLiteralInput(ExtraConfigParamName, jesCallPaths.gcsAuthFilePath.toUri.toString)) else None } private lazy val callContext = CallContext( callRootPath, - jesStdoutFile.toString, - jesStderrFile.toString + jesStdoutFile.toUri.toString, + jesStderrFile.toUri.toString ) - private[jes] lazy val callEngineFunctions = new JesExpressionFunctions(List(jesCallPaths.gcsFileSystem), callContext) - - private val lookup: ScopedLookupFunction = { - val declarations = workflowDescriptor.workflowNamespace.workflow.declarations ++ call.task.declarations - WdlExpression.standardLookupFunction(jobDescriptor.inputs, declarations, callEngineFunctions) - } + private[jes] lazy val callEngineFunctions = new JesExpressionFunctions(List(jesCallPaths.gcsPathBuilder), callContext) /** * Takes two arrays of remote and local WDL File paths and generates the necessary JesInputs. 
@@ -170,43 +161,28 @@ class JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDes * relativeLocalizationPath("gs://some/bucket/foo.txt") -> "some/bucket/foo.txt" */ private def relativeLocalizationPath(file: WdlFile): WdlFile = { - Try(getPath(file.value)) match { - case Success(gcsPath: NioGcsPath) => WdlFile(gcsPath.bucket + "/" + gcsPath.objectName, file.isGlob) - case Success(gcsPath) => file - case Failure(e) => file + getPath(file.value) match { + case Success(path) => + val value: WdlSource = path.toUri.getHost + path.toUri.getPath + WdlFile(value, file.isGlob) + case _ => file } } - private[jes] def generateJesInputs(jobDescriptor: BackendJobDescriptor): Iterable[JesInput] = { - /** - * Commands in WDL tasks can also generate input files. For example: ./my_exec --file=${write_lines(arr)} - * - * write_lines(arr) would produce a string-ified version of the array stored as a GCS path. The next block of code - * will go through each ${...} expression within the task's command section and find all write_*() ASTs and - * evaluate them so the files are written to GCS and the they can be included as inputs to Google's Pipeline object - */ - val commandExpressions = jobDescriptor.key.scope.task.commandTemplate.collect({ - case x: ParameterCommandPart => x.expression - }) - - val writeFunctionAsts = commandExpressions.map(_.ast).flatMap(x => AstTools.findAsts(x, "FunctionCall")).collect({ - case y if y.getAttribute("name").sourceString.startsWith("write_") => y - }) + private[jes] def generateJesInputs(jobDescriptor: BackendJobDescriptor): Set[JesInput] = { - val evaluatedExpressionMap = writeFunctionAsts map { ast => - val expression = WdlExpression(ast) - val value = expression.evaluate(lookup, callEngineFunctions) - expression.toWdlString.md5SumShort -> value - } toMap - - val writeFunctionFiles = evaluatedExpressionMap collect { case (k, v: Success[_]) => k -> v.get } collect { case (k, v: WdlFile) => k -> Seq(v)} + val writeFunctionFiles = call.task.evaluateFilesFromCommand(jobDescriptor.fullyQualifiedInputs, callEngineFunctions) map { + case (expression, file) => expression.toWdlString.md5SumShort -> Seq(file) + } - /** Collect all WdlFiles from inputs to the call */ - val callInputFiles: Map[FullyQualifiedName, Seq[WdlFile]] = jobDescriptor.inputs mapValues { _.collectAsSeq { case w: WdlFile => w } } + /* Collect all WdlFiles from inputs to the call */ + val callInputFiles: Map[FullyQualifiedName, Seq[WdlFile]] = jobDescriptor.fullyQualifiedInputs mapValues { _.collectAsSeq { case w: WdlFile => w } } - (callInputFiles ++ writeFunctionFiles) flatMap { + val inputs = (callInputFiles ++ writeFunctionFiles) flatMap { case (name, files) => jesInputsFromWdlFiles(name, files, files.map(relativeLocalizationPath), jobDescriptor) } + + inputs.toSet } /** @@ -236,33 +212,56 @@ class JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDes if (referenceName.length <= 127) referenceName else referenceName.md5Sum } - private[jes] def generateJesOutputs(jobDescriptor: BackendJobDescriptor): Seq[JesFileOutput] = { - val wdlFileOutputs = jobDescriptor.key.scope.task.outputs flatMap { taskOutput => - taskOutput.requiredExpression.evaluateFiles(lookup, NoFunctions, taskOutput.wdlType) match { - case Success(wdlFiles) => wdlFiles map relativeLocalizationPath - case Failure(ex) => - jobLogger.warn(s"Could not evaluate $taskOutput: ${ex.getMessage}", ex) - Seq.empty[WdlFile] - } + private[jes] def findGlobOutputs(jobDescriptor: BackendJobDescriptor): Set[WdlGlobFile] 
= { + val globOutputs = (call.task.findOutputFiles(jobDescriptor.fullyQualifiedInputs, NoFunctions) map relativeLocalizationPath) collect { + case glob: WdlGlobFile => glob } + globOutputs.distinct.toSet + } + + private[jes] def generateJesOutputs(jobDescriptor: BackendJobDescriptor): Set[JesFileOutput] = { + val wdlFileOutputs = call.task.findOutputFiles(jobDescriptor.fullyQualifiedInputs, NoFunctions) map relativeLocalizationPath - // Create the mappings. GLOB mappings require special treatment (i.e. stick everything matching the glob in a folder) - wdlFileOutputs.distinct map { wdlFile => - val destination = wdlFile match { - case WdlSingleFile(filePath) => callRootPath.resolve(filePath).toString - case WdlGlobFile(filePath) => globOutputPath(filePath).toString + val outputs = wdlFileOutputs.distinct flatMap { wdlFile => + wdlFile match { + case singleFile: WdlSingleFile => List(generateJesSingleFileOutputs(singleFile)) + case globFile: WdlGlobFile => generateJesGlobFileOutputs(globFile) } - val (relpath, disk) = relativePathAndAttachedDisk(wdlFile.value, runtimeAttributes.disks) - JesFileOutput(makeSafeJesReferenceName(wdlFile.value), destination, relpath, disk) } + + outputs.toSet + } + + private def generateJesSingleFileOutputs(wdlFile: WdlSingleFile): JesFileOutput = { + val destination = callRootPath.resolve(wdlFile.value.stripPrefix("/")).toUri.toString + val (relpath, disk) = relativePathAndAttachedDisk(wdlFile.value, runtimeAttributes.disks) + JesFileOutput(makeSafeJesReferenceName(wdlFile.value), destination, relpath, disk) + } + + private def generateJesGlobFileOutputs(wdlFile: WdlGlobFile): List[JesFileOutput] = { + val globName = callEngineFunctions.globName(wdlFile.value) + val globDirectory = globName + "/" + val globListFile = globName + ".list" + val gcsGlobDirectoryDestinationPath = callRootPath.resolve(globDirectory).toUri.toString + val gcsGlobListFileDestinationPath = callRootPath.resolve(globListFile).toUri.toString + + val (_, globDirectoryDisk) = relativePathAndAttachedDisk(wdlFile.value, runtimeAttributes.disks) + + // We need both the glob directory and the glob list: + List( + // The glob directory: + JesFileOutput(makeSafeJesReferenceName(globDirectory), gcsGlobDirectoryDestinationPath, Paths.get(globDirectory + "*"), globDirectoryDisk), + // The glob list file: + JesFileOutput(makeSafeJesReferenceName(globListFile), gcsGlobListFileDestinationPath, Paths.get(globListFile), globDirectoryDisk) + ) } private def instantiateCommand: Try[String] = { - val backendInputs = jobDescriptor.inputs mapValues gcsPathToLocal - jobDescriptor.call.instantiateCommandLine(backendInputs, callEngineFunctions, gcsPathToLocal) + val backendInputs = jobDescriptor.inputDeclarations mapValues gcsPathToLocal + jobDescriptor.call.task.instantiateCommand(backendInputs, callEngineFunctions, valueMapper = gcsPathToLocal) } - private def uploadCommandScript(command: String, withMonitoring: Boolean): Future[Unit] = { + private def uploadCommandScript(command: String, withMonitoring: Boolean, globFiles: Set[WdlGlobFile]): Future[Unit] = { val monitoring = if (withMonitoring) { s"""|touch $JesMonitoringLogFile |chmod u+x $JesMonitoringScript @@ -272,6 +271,22 @@ class JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDes val tmpDir = File(JesWorkingDisk.MountPoint)./("tmp").path val rcPath = File(JesWorkingDisk.MountPoint)./(returnCodeFilename).path + def globManipulation(globFile: WdlGlobFile) = { + + val globDir = callEngineFunctions.globName(globFile.value) + val (_, 
disk) = relativePathAndAttachedDisk(globFile.value, runtimeAttributes.disks) + val globDirectory = Paths.get(s"${disk.mountPoint.toAbsolutePath}/$globDir/") + val globList = Paths.get(s"${disk.mountPoint.toAbsolutePath}/$globDir.list") + + s""" + |mkdir $globDirectory + |ln ${globFile.value} $globDirectory + |ls -1 $globDirectory > $globList + """.stripMargin + } + + val globManipulations = globFiles.map(globManipulation).mkString("\n") + val fileContent = s""" |#!/bin/bash @@ -281,35 +296,34 @@ class JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDes |( |cd ${JesWorkingDisk.MountPoint} |$command + |$globManipulations |) |echo $$? > $rcPath """.stripMargin.trim - def writeScript(): Future[Unit] = Future { File(jesCallPaths.gcsExecPath).write(fileContent) } void - - implicit val system = context.system - Retry.withRetry( - writeScript, - isTransient = isTransientJesException, - isFatal = isFatalJesException - ) + Future(File(jesCallPaths.script).write(fileContent)) void } private def googleProject(descriptor: BackendWorkflowDescriptor): String = { descriptor.workflowOptions.getOrElse(WorkflowOptionKeys.GoogleProject, jesAttributes.project) } + private def computeServiceAccount(descriptor: BackendWorkflowDescriptor): String = { + descriptor.workflowOptions.getOrElse(WorkflowOptionKeys.GoogleComputeServiceAccount, jesAttributes.computeServiceAccount) + } + private def createJesRun(jesParameters: Seq[JesParameter], runIdForResumption: Option[String]): Future[Run] = { def createRun() = Future(Run( runIdForResumption, jobDescriptor = jobDescriptor, runtimeAttributes = runtimeAttributes, - callRootPath = callRootPath.toString, + callRootPath = callRootPath.toUri.toString, commandLine = jesCommandLine, logFileName = jesLogFilename, jesParameters, googleProject(jobDescriptor.workflowDescriptor), + computeServiceAccount(jobDescriptor.workflowDescriptor), retryable, initializationData.genomics )) @@ -332,8 +346,8 @@ class JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDes } protected def runWithJes(command: String, - jesInputs: Seq[JesInput], - jesOutputs: Seq[JesFileOutput], + jesInputs: Set[JesInput], + jesOutputs: Set[JesFileOutput], runIdForResumption: Option[String], withMonitoring: Boolean): Future[ExecutionHandle] = { @@ -342,7 +356,7 @@ class JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDes val jesParameters = standardParameters ++ gcsAuthParameter ++ jesInputs ++ jesOutputs val jesJobSetup = for { - _ <- uploadCommandScript(command, withMonitoring) + _ <- uploadCommandScript(command, withMonitoring, findGlobOutputs(jobDescriptor)) run <- createJesRun(jesParameters, runIdForResumption) _ = tellMetadata(Map(CallMetadataKeys.JobId -> run.runId)) } yield run @@ -359,8 +373,8 @@ class JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDes } private def startExecuting(monitoringOutput: Option[JesFileOutput], mode: ExecutionMode): Future[ExecutionHandle] = { - val jesInputs: Seq[JesInput] = generateJesInputs(jobDescriptor).toSeq ++ monitoringScript :+ cmdInput - val jesOutputs: Seq[JesFileOutput] = generateJesOutputs(jobDescriptor) ++ monitoringOutput + val jesInputs: Set[JesInput] = generateJesInputs(jobDescriptor) ++ monitoringScript + cmdInput + val jesOutputs: Set[JesFileOutput] = generateJesOutputs(jobDescriptor) ++ monitoringOutput instantiateCommand match { case Success(command) => runWithJes(command, jesInputs, jesOutputs, mode.jobId.collectFirst { case j: JesJobId => j.operationId }, 
monitoringScript.isDefined) @@ -425,29 +439,7 @@ class JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDes /** * Fire and forget start info to the metadata service */ - private def tellStartMetadata(): Unit = { - val runtimeAttributesMetadata: Map[String, Any] = runtimeAttributes.asMap map { - case (key, value) => s"runtimeAttributes:$key" -> value - } - - var fileMetadata: Map[String, Any] = jesCallPaths.metadataPaths - if (monitoringOutput.nonEmpty) { - // TODO: Move this to JesCallPaths - fileMetadata += JesMetadataKeys.MonitoringLog -> monitoringOutput.get.gcs - } - - val otherMetadata: Map[String, Any] = Map( - JesMetadataKeys.GoogleProject -> jesAttributes.project, - JesMetadataKeys.ExecutionBucket -> jesAttributes.executionBucket, - JesMetadataKeys.EndpointUrl -> jesAttributes.endpointUrl, - "preemptible" -> preemptible, - "cache:allowResultReuse" -> true - ) - - val metadataKeyValues = runtimeAttributesMetadata ++ fileMetadata ++ otherMetadata - - tellMetadata(metadataKeyValues) - } + private def tellStartMetadata() = tellMetadata(metadataKeyValues) /** * Fire and forget info to the metadata service @@ -457,59 +449,32 @@ class JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDes serviceRegistryActor.putMetadata(jobDescriptor.workflowDescriptor.id, Option(jobDescriptor.key), metadataKeyValues) } - private def customLookupFunction(alreadyGeneratedOutputs: Map[String, WdlValue])(toBeLookedUp: String): WdlValue = alreadyGeneratedOutputs.getOrElse(toBeLookedUp, lookup(toBeLookedUp)) - - private[jes] def wdlValueToGcsPath(jesOutputs: Seq[JesFileOutput])(value: WdlValue): WdlValue = { + private[jes] def wdlValueToGcsPath(jesOutputs: Set[JesFileOutput])(value: WdlValue): WdlValue = { def toGcsPath(wdlFile: WdlFile) = jesOutputs collectFirst { case o if o.name == makeSafeJesReferenceName(wdlFile.valueString) => WdlFile(o.gcs) } getOrElse value + value match { case wdlArray: WdlArray => wdlArray map wdlValueToGcsPath(jesOutputs) case wdlMap: WdlMap => wdlMap map { case (k, v) => wdlValueToGcsPath(jesOutputs)(k) -> wdlValueToGcsPath(jesOutputs)(v) } - case file: WdlFile => if (file.value.isGcsUrl) file else toGcsPath(file) + case file: WdlFile => toGcsPath(file) case other => other } } - private def outputLookup(taskOutput: TaskOutput, currentList: Seq[AttemptedLookupResult]) = for { - /** - * This will evaluate the task output expression and coerces it to the task output's type. - * If the result is a WdlFile, then attempt to find the JesOutput with the same path and - * return a WdlFile that represents the GCS path and not the local path. For example, - * - *
-    * output {
-    *   File x = "out" + ".txt"
-    * }
-    * 
- * - * "out" + ".txt" is evaluated to WdlString("out.txt") and then coerced into a WdlFile("out.txt") - * Then, via wdlFileToGcsPath(), we attempt to find the JesOutput with .name == "out.txt". - * If it is found, then WdlFile("gs://some_bucket/out.txt") will be returned. - */ - wdlValue <- taskOutput.requiredExpression.evaluate(customLookupFunction(currentList.toLookupMap), callEngineFunctions) - coercedValue <- taskOutput.wdlType.coerceRawValue(wdlValue) - value = wdlValueToGcsPath(generateJesOutputs(jobDescriptor))(coercedValue) - } yield value + private def postProcess: Try[CallOutputs] = { + def wdlValueToSuccess(value: WdlValue): Try[WdlValue] = Success(value) - - private def outputFoldingFunction: (Seq[AttemptedLookupResult], TaskOutput) => Seq[AttemptedLookupResult] = { - (currentList: Seq[AttemptedLookupResult], taskOutput: TaskOutput) => { - currentList ++ Seq(AttemptedLookupResult(taskOutput.name, outputLookup(taskOutput, currentList))) - } - } - - private def postProcess: Try[JobOutputs] = { - val outputs = call.task.outputs - val outputMappings = outputs.foldLeft(Seq.empty[AttemptedLookupResult])(outputFoldingFunction).map(_.toPair).toMap - TryUtil.sequenceMap(outputMappings) map { outputMap => - outputMap mapValues { v => JobOutput(v) } - } + OutputEvaluator.evaluateOutputs( + jobDescriptor, + callEngineFunctions, + (wdlValueToSuccess _).compose(wdlValueToGcsPath(generateJesOutputs(jobDescriptor))) + ) } - private def handleSuccess(outputMappings: Try[JobOutputs], returnCode: Int, jobDetritusFiles: Map[String, String], executionHandle: ExecutionHandle, events: Seq[ExecutionEvent]): ExecutionHandle = { + private def handleSuccess(outputMappings: Try[CallOutputs], returnCode: Int, jobDetritusFiles: Map[String, Path], executionHandle: ExecutionHandle, events: Seq[ExecutionEvent]): ExecutionHandle = { outputMappings match { case Success(outputs) => SuccessfulExecutionHandle(outputs, returnCode, jobDetritusFiles, events) case Failure(ex: CromwellAggregatedException) if ex.throwables collectFirst { case s: SocketTimeoutException => s } isDefined => @@ -588,10 +553,10 @@ class JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDes FailedNonRetryableExecutionHandle(new RuntimeException( s"execution failed: could not parse return code as integer: ${returnCodeContents.get}")).future case _: RunStatus.Success if !continueOnReturnCode.continueFor(returnCode.get) => - val badReturnCodeMessage = s"Call ${call.fullyQualifiedName}: return code was ${returnCode.getOrElse("(none)")}" + val badReturnCodeMessage = s"Call ${jobDescriptor.key.tag}: return code was ${returnCode.getOrElse("(none)")}" FailedNonRetryableExecutionHandle(new RuntimeException(badReturnCodeMessage), returnCode.toOption).future case success: RunStatus.Success => - handleSuccess(postProcess, returnCode.get, jesCallPaths.detritusPaths.mapValues(_.toString), handle, success.eventList).future + handleSuccess(postProcess, returnCode.get, jesCallPaths.detritusPaths, handle, success.eventList).future case RunStatus.Failed(errorCode, errorMessage, _, _, _, _) => handleFailure(errorCode, errorMessage) } } catch { @@ -611,8 +576,8 @@ class JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDes * @param gcsPath The input path * @return A path which is unique per input path */ - private def localFilePathFromCloudStoragePath(mountPoint: Path, gcsPath: NioGcsPath): Path = { - mountPoint.resolve(gcsPath.bucket).resolve(gcsPath.objectName) + private def localFilePathFromCloudStoragePath(mountPoint: 
Path, gcsPath: CloudStoragePath): Path = { + mountPoint.resolve(gcsPath.bucket()).resolve(gcsPath.toUri.getPath.stripPrefix("/")) } /** @@ -625,11 +590,14 @@ class JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDes private[jes] def gcsPathToLocal(wdlValue: WdlValue): WdlValue = { wdlValue match { case wdlFile: WdlFile => - Try(getPath(wdlFile.valueString)) match { - case Success(gcsPath: NioGcsPath) => + getPath(wdlFile.valueString) match { + case Success(gcsPath: CloudStoragePath) => WdlFile(localFilePathFromCloudStoragePath(workingDisk.mountPoint, gcsPath).toString, wdlFile.isGlob) - case Success(otherPath) => wdlValue - case Failure(e) => wdlValue + case Success(proxy: PathProxy) => + proxy.unbox(classOf[CloudStoragePath]) map { gcsPath => + WdlFile(localFilePathFromCloudStoragePath(workingDisk.mountPoint, gcsPath).toString, wdlFile.isGlob) + } getOrElse wdlValue + case _ => wdlValue } case wdlArray: WdlArray => wdlArray map gcsPathToLocal case wdlMap: WdlMap => wdlMap map { case (k, v) => gcsPathToLocal(k) -> gcsPathToLocal(v) } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesAttributes.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesAttributes.scala index 2e1d00b0e43..ac2475e6430 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesAttributes.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesAttributes.scala @@ -6,30 +6,29 @@ import cats.data._ import cats.data.Validated._ import cats.syntax.cartesian._ import com.typesafe.config.Config -import cromwell.backend.impl.jes.JesImplicits.GoogleAuthWorkflowOptions -import cromwell.core.WorkflowOptions -import cromwell.filesystems.gcs.{GoogleAuthMode, GoogleConfiguration} +import cromwell.backend.impl.jes.authentication.JesAuths +import cromwell.core.ErrorOr._ +import cromwell.filesystems.gcs.GoogleConfiguration import lenthall.config.ValidatedConfig._ import net.ceedubs.ficus.Ficus._ -import cromwell.core.ErrorOr._ import wdl4s.ExceptionWithErrors case class JesAttributes(project: String, - genomicsAuth: GoogleAuthMode, - gcsFilesystemAuth: GoogleAuthMode, + computeServiceAccount: String, + auths: JesAuths, executionBucket: String, endpointUrl: URL, - maxPollingInterval: Int) { - def genomicsCredential(options: WorkflowOptions) = genomicsAuth.credential(options.toGoogleAuthOptions) - def gcsCredential(options: WorkflowOptions) = gcsFilesystemAuth.credential(options.toGoogleAuthOptions) -} + maxPollingInterval: Int, + qps: Int) object JesAttributes { + val GenomicsApiDefaultQps = 1000 private val jesKeys = Set( "project", "root", "maximum-polling-interval", + "compute-service-account", "dockerhub", "genomics", "filesystems", @@ -47,14 +46,17 @@ object JesAttributes { val executionBucket: ValidatedNel[String, String] = backendConfig.validateString("root") val endpointUrl: ErrorOr[URL] = backendConfig.validateURL("genomics.endpoint-url") val maxPollingInterval: Int = backendConfig.as[Option[Int]]("maximum-polling-interval").getOrElse(600) + val computeServiceAccount: String = backendConfig.as[Option[String]]("genomics.compute-service-account").getOrElse("default") val genomicsAuthName: ErrorOr[String] = backendConfig.validateString("genomics.auth") val gcsFilesystemAuthName: ErrorOr[String] = backendConfig.validateString("filesystems.gcs.auth") + val qps = backendConfig.as[Option[Int]]("genomics-api-queries-per-100-seconds").getOrElse(GenomicsApiDefaultQps) / 100 + (project |@| executionBucket |@| endpointUrl 
|@| genomicsAuthName |@| gcsFilesystemAuthName) map { (_, _, _, _, _) } flatMap { case (p, b, u, genomicsName, gcsName) => (googleConfig.auth(genomicsName) |@| googleConfig.auth(gcsName)) map { case (genomicsAuth, gcsAuth) => - JesAttributes(p, genomicsAuth, gcsAuth, b, u, maxPollingInterval) + JesAttributes(p, computeServiceAccount, JesAuths(genomicsAuth, gcsAuth), b, u, maxPollingInterval, qps) } } match { case Valid(r) => r diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendLifecycleActorFactory.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendLifecycleActorFactory.scala index 92f653c1079..f61769379d6 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendLifecycleActorFactory.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendLifecycleActorFactory.scala @@ -8,9 +8,9 @@ import cromwell.backend._ import cromwell.backend.callcaching.FileHashingActor.FileHashingFunction import cromwell.backend.impl.jes.callcaching.JesBackendFileHashing import cromwell.backend.validation.RuntimeAttributesKeys +import cromwell.core.CallOutputs import cromwell.core.Dispatcher.BackendDispatcher -import cromwell.core.{ExecutionStore, OutputStore} -import wdl4s.Call +import wdl4s.TaskCall import wdl4s.expression.WdlStandardLibraryFunctions @@ -21,7 +21,7 @@ case class JesBackendLifecycleActorFactory(name: String, configurationDescriptor val jesConfiguration = new JesConfiguration(configurationDescriptor) override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], + calls: Set[TaskCall], serviceRegistryActor: ActorRef): Option[Props] = { Option(JesInitializationActor.props(workflowDescriptor, calls, jesConfiguration, serviceRegistryActor).withDispatcher(BackendDispatcher)) } @@ -46,15 +46,15 @@ case class JesBackendLifecycleActorFactory(name: String, configurationDescriptor } override def workflowFinalizationActorProps(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], - executionStore: ExecutionStore, - outputStore: OutputStore, + calls: Set[TaskCall], + jobExecutionMap: JobExecutionMap, + workflowOutputs: CallOutputs, initializationData: Option[BackendInitializationData]) = { // The `JesInitializationActor` will only return a non-`Empty` `JesBackendInitializationData` from a successful `beforeAll` // invocation. HOWEVER, the finalization actor is created regardless of whether workflow initialization was successful // or not. So the finalization actor must be able to handle an empty `JesBackendInitializationData` option, and there is no // `.get` on the initialization data as there is with the execution or cache hit copying actor methods. 
- Option(JesFinalizationActor.props(workflowDescriptor, calls, jesConfiguration, executionStore, outputStore, initializationData.toJes).withDispatcher(BackendDispatcher)) + Option(JesFinalizationActor.props(workflowDescriptor, calls, jesConfiguration, jobExecutionMap, workflowOutputs, initializationData.toJes).withDispatcher(BackendDispatcher)) } override def runtimeAttributeDefinitions(initializationDataOption: Option[BackendInitializationData]) = staticRuntimeAttributeDefinitions @@ -63,16 +63,21 @@ case class JesBackendLifecycleActorFactory(name: String, configurationDescriptor jobKey: BackendJobDescriptorKey, initializationData: Option[BackendInitializationData]): WdlStandardLibraryFunctions = { - val jesCallPaths = initializationData.toJes.get.workflowPaths.toJesCallPaths(jobKey) - new JesExpressionFunctions(List(jesCallPaths.gcsFileSystem), jesCallPaths.callContext) + val jesCallPaths = initializationData.toJes.get.workflowPaths.toJobPaths(jobKey) + new JesExpressionFunctions(List(jesCallPaths.gcsPathBuilder), jesCallPaths.callContext) } override def getExecutionRootPath(workflowDescriptor: BackendWorkflowDescriptor, backendConfig: Config, initializationData: Option[BackendInitializationData]): Path = { - initializationData.toJes.get.workflowPaths.rootPath + initializationData.toJes.get.workflowPaths.executionRoot } - override def backendSingletonActorProps = Option(JesBackendSingletonActor.props()) + override def getWorkflowExecutionRootPath(workflowDescriptor: BackendWorkflowDescriptor, backendConfig: Config, + initializationData: Option[BackendInitializationData]): Path = { + initializationData.toJes.get.workflowPaths.workflowRoot + } + + override def backendSingletonActorProps = Option(JesBackendSingletonActor.props(jesConfiguration.qps)) override lazy val fileHashingFunction: Option[FileHashingFunction] = Option(FileHashingFunction(JesBackendFileHashing.getCrc32c)) } @@ -86,8 +91,8 @@ object JesBackendLifecycleActorFactory { } val staticRuntimeAttributeDefinitions = { - import RuntimeAttributesKeys._ import JesRuntimeAttributes._ + import RuntimeAttributesKeys._ Set( RuntimeAttributeDefinition(DockerKey, None, usedInCallCaching = true), diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendSingletonActor.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendSingletonActor.scala index 3b830107a1e..5c1275ae149 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendSingletonActor.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendSingletonActor.scala @@ -1,12 +1,12 @@ package cromwell.backend.impl.jes import akka.actor.{Actor, ActorLogging, Props} -import cromwell.backend.impl.jes.statuspolling.{JesApiQueryManager} +import cromwell.backend.impl.jes.statuspolling.JesApiQueryManager import cromwell.backend.impl.jes.statuspolling.JesApiQueryManager.DoPoll -class JesBackendSingletonActor extends Actor with ActorLogging { +final case class JesBackendSingletonActor(qps: Int) extends Actor with ActorLogging { - val pollingActor = context.actorOf(JesApiQueryManager.props) + val pollingActor = context.actorOf(JesApiQueryManager.props(qps)) override def receive = { case poll: DoPoll => @@ -16,5 +16,5 @@ class JesBackendSingletonActor extends Actor with ActorLogging { } object JesBackendSingletonActor { - def props(): Props = Props(new JesBackendSingletonActor()) + def props(qps: Int): Props = Props(JesBackendSingletonActor(qps)) } diff --git 
a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesCacheHitCopyingActor.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesCacheHitCopyingActor.scala index 078e2e081f4..a77dd66c7f6 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesCacheHitCopyingActor.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesCacheHitCopyingActor.scala @@ -5,7 +5,7 @@ import java.nio.file.Path import akka.actor.{ActorRef, Props} import cromwell.backend.callcaching.CacheHitDuplicating import cromwell.backend.{BackendCacheHitCopyingActor, BackendJobDescriptor} -import cromwell.core.PathCopier +import cromwell.core.path.PathCopier import cromwell.core.logging.JobLogging case class JesCacheHitCopyingActor(override val jobDescriptor: BackendJobDescriptor, @@ -15,9 +15,11 @@ case class JesCacheHitCopyingActor(override val jobDescriptor: BackendJobDescrip extends BackendCacheHitCopyingActor with CacheHitDuplicating with JesJobCachingActorHelper with JobLogging { override protected def duplicate(source: Path, destination: Path) = PathCopier.copy(source, destination).get - override protected def destinationCallRootPath = jesCallPaths.callRootPath + override protected def destinationCallRootPath = jesCallPaths.callExecutionRoot override protected def destinationJobDetritusPaths = jesCallPaths.detritusPaths + + override val workflowDescriptor = jobDescriptor.workflowDescriptor } object JesCacheHitCopyingActor { diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesCallPaths.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesCallPaths.scala deleted file mode 100644 index f18daecaa5c..00000000000 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesCallPaths.scala +++ /dev/null @@ -1,82 +0,0 @@ -package cromwell.backend.impl.jes - -import java.nio.file.Path - -import cromwell.backend.impl.jes.authentication.JesCredentials -import cromwell.backend.io.JobPaths -import cromwell.backend.io.JobPaths._ -import cromwell.backend.{BackendJobDescriptorKey, BackendWorkflowDescriptor} -import cromwell.core.CallContext -import cromwell.services.metadata.CallMetadataKeys - -import scala.concurrent.ExecutionContext - -object JesCallPaths { - def apply(jobKey: BackendJobDescriptorKey, workflowDescriptor: BackendWorkflowDescriptor, - jesConfiguration: JesConfiguration, - credentials: JesCredentials)(implicit ec: ExecutionContext): JesCallPaths = { - new JesCallPaths(jobKey, workflowDescriptor, jesConfiguration, credentials) - } - - val JesLogPathKey = "jesLog" - val GcsExecPathKey = "gcsExec" -} - -class JesCallPaths(jobKey: BackendJobDescriptorKey, workflowDescriptor: BackendWorkflowDescriptor, - jesConfiguration: JesConfiguration, - credentials: JesCredentials)(implicit ec: ExecutionContext) extends - JesWorkflowPaths(workflowDescriptor, jesConfiguration, credentials)(ec) { - - val jesLogBasename = { - val index = jobKey.index.map(s => s"-$s").getOrElse("") - s"${jobKey.scope.unqualifiedName}$index" - } - - val callRootPath: Path = { - val callName = jobKey.call.fullyQualifiedName.split('.').last - val call = s"$CallPrefix-$callName" - val shard = jobKey.index map { s => s"$ShardPrefix-$s" } getOrElse "" - val retry = if (jobKey.attempt > 1) s"$AttemptPrefix-${jobKey.attempt}" else "" - - List(call, shard, retry).foldLeft(workflowRootPath)((path, dir) => path.resolve(dir)) - } - - val returnCodeFilename: String = s"$jesLogBasename-rc.txt" - val stdoutFilename: String = 
s"$jesLogBasename-stdout.log" - val stderrFilename: String = s"$jesLogBasename-stderr.log" - val jesLogFilename: String = s"$jesLogBasename.log" - val gcsExecFilename: String = "exec.sh" - - lazy val returnCodePath: Path = callRootPath.resolve(returnCodeFilename) - lazy val stdoutPath: Path = callRootPath.resolve(stdoutFilename) - lazy val stderrPath: Path = callRootPath.resolve(stderrFilename) - lazy val jesLogPath: Path = callRootPath.resolve(jesLogFilename) - lazy val gcsExecPath: Path = callRootPath.resolve(gcsExecFilename) - lazy val callContext = CallContext(callRootPath, stdoutFilename, stderrFilename) - - /* - TODO: Move various monitoring files path generation here. - - "/cromwell_root" is a well known path, called in the regular JobPaths callDockerRoot. - This JesCallPaths should know about that root, and be able to create the monitoring file paths. - Instead of the AsyncActor creating the paths, the paths could then be shared with the CachingActor. - - Those monitoring paths could then be returned by metadataFiles and detritusFiles. - */ - - lazy val metadataPaths: Map[String, Path] = Map( - CallMetadataKeys.CallRoot -> callRootPath, - CallMetadataKeys.Stdout -> stdoutPath, - CallMetadataKeys.Stderr -> stderrPath, - CallMetadataKeys.BackendLogsPrefix + ":log" -> jesLogPath - ) - - lazy val detritusPaths: Map[String, Path] = Map( - JobPaths.CallRootPathKey -> callRootPath, - JesCallPaths.GcsExecPathKey -> gcsExecPath, - JesCallPaths.JesLogPathKey -> jesLogPath, - JobPaths.StdoutPathKey -> stdoutPath, - JobPaths.StdErrPathKey -> stderrPath, - JobPaths.ReturnCodePathKey -> returnCodePath - ) -} diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesConfiguration.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesConfiguration.scala index 6657250b081..20ec8130d9b 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesConfiguration.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesConfiguration.scala @@ -2,14 +2,36 @@ package cromwell.backend.impl.jes import cromwell.backend.BackendConfigurationDescriptor import cromwell.backend.impl.jes.authentication.JesDockerCredentials +import cromwell.backend.impl.jes.io._ import cromwell.core.DockerConfiguration -import cromwell.filesystems.gcs.GoogleConfiguration +import cromwell.core.path.CustomRetryParams +import cromwell.core.retry.SimpleExponentialBackoff +import cromwell.filesystems.gcs.{GoogleConfiguration, RetryableGcsPathBuilderFactory} + +import scala.concurrent.duration._ +import scala.language.postfixOps + +object JesConfiguration { + val GcsRetryParams = CustomRetryParams( + timeout = Duration.Inf, + maxRetries = Option(3), + backoff = SimpleExponentialBackoff(1 seconds, 3 seconds, 1.5D), + isTransient = isTransientJesException, + isFatal = isFatalJesException + ) +} class JesConfiguration(val configurationDescriptor: BackendConfigurationDescriptor) { + private val googleConfig = GoogleConfiguration(configurationDescriptor.globalConfig) + val root = configurationDescriptor.backendConfig.getString("root") - val googleConfig = GoogleConfiguration(configurationDescriptor.globalConfig) val jesAttributes = JesAttributes(googleConfig, configurationDescriptor.backendConfig) + val jesAuths = jesAttributes.auths + val jesComputeServiceAccount = jesAttributes.computeServiceAccount + val gcsPathBuilderFactory = RetryableGcsPathBuilderFactory(jesAuths.gcs, customRetryParams = JesConfiguration.GcsRetryParams) + val genomicsFactory = 
GenomicsFactory(googleConfig.applicationName, jesAuths.genomics, jesAttributes.endpointUrl) val dockerCredentials = DockerConfiguration.build(configurationDescriptor.backendConfig).dockerCredentials map JesDockerCredentials.apply - val needAuthFileUpload = jesAttributes.gcsFilesystemAuth.requiresAuthFile || dockerCredentials.isDefined + val needAuthFileUpload = jesAuths.gcs.requiresAuthFile || dockerCredentials.isDefined + val qps = jesAttributes.qps } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesExpressionFunctions.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesExpressionFunctions.scala index 823108ca81f..bf29a387fdb 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesExpressionFunctions.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesExpressionFunctions.scala @@ -1,42 +1,38 @@ package cromwell.backend.impl.jes -import java.nio.file.{FileSystem, Path} +import java.nio.file.{Files, Path} -import better.files._ -import cromwell.backend.wdl.{PureFunctions, ReadLikeFunctions, WriteFunctions} -import cromwell.backend.impl.jes.JesImplicits.PathString +import cromwell.backend.wdl.{ReadLikeFunctions, WriteFunctions} import cromwell.core.CallContext -import cromwell.filesystems.gcs.GcsFileSystem -import wdl4s.expression.WdlStandardLibraryFunctions +import cromwell.core.path.PathBuilder +import cromwell.filesystems.gcs.GcsPathBuilder +import wdl4s.expression.{PureStandardLibraryFunctionsLike, WdlStandardLibraryFunctions} import wdl4s.values._ -import scala.language.postfixOps +import scala.collection.JavaConverters._ import scala.util.{Success, Try} -class JesExpressionFunctions(override val fileSystems: List[FileSystem], - context: CallContext - ) extends WdlStandardLibraryFunctions with PureFunctions with ReadLikeFunctions with WriteFunctions { - import JesExpressionFunctions.EnhancedPath +class JesExpressionFunctions(override val pathBuilders: List[PathBuilder], context: CallContext) + extends WdlStandardLibraryFunctions with PureStandardLibraryFunctionsLike with ReadLikeFunctions with WriteFunctions { - private def globDirectory(glob: String): String = s"glob-${glob.md5Sum}/" + override def writeTempFile(path: String, prefix: String, suffix: String, content: String): String = super[WriteFunctions].writeTempFile(path, prefix, suffix, content) + private[jes] def globDirectory(glob: String): String = globName(glob) + "/" + private[jes] def globName(glob: String) = s"glob-${glob.md5Sum}" override def globPath(glob: String): String = context.root.resolve(globDirectory(glob)).toString override def glob(path: String, pattern: String): Seq[String] = { - File(path.toAbsolutePath(fileSystems).asDirectory). 
- glob("**/*") map { _.pathAsString } filterNot { _.toString == path } toSeq + val name = globName(pattern) + val listFile = context.root.resolve(s"$name.list").toRealPath() + Files.readAllLines(listFile).asScala map { fileName => context.root.resolve(s"$name/$fileName").toUri.toString } } - override def preMapping(str: String): String = if (!GcsFileSystem.isAbsoluteGcsPath(str)) context.root.resolve(str).toString else str + override def preMapping(str: String): String = if (!GcsPathBuilder.isValidGcsUrl(str)) { + context.root.resolve(str.stripPrefix("/")).toUri.toString + } else str override def stdout(params: Seq[Try[WdlValue]]) = Success(WdlFile(context.stdout)) override def stderr(params: Seq[Try[WdlValue]]) = Success(WdlFile(context.stderr)) override val writeDirectory: Path = context.root } - -object JesExpressionFunctions { - implicit class EnhancedPath(val path: Path) extends AnyVal { - def asDirectory = path.toString.toDirectory(path.getFileSystem) - } -} diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesFinalizationActor.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesFinalizationActor.scala index 038c615ddfd..fc2d141bebf 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesFinalizationActor.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesFinalizationActor.scala @@ -6,25 +6,26 @@ import akka.actor.Props import better.files._ import cats.instances.future._ import cats.syntax.functor._ -import cromwell.backend.{BackendJobDescriptorKey, BackendWorkflowDescriptor, BackendWorkflowFinalizationActor} +import cromwell.backend.{BackendWorkflowDescriptor, BackendWorkflowFinalizationActor, JobExecutionMap} +import cromwell.core.CallOutputs import cromwell.core.Dispatcher.IoDispatcher -import cromwell.core.{ExecutionStore, OutputStore, PathCopier} -import wdl4s.Call +import cromwell.core.path.PathCopier +import wdl4s.TaskCall import scala.concurrent.Future import scala.language.postfixOps object JesFinalizationActor { - def props(workflowDescriptor: BackendWorkflowDescriptor, calls: Seq[Call], jesConfiguration: JesConfiguration, - executionStore: ExecutionStore, outputStore: OutputStore, initializationData: Option[JesBackendInitializationData]) = { - Props(new JesFinalizationActor(workflowDescriptor, calls, jesConfiguration, executionStore, outputStore, initializationData)) + def props(workflowDescriptor: BackendWorkflowDescriptor, calls: Set[TaskCall], jesConfiguration: JesConfiguration, + jobExecutionMap: JobExecutionMap, workflowOutputs: CallOutputs, initializationData: Option[JesBackendInitializationData]) = { + Props(new JesFinalizationActor(workflowDescriptor, calls, jesConfiguration, jobExecutionMap, workflowOutputs, initializationData)) } } class JesFinalizationActor (override val workflowDescriptor: BackendWorkflowDescriptor, - override val calls: Seq[Call], - jesConfiguration: JesConfiguration, executionStore: ExecutionStore, - outputStore: OutputStore, + override val calls: Set[TaskCall], + jesConfiguration: JesConfiguration, jobExecutionMap: JobExecutionMap, + workflowOutputs: CallOutputs, initializationData: Option[JesBackendInitializationData]) extends BackendWorkflowFinalizationActor { override val configurationDescriptor = jesConfiguration.configurationDescriptor @@ -68,19 +69,19 @@ class JesFinalizationActor (override val workflowDescriptor: BackendWorkflowDesc } private lazy val logPaths: Seq[Path] = { - val allCallPaths = executionStore.store.toSeq collect { - 
case (backendJobDescriptorKey: BackendJobDescriptorKey, _) => - initializationData map { _.workflowPaths.toJesCallPaths(backendJobDescriptorKey) } + val allCallPaths = jobExecutionMap flatMap { + case (backendJobDescriptor, keys) => + keys map { JesWorkflowPaths(backendJobDescriptor, jesConfiguration)(context.system).toJobPaths(_) } } - allCallPaths.flatten flatMap { callPaths => - Seq(callPaths.stdoutPath, callPaths.stderrPath, callPaths.jesLogPath) + allCallPaths.toSeq flatMap { callPaths => + Seq(callPaths.stdout, callPaths.stderr, callPaths.jesLogPath) } } private def copyLogs(callLogsDirPath: Path, logPaths: Seq[Path]): Unit = { workflowPaths match { - case Some(paths) => logPaths.foreach(PathCopier.copy(paths.rootPath, _, callLogsDirPath)) + case Some(paths) => logPaths.foreach(PathCopier.copy(paths.executionRoot, _, callLogsDirPath)) case None => } } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesImplicits.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesImplicits.scala deleted file mode 100644 index 6c722c756a9..00000000000 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesImplicits.scala +++ /dev/null @@ -1,41 +0,0 @@ -package cromwell.backend.impl.jes - -import java.nio.file.{FileSystem, Path} - -import cromwell.core.{PathFactory, WorkflowOptions} -import cromwell.filesystems.gcs.GoogleAuthMode.GoogleAuthOptions -import cromwell.filesystems.gcs.{GcsFileSystem, GoogleAuthMode} - -import scala.util.Try - -object JesImplicits { - implicit class GoogleAuthWorkflowOptions(val workflowOptions: WorkflowOptions) extends AnyVal { - def toGoogleAuthOptions: GoogleAuthMode.GoogleAuthOptions = new GoogleAuthOptions { - override def get(key: String): Try[String] = workflowOptions.get(key) - } - } - - object PathBuilder extends PathFactory - - implicit class PathString(val str: String) extends AnyVal { - def isGcsUrl: Boolean = str.startsWith("gs://") - def isUriWithProtocol: Boolean = "^[a-z]+://".r.findFirstIn(str).nonEmpty - - def toPath(fss: List[FileSystem]): Path = PathBuilder.buildPath(str, fss) - def toPath(fs: FileSystem): Path = str.toPath(List(fs)) - - def toAbsolutePath(fss: List[FileSystem]): Path = str.toPath(fss).toAbsolutePath - def toAbsolutePath(fs: FileSystem): Path = str.toAbsolutePath(List(fs)) - - def toDirectory(fss: List[FileSystem]): Path = buildPathAsDirectory(str, fss) - def toDirectory(fs: FileSystem): Path = str.toDirectory(List(fs)) - - // TODO this needs to go away because it's gcs specific. 
Replacing gcs FS with google implementation (when available) will take care of it - private def buildPathAsDirectory(rawString: String, fileSystems: List[FileSystem]): Path = { - PathBuilder.findFileSystem(rawString, fileSystems, { - case fs: GcsFileSystem => Try(fs.getPathAsDirectory(rawString)) - case fs => Try(fs.getPath(rawString)) - }) - } - } -} diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesInitializationActor.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesInitializationActor.scala index e76a62e9da3..03cae0dff30 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesInitializationActor.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesInitializationActor.scala @@ -7,21 +7,20 @@ import cats.instances.future._ import cats.syntax.functor._ import com.google.api.services.genomics.Genomics import cromwell.backend.impl.jes.JesInitializationActor._ -import cromwell.backend.impl.jes.authentication.{GcsLocalizing, JesAuthInformation, JesCredentials} +import cromwell.backend.impl.jes.authentication.{GcsLocalizing, JesAuthInformation} import cromwell.backend.impl.jes.io._ import cromwell.backend.validation.RuntimeAttributesDefault import cromwell.backend.validation.RuntimeAttributesKeys._ import cromwell.backend.{BackendInitializationData, BackendWorkflowDescriptor, BackendWorkflowInitializationActor} -import cromwell.core.Dispatcher.IoDispatcher import cromwell.core.WorkflowOptions -import cromwell.core.retry.Retry -import cromwell.filesystems.gcs.{ClientSecrets, GoogleAuthMode} +import cromwell.filesystems.gcs.auth.{ClientSecrets, GoogleAuthMode} import spray.json.JsObject -import wdl4s.Call +import wdl4s.TaskCall import wdl4s.types.{WdlBooleanType, WdlFloatType, WdlIntegerType, WdlStringType} import wdl4s.values.WdlValue import scala.concurrent.Future +import scala.language.postfixOps import scala.util.Try object JesInitializationActor { @@ -29,14 +28,14 @@ object JesInitializationActor { JesRuntimeAttributes.PreemptibleKey, JesRuntimeAttributes.BootDiskSizeKey, JesRuntimeAttributes.DisksKey) def props(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], + calls: Set[TaskCall], jesConfiguration: JesConfiguration, serviceRegistryActor: ActorRef): Props = Props(new JesInitializationActor(workflowDescriptor, calls, jesConfiguration, serviceRegistryActor: ActorRef)) } class JesInitializationActor(override val workflowDescriptor: BackendWorkflowDescriptor, - override val calls: Seq[Call], + override val calls: Set[TaskCall], private[jes] val jesConfiguration: JesConfiguration, override val serviceRegistryActor: ActorRef) extends BackendWorkflowInitializationActor { @@ -58,14 +57,11 @@ class JesInitializationActor(override val workflowDescriptor: BackendWorkflowDes private[jes] lazy val refreshTokenAuth: Option[JesAuthInformation] = { for { - clientSecrets <- List(jesConfiguration.jesAttributes.gcsFilesystemAuth) collectFirst { case s: ClientSecrets => s } + clientSecrets <- List(jesConfiguration.jesAttributes.auths.gcs) collectFirst { case s: ClientSecrets => s } token <- workflowDescriptor.workflowOptions.get(GoogleAuthMode.RefreshTokenOptionKey).toOption } yield GcsLocalizing(clientSecrets, token) } - private val iOExecutionContext = context.system.dispatchers.lookup(IoDispatcher) - - override protected def coerceDefaultRuntimeAttributes(options: WorkflowOptions): Try[Map[String, WdlValue]] = { RuntimeAttributesDefault.workflowOptionsDefault(options, 
JesRuntimeAttributes.coercionMap) } @@ -75,32 +71,25 @@ class JesInitializationActor(override val workflowDescriptor: BackendWorkflowDes */ override def beforeAll(): Future[Option[BackendInitializationData]] = { - val genomicsCredential = jesConfiguration.jesAttributes.genomicsCredential(workflowDescriptor.workflowOptions) - val gcsCredential = jesConfiguration.jesAttributes.gcsCredential(workflowDescriptor.workflowOptions) - - val jesCredentials = JesCredentials(genomicsCredential = genomicsCredential, gcsCredential = gcsCredential) def buildGenomics: Future[Genomics] = Future { - GenomicsFactory(jesConfiguration.googleConfig.applicationName, genomicsCredential, jesConfiguration.jesAttributes.endpointUrl) + jesConfiguration.genomicsFactory.withOptions(workflowDescriptor.workflowOptions) } for { - // generate single filesystem and genomics instances genomics <- buildGenomics - workflowPaths = new JesWorkflowPaths(workflowDescriptor, jesConfiguration, jesCredentials)(iOExecutionContext) + workflowPaths = new JesWorkflowPaths(workflowDescriptor, jesConfiguration)(context.system) _ <- if (jesConfiguration.needAuthFileUpload) writeAuthenticationFile(workflowPaths) else Future.successful(()) - _ = publishWorkflowRoot(workflowPaths.workflowRootPath.toString) + _ = publishWorkflowRoot(workflowPaths.workflowRoot.toString) } yield Option(JesBackendInitializationData(workflowPaths, genomics)) } private def writeAuthenticationFile(workflowPath: JesWorkflowPaths): Future[Unit] = { generateAuthJson(jesConfiguration.dockerCredentials, refreshTokenAuth) map { content => val path = workflowPath.gcsAuthFilePath - val upload = () => Future(path.writeAsJson(content)) - - workflowLogger.info(s"Creating authentication file for workflow ${workflowDescriptor.id} at \n ${path.toString}") - Retry.withRetry(upload, isFatal = isFatalJesException, isTransient = isTransientJesException)(context.system).void.recoverWith { + workflowLogger.info(s"Creating authentication file for workflow ${workflowDescriptor.id} at \n ${path.toUri}") + Future(path.writeAsJson(content)).void.recoverWith { case failure => Future.failed(new IOException("Failed to upload authentication file", failure)) - } + } void } getOrElse Future.successful(()) } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobCachingActorHelper.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobCachingActorHelper.scala index 4ded3e9d14b..fe39df8402a 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobCachingActorHelper.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobCachingActorHelper.scala @@ -4,12 +4,12 @@ import java.nio.file.Path import akka.actor.{Actor, ActorRef} import better.files._ +import cromwell.backend.BackendWorkflowDescriptor import cromwell.backend.callcaching.JobCachingActorHelper -import cromwell.backend.impl.jes.JesAsyncBackendJobExecutionActor.WorkflowOptionKeys import cromwell.backend.impl.jes.io.{JesAttachedDisk, JesWorkingDisk} import cromwell.core.logging.JobLogging -import scala.language.postfixOps +import scala.util.Try trait JesJobCachingActorHelper extends JobCachingActorHelper { this: Actor with JobLogging => @@ -25,23 +25,33 @@ trait JesJobCachingActorHelper extends JobCachingActorHelper { def initializationData: JesBackendInitializationData def serviceRegistryActor: ActorRef + + def workflowDescriptor: BackendWorkflowDescriptor - def getPath(str: String) = jesCallPaths.gcsFileSystem.getPath(str) + def getPath(str: 
String): Try[Path] = jesCallPaths.getPath(str) override lazy val configurationDescriptor = jesConfiguration.configurationDescriptor - lazy val jesCallPaths = initializationData.workflowPaths.toJesCallPaths(jobDescriptor.key) + lazy val jesCallPaths = { + val workflowPaths = if (workflowDescriptor.breadCrumbs.isEmpty) { + initializationData.workflowPaths + } else { + new JesWorkflowPaths(workflowDescriptor, jesConfiguration)(context.system) + } + + workflowPaths.toJobPaths(jobDescriptor.key) + } lazy val runtimeAttributes = JesRuntimeAttributes(jobDescriptor.runtimeAttributes, jobLogger) lazy val retryable = jobDescriptor.key.attempt <= runtimeAttributes.preemptible lazy val workingDisk: JesAttachedDisk = runtimeAttributes.disks.find(_.name == JesWorkingDisk.Name).get - lazy val callRootPath: Path = jesCallPaths.callRootPath + lazy val callRootPath: Path = jesCallPaths.callExecutionRoot lazy val returnCodeFilename = jesCallPaths.returnCodeFilename - lazy val returnCodeGcsPath = jesCallPaths.returnCodePath - lazy val jesStdoutFile = jesCallPaths.stdoutPath - lazy val jesStderrFile = jesCallPaths.stderrPath + lazy val returnCodeGcsPath = jesCallPaths.returnCode + lazy val jesStdoutFile = jesCallPaths.stdout + lazy val jesStderrFile = jesCallPaths.stderr lazy val jesLogFilename = jesCallPaths.jesLogFilename lazy val defaultMonitoringOutputPath = callRootPath.resolve(JesMonitoringLogFile) @@ -49,28 +59,22 @@ trait JesJobCachingActorHelper extends JobCachingActorHelper { lazy val preemptible: Boolean = jobDescriptor.key.attempt <= maxPreemption lazy val jesAttributes = jesConfiguration.jesAttributes - // TODO: Move monitoring paths to JesCallPaths lazy val monitoringScript: Option[JesInput] = { - jobDescriptor.workflowDescriptor.workflowOptions.get(WorkflowOptionKeys.MonitoringScript) map { path => - JesFileInput(s"$MonitoringParamName-in", getPath(path).toString, + jesCallPaths.monitoringPath map { path => + JesFileInput(s"$MonitoringParamName-in", path.toUri.toString, JesWorkingDisk.MountPoint.resolve(JesMonitoringScript), workingDisk) - } toOption + } } lazy val monitoringOutput = monitoringScript map { _ => JesFileOutput(s"$MonitoringParamName-out", defaultMonitoringOutputPath.toString, File(JesMonitoringLogFile).path, workingDisk) } + // Implements CacheHitDuplicating.metadataKeyValues lazy val metadataKeyValues: Map[String, Any] = { val runtimeAttributesMetadata: Map[String, Any] = runtimeAttributes.asMap map { case (key, value) => s"runtimeAttributes:$key" -> value } - - var fileMetadata: Map[String, Any] = jesCallPaths.metadataPaths - if (monitoringOutput.nonEmpty) { - // TODO: Move this to JesCallPaths - fileMetadata += JesMetadataKeys.MonitoringLog -> monitoringOutput.get.gcs - } - + val otherMetadata: Map[String, Any] = Map( JesMetadataKeys.GoogleProject -> jesAttributes.project, JesMetadataKeys.ExecutionBucket -> jesAttributes.executionBucket, @@ -79,6 +83,6 @@ trait JesJobCachingActorHelper extends JobCachingActorHelper { "cache:allowResultReuse" -> true ) - runtimeAttributesMetadata ++ fileMetadata ++ otherMetadata + runtimeAttributesMetadata ++ jesCallPaths.metadataPaths ++ otherMetadata } } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobExecutionActor.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobExecutionActor.scala index b1a8ba8d6a6..476ee04aa2b 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobExecutionActor.scala +++ 
b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobExecutionActor.scala @@ -1,6 +1,7 @@ package cromwell.backend.impl.jes -import akka.actor.{ActorRef, Props} +import akka.actor.SupervisorStrategy.{Decider, Stop} +import akka.actor.{ActorRef, OneForOneStrategy, Props} import akka.event.LoggingReceive import cromwell.backend.BackendJobExecutionActor.{AbortedResponse, BackendJobExecutionResponse} import cromwell.backend.BackendLifecycleActor.AbortJobCommand @@ -62,13 +63,15 @@ case class JesJobExecutionActor(override val jobDescriptor: BackendJobDescriptor private var executor: Option[ActorRef] = None + private[jes] def jabjeaProps = JesAsyncBackendJobExecutionActor.props(jobDescriptor, + completionPromise, + jesConfiguration, + initializationData, + serviceRegistryActor, + jesBackendSingletonActor) + private def launchExecutor: Future[Unit] = Future { - val executionProps = JesAsyncBackendJobExecutionActor.props(jobDescriptor, - completionPromise, - jesConfiguration, - initializationData, - serviceRegistryActor, - jesBackendSingletonActor) + val executionProps = jabjeaProps val executorRef = context.actorOf(executionProps, "JesAsyncBackendJobExecutionActor") executor = Option(executorRef) () @@ -95,4 +98,12 @@ case class JesJobExecutionActor(override val jobDescriptor: BackendJobDescriptor } override def abort(): Unit = {} + + // Supervision strategy: if the JABJEA throws an exception, stop the actor and fail the job. + def jobFailingDecider: Decider = { + case e: Exception => + completionPromise.tryFailure(new RuntimeException("JesAsyncBackendJobExecutionActor failed and didn't catch its exception.", e)) + Stop + } + override val supervisorStrategy = OneForOneStrategy()(jobFailingDecider) } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobPaths.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobPaths.scala new file mode 100644 index 00000000000..fdbdb1dc7fe --- /dev/null +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobPaths.scala @@ -0,0 +1,60 @@ +package cromwell.backend.impl.jes + +import java.nio.file.Path + +import akka.actor.ActorSystem +import cromwell.backend.io.JobPaths +import cromwell.backend.{BackendJobDescriptorKey, BackendWorkflowDescriptor} +import cromwell.core.CallContext +import cromwell.services.metadata.CallMetadataKeys + +object JesJobPaths { + def apply(jobKey: BackendJobDescriptorKey, workflowDescriptor: BackendWorkflowDescriptor, + jesConfiguration: JesConfiguration)(implicit actorSystem: ActorSystem): JesJobPaths = { + new JesJobPaths(jobKey, workflowDescriptor, jesConfiguration) + } + + val JesLogPathKey = "jesLog" + val GcsExecPathKey = "gcsExec" +} + +class JesJobPaths(val jobKey: BackendJobDescriptorKey, workflowDescriptor: BackendWorkflowDescriptor, + jesConfiguration: JesConfiguration)(implicit actorSystem: ActorSystem) extends + JesWorkflowPaths(workflowDescriptor, jesConfiguration)(actorSystem) with JobPaths { + + val jesLogBasename = { + val index = jobKey.index.map(s => s"-$s").getOrElse("") + s"${jobKey.scope.unqualifiedName}$index" + } + + override val returnCodeFilename: String = s"$jesLogBasename-rc.txt" + override val stdoutFilename: String = s"$jesLogBasename-stdout.log" + override val stderrFilename: String = s"$jesLogBasename-stderr.log" + override val scriptFilename: String = "exec.sh" + + val jesLogFilename: String = s"$jesLogBasename.log" + lazy val jesLogPath: Path = callExecutionRoot.resolve(jesLogFilename) + + lazy val callContext = 
CallContext(callExecutionRoot, stdoutFilename, stderrFilename) + + /* + TODO: Move various monitoring files path generation here. + + "/cromwell_root" is a well known path, called in the regular JobPaths callDockerRoot. + This JesCallPaths should know about that root, and be able to create the monitoring file paths. + Instead of the AsyncActor creating the paths, the paths could then be shared with the CachingActor. + + Those monitoring paths could then be returned by metadataFiles and detritusFiles. + */ + + override lazy val customMetadataPaths = Map( + CallMetadataKeys.BackendLogsPrefix + ":log" -> jesLogPath + ) ++ ( + monitoringPath map { p => Map(JesMetadataKeys.MonitoringLog -> p) } getOrElse Map.empty + ) + + override lazy val customDetritusPaths: Map[String, Path] = Map( + JesJobPaths.GcsExecPathKey -> script, + JesJobPaths.JesLogPathKey -> jesLogPath + ) +} diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesWorkflowPaths.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesWorkflowPaths.scala index 9b39c869abd..a7ac5e50ab1 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesWorkflowPaths.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesWorkflowPaths.scala @@ -2,51 +2,61 @@ package cromwell.backend.impl.jes import java.nio.file.Path -import cromwell.backend.impl.jes.authentication.JesCredentials +import akka.actor.ActorSystem +import com.typesafe.config.Config +import cromwell.backend.impl.jes.JesAsyncBackendJobExecutionActor.WorkflowOptionKeys +import cromwell.backend.io.WorkflowPaths import cromwell.backend.{BackendJobDescriptorKey, BackendWorkflowDescriptor} -import cromwell.core.WorkflowOptions.FinalCallLogsDir -import cromwell.filesystems.gcs.{GcsFileSystem, GcsFileSystemProvider, GoogleAuthMode} +import cromwell.core.WorkflowOptions +import cromwell.core.path.PathBuilder +import cromwell.filesystems.gcs.{GcsPathBuilderFactory, RetryableGcsPathBuilder} -import scala.concurrent.ExecutionContext +import scala.language.postfixOps object JesWorkflowPaths { private val GcsRootOptionKey = "jes_gcs_root" private val AuthFilePathOptionKey = "auth_bucket" def apply(workflowDescriptor: BackendWorkflowDescriptor, - jesConfiguration: JesConfiguration, - credentials: JesCredentials)(implicit ec: ExecutionContext) = { - new JesWorkflowPaths(workflowDescriptor, jesConfiguration, credentials) + jesConfiguration: JesConfiguration)(implicit actorSystem: ActorSystem) = { + new JesWorkflowPaths(workflowDescriptor, jesConfiguration) } } -class JesWorkflowPaths(workflowDescriptor: BackendWorkflowDescriptor, - jesConfiguration: JesConfiguration, - credentials: JesCredentials)(implicit ec: ExecutionContext) { +class JesWorkflowPaths(val workflowDescriptor: BackendWorkflowDescriptor, + jesConfiguration: JesConfiguration)(implicit actorSystem: ActorSystem) extends WorkflowPaths { - private val gcsStorage = GoogleAuthMode.buildStorage(credentials.gcsCredential, jesConfiguration.googleConfig.applicationName) - val gcsFileSystemProvider: GcsFileSystemProvider = GcsFileSystemProvider(gcsStorage)(ec) - val gcsFileSystem = GcsFileSystem(gcsFileSystemProvider) + override lazy val executionRootString = workflowDescriptor.workflowOptions.getOrElse(JesWorkflowPaths.GcsRootOptionKey, jesConfiguration.root) + private val workflowOptions: WorkflowOptions = workflowDescriptor.workflowOptions + val gcsPathBuilder: RetryableGcsPathBuilder = jesConfiguration.gcsPathBuilderFactory.withOptions(workflowOptions) - val 
rootPath: Path = - gcsFileSystem.getPath(workflowDescriptor.workflowOptions.getOrElse(JesWorkflowPaths.GcsRootOptionKey, jesConfiguration.root)) - - val workflowRootPath: Path = rootPath.resolve(workflowDescriptor.workflowNamespace.workflow.unqualifiedName) - .resolve(workflowDescriptor.id.toString) - - val finalCallLogsPath = workflowDescriptor.getWorkflowOption(FinalCallLogsDir) map { gcsFileSystem.getPath(_) } + def getHash(gcsUrl: Path) = gcsPathBuilder.getHash(gcsUrl) val gcsAuthFilePath: Path = { /* * This is an "exception". The filesystem used here is built from genomicsAuth * unlike everywhere else where the filesystem used is built from gcsFileSystemAuth */ - val genomicsStorage = GoogleAuthMode.buildStorage(credentials.genomicsCredential, jesConfiguration.googleConfig.applicationName) - val fileSystemWithGenomicsAuth = GcsFileSystem(GcsFileSystemProvider(genomicsStorage)(ec)) - val bucket = workflowDescriptor.workflowOptions.get(JesWorkflowPaths.AuthFilePathOptionKey) getOrElse workflowRootPath.toString - - fileSystemWithGenomicsAuth.getPath(bucket).resolve(s"${workflowDescriptor.id}_auth.json") + val genomicsCredentials = jesConfiguration.jesAuths.genomics + + // The default auth file bucket is always at the root of the root workflow + val defaultBucket = executionRoot.resolve(workflowDescriptor.rootWorkflow.unqualifiedName).resolve(workflowDescriptor.rootWorkflowId.toString) + + val bucket = workflowDescriptor.workflowOptions.get(JesWorkflowPaths.AuthFilePathOptionKey) getOrElse defaultBucket.toUri.toString + val authBucket = GcsPathBuilderFactory(genomicsCredentials).withOptions(workflowOptions).build(bucket) recover { + case ex => throw new Exception(s"Invalid gcs auth_bucket path $bucket", ex) + } get + + authBucket.resolve(s"${workflowDescriptor.rootWorkflowId}_auth.json") + } + + + val monitoringPath = workflowOptions.get(WorkflowOptionKeys.MonitoringScript).toOption map { path => + // Fail here if the path exists but can't be built + getPath(path).get } - def toJesCallPaths(jobKey: BackendJobDescriptorKey) = JesCallPaths(jobKey, workflowDescriptor, jesConfiguration, credentials)(ec) + override def toJobPaths(jobKey: BackendJobDescriptorKey) = JesJobPaths(jobKey, workflowDescriptor, jesConfiguration) + override def config: Config = jesConfiguration.configurationDescriptor.backendConfig + override def pathBuilders: List[PathBuilder] = List(gcsPathBuilder) } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/Run.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/Run.scala index b5a8b5f9f7b..05078c7f2cc 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/Run.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/Run.scala @@ -21,7 +21,6 @@ object Run { "https://www.googleapis.com/auth/compute" ).asJava - private val JesServiceAccount = new ServiceAccount().setEmail("default").setScopes(GenomicsScopes) private val AcceptableEvents = Set("start", "pulling-image", "localizing-files", "running-docker", "delocalizing-files", "ok", "fail", "start-shutdown", "preempted") val NoAddressFieldName = "noAddress" @@ -36,6 +35,7 @@ object Run { logFileName: String, jesParameters: Seq[JesParameter], projectId: String, + computeServiceAccount: String, preemptible: Boolean, genomicsInterface: Genomics): Run = { val logger = new JobLogger("JesRun", jobDescriptor.workflowDescriptor.id, jobDescriptor.key.tag, None, Set(slf4jLogger)) @@ -48,7 +48,7 @@ object Run { .setProjectId(projectId) 
.setDocker(pipelineInfo.docker) .setResources(pipelineInfo.resources) - .setName(workflow.workflowNamespace.workflow.unqualifiedName) + .setName(workflow.workflow.unqualifiedName) .setInputParameters(jesParameters.collect({ case i: JesInput => i.toGooglePipelineParameter }).toVector.asJava) .setOutputParameters(jesParameters.collect({ case i: JesFileOutput => i.toGooglePipelineParameter }).toVector.asJava) @@ -60,7 +60,8 @@ object Run { } def runPipeline: String = { - val rpargs = new RunPipelineArgs().setProjectId(projectId).setServiceAccount(JesServiceAccount).setResources(runtimePipelineResources) + val svcAccount = new ServiceAccount().setEmail(computeServiceAccount).setScopes(GenomicsScopes) + val rpargs = new RunPipelineArgs().setProjectId(projectId).setServiceAccount(svcAccount).setResources(runtimePipelineResources) rpargs.setInputs(jesParameters.collect({ case i: JesInput => i.name -> i.toGoogleRunParameter }).toMap.asJava) logger.debug(s"Inputs:\n${stringifyMap(rpargs.getInputs.asScala.toMap)}") diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesAuths.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesAuths.scala new file mode 100644 index 00000000000..bb6b048c4f9 --- /dev/null +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesAuths.scala @@ -0,0 +1,5 @@ +package cromwell.backend.impl.jes.authentication + +import cromwell.filesystems.gcs.auth.GoogleAuthMode + +case class JesAuths(genomics: GoogleAuthMode, gcs: GoogleAuthMode) diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesCredentials.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesCredentials.scala deleted file mode 100644 index b4316fde355..00000000000 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesCredentials.scala +++ /dev/null @@ -1,5 +0,0 @@ -package cromwell.backend.impl.jes.authentication - -import com.google.api.client.auth.oauth2.Credential - -case class JesCredentials(genomicsCredential: Credential, gcsCredential: Credential) diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesVMAuthentication.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesVMAuthentication.scala index 67ddd1df3cb..9c92b380ae8 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesVMAuthentication.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesVMAuthentication.scala @@ -1,7 +1,7 @@ package cromwell.backend.impl.jes.authentication import cromwell.core.DockerCredentials -import cromwell.filesystems.gcs.ClientSecrets +import cromwell.filesystems.gcs.auth.ClientSecrets import spray.json.{JsString, JsValue} /** diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/callcaching/JesBackendFileHashing.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/callcaching/JesBackendFileHashing.scala index 5c7cf4a8a01..42cf8f7bee2 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/callcaching/JesBackendFileHashing.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/callcaching/JesBackendFileHashing.scala @@ -9,8 +9,8 @@ import scala.util.{Failure, Try} private[jes] object JesBackendFileHashing { def getCrc32c(singleFileHashRequest: SingleFileHashRequest, log: LoggingAdapter): 
Try[String] = { def usingJesInitData(jesInitData: JesBackendInitializationData) = for { - path <- Try(jesInitData.workflowPaths.gcsFileSystem.getPath(singleFileHashRequest.file.valueString)) - crc32c <- Try(jesInitData.workflowPaths.gcsFileSystemProvider.crc32cHash(path)) + path <- jesInitData.workflowPaths.getPath(singleFileHashRequest.file.valueString) + crc32c <- jesInitData.workflowPaths.getHash(path) } yield crc32c singleFileHashRequest.initializationData match { diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/io/package.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/io/package.scala index 24d417c99e3..2a2ae8ac25c 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/io/package.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/io/package.scala @@ -3,19 +3,14 @@ package cromwell.backend.impl.jes import java.nio.file.{Files, Path} import com.google.api.client.http.HttpResponseException -import cromwell.filesystems.gcs._ +import com.google.cloud.storage.contrib.nio.CloudStorageOptions package object io { implicit class PathEnhanced(val path: Path) extends AnyVal { import better.files._ - def hash = path match { - case gcs: NioGcsPath => gcs.getFileSystem.provider().asInstanceOf[GcsFileSystemProvider].crc32cHash(gcs) - case _ => File(path).md5 - } - def writeAsJson(content: String): File = { - Files.write(path, content.getBytes, ContentTypeOption.Json) + Files.write(path, content.getBytes, CloudStorageOptions.withMimeType("application/json")) } } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesApiQueryManager.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesApiQueryManager.scala index f3a7739ac67..12fe055dc84 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesApiQueryManager.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesApiQueryManager.scala @@ -11,15 +11,19 @@ import scala.collection.immutable.Queue * Currently, just holds a set of JES status poll requests until a PollingActor pulls the work. * TODO: Could eventually move all of the JES queries into a single work-pulling model. */ -class JesApiQueryManager extends Actor with ActorLogging { +class JesApiQueryManager(val qps: Int) extends Actor with ActorLogging { - private var workQueue: Queue[JesStatusPollQuery] = Queue.empty + // workQueue is protected for the unit tests, not intended to be generally overridden + protected[statuspolling] var workQueue: Queue[JesStatusPollQuery] = Queue.empty private var workInProgress: Map[ActorRef, JesPollingWorkBatch] = Map.empty // If the statusPoller dies, we want to stop it and handle the termination ourselves. override val supervisorStrategy = SupervisorStrategy.stoppingStrategy - private def statusPollerProps = JesPollingActor.props(self) - private var statusPoller: ActorRef = _ + private def statusPollerProps = JesPollingActor.props(self, qps) + + // statusPoller is protected for the unit tests, not intended to be generally overridden + protected[statuspolling] var statusPoller: ActorRef = _ + resetStatusPoller() override def receive = { @@ -70,15 +74,17 @@ class JesApiQueryManager extends Actor with ActorLogging { // Currently we can assume this is a polling actor. Might change in a future update: workInProgress.get(terminee) match { case Some(work) => - // Ouch. We should tell all of its clients that it fell over. 
And then start a new one. - log.error(s"The JES polling actor $terminee unexpectedly terminated while conducting ${work.workBatch.tail.size + 1} polls. Making a new one...") - work.workBatch.toList foreach { _.requester ! JesPollingActor.JesPollError } + // Most likely due to an unexpected HTTP error, push the work back on the queue and keep going + log.info(s"The JES polling actor $terminee unexpectedly terminated while conducting ${work.workBatch.tail.size + 1} polls. Making a new one...") + workInProgress -= terminee + workQueue = workQueue ++ work.workBatch.toList case None => // It managed to die while doing absolutely nothing...!? // Maybe it deserves an entry in https://en.wikipedia.org/wiki/List_of_unusual_deaths // Oh well, in the mean time don't do anything, just start a new one log.error(s"The JES polling actor $terminee managed to unexpectedly terminate whilst doing absolutely nothing. This is probably a programming error. Making a new one...") } + resetStatusPoller() } @@ -93,7 +99,7 @@ class JesApiQueryManager extends Actor with ActorLogging { object JesApiQueryManager { - def props: Props = Props(new JesApiQueryManager) + def props(qps: Int): Props = Props(new JesApiQueryManager(qps)) /** * Poll the job represented by the Run. diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesPollingActor.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesPollingActor.scala index 4152d393333..31a9d114a0c 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesPollingActor.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesPollingActor.scala @@ -7,7 +7,7 @@ import com.google.api.client.googleapis.batch.json.JsonBatchCallback import com.google.api.client.googleapis.json.GoogleJsonError import com.google.api.client.http.HttpHeaders import com.google.api.services.genomics.model.Operation -import cromwell.backend.impl.jes.Run +import cromwell.backend.impl.jes.{JesAttributes, Run} import cromwell.backend.impl.jes.statuspolling.JesApiQueryManager.{JesPollingWorkBatch, JesStatusPollQuery, NoWorkToDo} import cromwell.backend.impl.jes.statuspolling.JesPollingActor._ @@ -19,12 +19,11 @@ import scala.concurrent.duration._ /** * Polls JES for status. Pipes the results back (so expect either a RunStatus or a akka.actor.Status.Failure). */ -class JesPollingActor(pollingManager: ActorRef) extends Actor with ActorLogging { +class JesPollingActor(val pollingManager: ActorRef, val qps: Int) extends Actor with ActorLogging { + // The interval to delay between submitting each batch + lazy val batchInterval = determineBatchInterval(determineEffectiveQps(qps)) + log.debug("JES batch polling interval is {}", batchInterval) - // We want to query at just under our fixed JES QPS limit of 20 per second. That should hopefully allow some room at the edges - // for things like new calls, etc. - val MaxBatchSize = 100 - val BatchInterval = 5.5.seconds self ! NoWorkToDo // Starts the check-for-work cycle implicit val ec: ExecutionContext = context.dispatcher @@ -109,14 +108,43 @@ class JesPollingActor(pollingManager: ActorRef) extends Actor with ActorLogging * Warning: Only use this from inside a receive method. */ private def scheduleCheckForWork(): Unit = { - context.system.scheduler.scheduleOnce(BatchInterval) { pollingManager ! JesApiQueryManager.RequestJesPollingWork(MaxBatchSize) } + context.system.scheduler.scheduleOnce(batchInterval) { pollingManager ! 
JesApiQueryManager.RequestJesPollingWork(MaxBatchSize) } () } + + /** + * We don't want to allow non-positive QPS values. Catch these instances and replace them with a sensible default. + * Here we're using the default value coming from JES itself + */ + private def determineEffectiveQps(qps: Int): Int = { + if (qps > 0) qps + else { + val defaultQps = JesAttributes.GenomicsApiDefaultQps + log.warning("Supplied QPS for Google Genomics API was not positive, value was {} using {} instead", qps, defaultQps) + defaultQps + } + } } object JesPollingActor { - def props(pollingManager: ActorRef) = Props(new JesPollingActor(pollingManager)) + def props(pollingManager: ActorRef, qps: Int) = Props(new JesPollingActor(pollingManager, qps)) + + // The Batch API limits us to 100 at a time + val MaxBatchSize = 100 + + /** + * Given the Genomics API queries per 100 seconds and given MaxBatchSize will determine a batch interval which + * is at 90% of the quota. The (still crude) delta is to provide some room at the edges for things like new + * calls, etc. + * + * Forcing the minimum value to be 1 second, for now it seems unlikely to matter and it makes testing a bit + * easier + */ + def determineBatchInterval(qps: Int): FiniteDuration = { + val maxInterval = MaxBatchSize / qps.toDouble // Force this to be floating point in case the value is < 1 + val interval = Math.max(maxInterval * 0.9, 1) + interval.seconds + } final case class JesPollFailed(e: GoogleJsonError, responseHeaders: HttpHeaders) - case object JesPollError } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesPollingActorClient.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesPollingActorClient.scala index 1bf7328f83a..9070378c338 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesPollingActorClient.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesPollingActorClient.scala @@ -3,7 +3,7 @@ package cromwell.backend.impl.jes.statuspolling import java.io.IOException import akka.actor.{Actor, ActorLogging, ActorRef} -import cromwell.backend.impl.jes.statuspolling.JesPollingActor.{JesPollError, JesPollFailed} +import cromwell.backend.impl.jes.statuspolling.JesPollingActor.JesPollFailed import cromwell.backend.impl.jes.{Run, RunStatus} import scala.concurrent.{Future, Promise} @@ -28,9 +28,6 @@ trait JesPollingActorClient { this: Actor with ActorLogging => case JesPollFailed(e, responseHeaders) => log.debug("JES poll failed! Sad.") completePromise(Failure(new IOException(s"Google request failed: ${e.toPrettyString}"))) - case JesPollError => - log.debug("JES poll failed when polling actor died unexpectedly! 
Sad.") - completePromise(Failure(new RuntimeException("Unexpected actor death!"))) } private def completePromise(runStatus: Try[RunStatus]) = { diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesAsyncBackendJobExecutionActorSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesAsyncBackendJobExecutionActorSpec.scala index 5601e1ebbbc..40078cac614 100644 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesAsyncBackendJobExecutionActorSpec.scala +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesAsyncBackendJobExecutionActorSpec.scala @@ -4,19 +4,20 @@ import java.nio.file.Paths import java.util.UUID import akka.actor.{ActorRef, Props} -import akka.event.LoggingAdapter import akka.testkit.{ImplicitSender, TestActorRef, TestDuration, TestProbe} +import com.google.cloud.storage.contrib.nio.CloudStoragePath import cromwell.backend.BackendJobExecutionActor.BackendJobExecutionResponse +import cromwell.backend._ import cromwell.backend.async.AsyncBackendJobExecutionActor.{Execute, ExecutionMode} import cromwell.backend.async.{AbortedExecutionHandle, ExecutionHandle, FailedNonRetryableExecutionHandle, FailedRetryableExecutionHandle} import cromwell.backend.impl.jes.JesAsyncBackendJobExecutionActor.JesPendingExecutionHandle -import cromwell.backend.impl.jes.MockObjects._ import cromwell.backend.impl.jes.RunStatus.Failed import cromwell.backend.impl.jes.io.{DiskType, JesWorkingDisk} -import cromwell.backend.{BackendConfigurationDescriptor, BackendJobDescriptor, BackendJobDescriptorKey, BackendWorkflowDescriptor, PreemptedException, RuntimeAttributeDefinition} -import cromwell.core.logging.LoggerWrapper +import cromwell.backend.impl.jes.statuspolling.JesApiQueryManager.DoPoll +import cromwell.core.logging.JobLogger import cromwell.core.{WorkflowId, WorkflowOptions, _} -import cromwell.filesystems.gcs._ +import cromwell.filesystems.gcs.GcsPathBuilderFactory +import cromwell.filesystems.gcs.auth.GoogleAuthMode.NoAuthMode import cromwell.util.SampleWdl import org.scalatest._ import org.scalatest.prop.Tables.Table @@ -25,16 +26,16 @@ import org.specs2.mock.Mockito import spray.json.{JsObject, JsValue} import wdl4s.types.{WdlArrayType, WdlFileType, WdlMapType, WdlStringType} import wdl4s.values.{WdlArray, WdlFile, WdlMap, WdlString, WdlValue} -import wdl4s.{Call, LocallyQualifiedName, NamespaceWithWorkflow} +import wdl4s.{LocallyQualifiedName, FullyQualifiedName => _, _} import scala.concurrent.duration._ import scala.concurrent.{Await, ExecutionContext, Future, Promise} import scala.util.{Success, Try} -import cromwell.backend.impl.jes.MockObjects._ -import cromwell.backend.impl.jes.statuspolling.JesApiQueryManager.DoPoll class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackendJobExecutionActorSpec") - with FlatSpecLike with Matchers with ImplicitSender with Mockito { + with FlatSpecLike with Matchers with ImplicitSender with Mockito with BackendSpec { + + val mockPathBuilder = GcsPathBuilderFactory(NoAuthMode).withOptions(mock[WorkflowOptions]) import JesTestConfig._ @@ -56,7 +57,7 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend | } |} | - |workflow sup { + |workflow wf_sup { | call sup |} """.stripMargin @@ -65,16 +66,14 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend val NoOptions = WorkflowOptions(JsObject(Map.empty[String, JsValue])) - val TestableCallContext = 
CallContext(MockGcsFileSystemBuilder.mockGcsFileSystem.getPath("gs://root"), "out", "err") + val TestableCallContext = CallContext(mockPathBuilder.build("gs://root").get, "out", "err") val TestableJesExpressionFunctions = { - new JesExpressionFunctions(List(MockGcsFileSystemBuilder.mockGcsFileSystem), TestableCallContext) + new JesExpressionFunctions(List(mockPathBuilder), TestableCallContext) } private def buildInitializationData(jobDescriptor: BackendJobDescriptor, configuration: JesConfiguration) = { - val workflowPaths = JesWorkflowPaths(jobDescriptor.workflowDescriptor, - configuration, - mockCredentials)(scala.concurrent.ExecutionContext.global) + val workflowPaths = JesWorkflowPaths(jobDescriptor.workflowDescriptor, configuration)(system) JesBackendInitializationData(workflowPaths, null) } @@ -85,12 +84,9 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend jesSingletonActor: ActorRef = emptyActor) extends JesAsyncBackendJobExecutionActor(jobDescriptor, promise, jesConfiguration, buildInitializationData(jobDescriptor, jesConfiguration), emptyActor, jesSingletonActor) { - override lazy val jobLogger = new LoggerWrapper { - override def akkaLogger: Option[LoggingAdapter] = Option(log) - + override lazy val jobLogger = new JobLogger("TestLogger", workflowId, jobTag, akkaLogger = Option(log)) { override def tag: String = s"$name [UUID(${workflowId.shortString})$jobTag]" - - override def slf4jLoggers: Set[Logger] = Set.empty + override val slf4jLoggers: Set[Logger] = Set.empty } override lazy val callEngineFunctions = functions @@ -110,15 +106,15 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend private def buildPreemptibleJobDescriptor(attempt: Int, preemptible: Int): BackendJobDescriptor = { val workflowDescriptor = BackendWorkflowDescriptor( WorkflowId.randomId(), - NamespaceWithWorkflow.load(YoSup.replace("[PREEMPTIBLE]", s"preemptible: $preemptible")), + WdlNamespaceWithWorkflow.load(YoSup.replace("[PREEMPTIBLE]", s"preemptible: $preemptible"), Seq.empty[ImportResolver]).workflow, Inputs, NoOptions ) - val job = workflowDescriptor.workflowNamespace.workflow.calls.head + val job = workflowDescriptor.workflow.taskCalls.head val key = BackendJobDescriptorKey(job, None, attempt) val runtimeAttributes = makeRuntimeAttributes(job) - BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, Inputs) + BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, fqnMapToDeclarationMap(Inputs)) } private def executionActor(jobDescriptor: BackendJobDescriptor, @@ -128,7 +124,7 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend // Mock/stub out the bits that would reach out to JES. 
val run = mock[Run] - val handle = JesPendingExecutionHandle(jobDescriptor, Seq.empty, run, None) + val handle = JesPendingExecutionHandle(jobDescriptor, Set.empty, run, None) class ExecuteOrRecoverActor extends TestableJesJobExecutionActor(jobDescriptor, promise, jesConfiguration, jesSingletonActor = jesSingletonActor) { override def executeOrRecover(mode: ExecutionMode)(implicit ec: ExecutionContext): Future[ExecutionHandle] = Future.successful(handle) @@ -187,8 +183,8 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend expectations foreach { case (attempt, preemptible, errorCode, innerErrorCode, shouldRetry) => it should s"handle call failures appropriately with respect to preemption (attempt=$attempt, preemptible=$preemptible, errorCode=$errorCode, innerErrorCode=$innerErrorCode)" in { runAndFail(attempt, preemptible, errorCode, innerErrorCode).getClass.getSimpleName match { - case "FailedNonRetryableResponse" => false shouldBe shouldRetry - case "FailedRetryableResponse" => true shouldBe shouldRetry + case "JobFailedNonRetryableResponse" => false shouldBe shouldRetry + case "JobFailedRetryableResponse" => true shouldBe shouldRetry case huh => fail(s"Unexpected response class name: '$huh'") } } @@ -284,21 +280,21 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend val workflowDescriptor = BackendWorkflowDescriptor( WorkflowId.randomId(), - NamespaceWithWorkflow.load(YoSup.replace("[PREEMPTIBLE]", "")), + WdlNamespaceWithWorkflow.load(YoSup.replace("[PREEMPTIBLE]", ""), Seq.empty[ImportResolver]).workflow, inputs, NoOptions ) - val call = workflowDescriptor.workflowNamespace.workflow.calls.head + val call = workflowDescriptor.workflow.taskCalls.head val key = BackendJobDescriptorKey(call, None, 1) val runtimeAttributes = makeRuntimeAttributes(call) - val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, inputs) + val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, fqnMapToDeclarationMap(inputs)) val props = Props(new TestableJesJobExecutionActor(jobDescriptor, Promise(), jesConfiguration)) val testActorRef = TestActorRef[TestableJesJobExecutionActor]( props, s"TestableJesJobExecutionActor-${jobDescriptor.workflowDescriptor.id}") - val mappedInputs = jobDescriptor.inputs mapValues testActorRef.underlyingActor.gcsPathToLocal + val mappedInputs = jobDescriptor.fullyQualifiedInputs mapValues testActorRef.underlyingActor.gcsPathToLocal mappedInputs(stringKey) match { case WdlString(v) => assert(v.equalsIgnoreCase(stringVal.value)) @@ -338,15 +334,15 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend val workflowDescriptor = BackendWorkflowDescriptor( WorkflowId.randomId(), - NamespaceWithWorkflow.load(SampleWdl.CurrentDirectory.asWorkflowSources(DockerAndDiskRuntime).wdlSource), + WdlNamespaceWithWorkflow.load(SampleWdl.CurrentDirectory.asWorkflowSources(DockerAndDiskRuntime).wdlSource, Seq.empty[ImportResolver]).workflow, inputs, NoOptions ) - val job = workflowDescriptor.workflowNamespace.workflow.calls.head + val job = workflowDescriptor.workflow.taskCalls.head val runtimeAttributes = makeRuntimeAttributes(job) val key = BackendJobDescriptorKey(job, None, 1) - val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, inputs) + val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, fqnMapToDeclarationMap(inputs)) val props = Props(new 
TestableJesJobExecutionActor(jobDescriptor, Promise(), jesConfiguration)) val testActorRef = TestActorRef[TestableJesJobExecutionActor]( @@ -377,15 +373,15 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend TestActorRef[TestableJesJobExecutionActor] = { val workflowDescriptor = BackendWorkflowDescriptor( WorkflowId.randomId(), - NamespaceWithWorkflow.load(sampleWdl.asWorkflowSources(DockerAndDiskRuntime).wdlSource), + WdlNamespaceWithWorkflow.load(sampleWdl.asWorkflowSources(DockerAndDiskRuntime).wdlSource, Seq.empty[ImportResolver]).workflow, inputs, NoOptions ) - val call = workflowDescriptor.workflowNamespace.workflow.findCallByName(callName).get + val call = workflowDescriptor.workflow.findCallByName(callName).get.asInstanceOf[TaskCall] val key = BackendJobDescriptorKey(call, None, 1) val runtimeAttributes = makeRuntimeAttributes(call) - val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, inputs) + val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, fqnMapToDeclarationMap(inputs)) val props = Props(new TestableJesJobExecutionActor(jobDescriptor, Promise(), jesConfiguration, functions)) TestActorRef[TestableJesJobExecutionActor](props, s"TestableJesJobExecutionActor-${jobDescriptor.workflowDescriptor.id}") @@ -393,14 +389,14 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend it should "generate correct JesOutputs" in { val inputs = Map( - "in" -> WdlFile("gs://a/b/c.txt") + "in" -> WdlFile("gs://blah/b/c.txt") ) val jesBackend = makeJesActorRef(SampleWdl.FilePassingWorkflow, "a", inputs).underlyingActor val jobDescriptor = jesBackend.jobDescriptor val workflowId = jesBackend.workflowId val jesInputs = jesBackend.generateJesInputs(jobDescriptor) jesInputs should have size 1 - jesInputs should contain(JesFileInput("in-0", "gs://a/b/c.txt", Paths.get("a/b/c.txt"), workingDisk)) + jesInputs should contain(JesFileInput("in-0", "gs://blah/b/c.txt", Paths.get("blah/b/c.txt"), workingDisk)) val jesOutputs = jesBackend.generateJesOutputs(jobDescriptor) jesOutputs should have size 1 jesOutputs should contain(JesFileOutput("out", @@ -413,7 +409,7 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend ) class TestJesExpressionFunctions extends JesExpressionFunctions( - List(MockGcsFileSystemBuilder.mockGcsFileSystem), TestableCallContext) { + List(mockPathBuilder), TestableCallContext) { override def write_lines(params: Seq[Try[WdlValue]]): Try[WdlFile] = { Success(WdlFile(s"gs://some/path/file.txt")) } @@ -438,15 +434,15 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend val workflowDescriptor = BackendWorkflowDescriptor( WorkflowId.randomId(), - NamespaceWithWorkflow.load(SampleWdl.CurrentDirectory.asWorkflowSources(DockerAndDiskRuntime).wdlSource), + WdlNamespaceWithWorkflow.load(SampleWdl.CurrentDirectory.asWorkflowSources(DockerAndDiskRuntime).wdlSource, Seq.empty[ImportResolver]).workflow, inputs, NoOptions ) - val job = workflowDescriptor.workflowNamespace.workflow.calls.head + val job = workflowDescriptor.workflow.taskCalls.head val runtimeAttributes = makeRuntimeAttributes(job) val key = BackendJobDescriptorKey(job, None, 1) - val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, inputs) + val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, fqnMapToDeclarationMap(inputs)) val props = Props(new TestableJesJobExecutionActor(jobDescriptor, 
Promise(), jesConfiguration)) val testActorRef = TestActorRef[TestableJesJobExecutionActor]( @@ -466,15 +462,15 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend val workflowDescriptor = BackendWorkflowDescriptor( WorkflowId.randomId(), - NamespaceWithWorkflow.load(SampleWdl.CurrentDirectory.asWorkflowSources(DockerAndDiskRuntime).wdlSource), + WdlNamespaceWithWorkflow.load(SampleWdl.CurrentDirectory.asWorkflowSources(DockerAndDiskRuntime).wdlSource, Seq.empty[ImportResolver]).workflow, inputs, NoOptions ) - val job = workflowDescriptor.workflowNamespace.workflow.calls.head + val job = workflowDescriptor.workflow.taskCalls.head val runtimeAttributes = makeRuntimeAttributes(job) val key = BackendJobDescriptorKey(job, None, 1) - val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, inputs) + val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, fqnMapToDeclarationMap(inputs)) val props = Props(new TestableJesJobExecutionActor(jobDescriptor, Promise(), jesConfiguration)) val testActorRef = TestActorRef[TestableJesJobExecutionActor]( @@ -487,7 +483,7 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend } it should "convert local Paths back to corresponding GCS paths in JesOutputs" in { - val jesOutputs = Seq( + val jesOutputs = Set( JesFileOutput("/cromwell_root/path/to/file1", "gs://path/to/file1", Paths.get("/cromwell_root/path/to/file1"), workingDisk), JesFileOutput("/cromwell_root/path/to/file2", "gs://path/to/file2", @@ -510,12 +506,12 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend val workflowDescriptor = BackendWorkflowDescriptor( WorkflowId.randomId(), - NamespaceWithWorkflow.load(SampleWdl.EmptyString.asWorkflowSources(DockerAndDiskRuntime).wdlSource), + WdlNamespaceWithWorkflow.load(SampleWdl.EmptyString.asWorkflowSources(DockerAndDiskRuntime).wdlSource, Seq.empty[ImportResolver]).workflow, Map.empty, NoOptions ) - val call = workflowDescriptor.workflowNamespace.workflow.calls.head + val call = workflowDescriptor.workflow.taskCalls.head val key = BackendJobDescriptorKey(call, None, 1) val runtimeAttributes = makeRuntimeAttributes(call) val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, Map.empty) @@ -538,12 +534,12 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend it should "create a JesFileInput for the monitoring script, when specified" in { val workflowDescriptor = BackendWorkflowDescriptor( WorkflowId.randomId(), - NamespaceWithWorkflow.load(SampleWdl.EmptyString.asWorkflowSources(DockerAndDiskRuntime).wdlSource), + WdlNamespaceWithWorkflow.load(SampleWdl.EmptyString.asWorkflowSources(DockerAndDiskRuntime).wdlSource, Seq.empty[ImportResolver]).workflow, Map.empty, WorkflowOptions.fromJsonString("""{"monitoring_script": "gs://path/to/script"}""").get ) - val job = workflowDescriptor.workflowNamespace.workflow.calls.head + val job = workflowDescriptor.workflow.taskCalls.head val runtimeAttributes = makeRuntimeAttributes(job) val key = BackendJobDescriptorKey(job, None, 1) val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, Map.empty) @@ -559,12 +555,12 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend it should "not create a JesFileInput for the monitoring script, when not specified" in { val workflowDescriptor = BackendWorkflowDescriptor( WorkflowId.randomId(), - 
NamespaceWithWorkflow.load(SampleWdl.EmptyString.asWorkflowSources(DockerAndDiskRuntime).wdlSource), + WdlNamespaceWithWorkflow.load(SampleWdl.EmptyString.asWorkflowSources(DockerAndDiskRuntime).wdlSource, Seq.empty[ImportResolver]).workflow, Map.empty, NoOptions ) - val job = workflowDescriptor.workflowNamespace.workflow.calls.head + val job = workflowDescriptor.workflow.taskCalls.head val key = BackendJobDescriptorKey(job, None, 1) val runtimeAttributes = makeRuntimeAttributes(job) val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, Map.empty) @@ -579,13 +575,13 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend it should "return JES log paths for non-scattered call" in { val workflowDescriptor = BackendWorkflowDescriptor( WorkflowId(UUID.fromString("e6236763-c518-41d0-9688-432549a8bf7c")), - NamespaceWithWorkflow.load( - SampleWdl.HelloWorld.asWorkflowSources(""" runtime {docker: "ubuntu:latest"} """).wdlSource), + WdlNamespaceWithWorkflow.load( + SampleWdl.HelloWorld.asWorkflowSources(""" runtime {docker: "ubuntu:latest"} """).wdlSource, Seq.empty[ImportResolver]).workflow, Map.empty, WorkflowOptions.fromJsonString(""" {"jes_gcs_root": "gs://path/to/gcs_root"} """).get ) - val call = workflowDescriptor.workflowNamespace.workflow.findCallByName("hello").get + val call = workflowDescriptor.workflow.findCallByName("hello").get.asInstanceOf[TaskCall] val key = BackendJobDescriptorKey(call, None, 1) val runtimeAttributes = makeRuntimeAttributes(call) val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, Map.empty) @@ -596,28 +592,27 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend val jesBackend = testActorRef.underlyingActor - // TODO: NioGcsPath.equals not implemented, so use toString instead - jesBackend.jesCallPaths.stdoutPath should be(a[NioGcsPath]) - jesBackend.jesCallPaths.stdoutPath.toString shouldBe - "gs://path/to/gcs_root/hello/e6236763-c518-41d0-9688-432549a8bf7c/call-hello/hello-stdout.log" - jesBackend.jesCallPaths.stderrPath should be(a[NioGcsPath]) - jesBackend.jesCallPaths.stderrPath.toString shouldBe - "gs://path/to/gcs_root/hello/e6236763-c518-41d0-9688-432549a8bf7c/call-hello/hello-stderr.log" - jesBackend.jesCallPaths.jesLogPath should be(a[NioGcsPath]) - jesBackend.jesCallPaths.jesLogPath.toString shouldBe - "gs://path/to/gcs_root/hello/e6236763-c518-41d0-9688-432549a8bf7c/call-hello/hello.log" + jesBackend.jesCallPaths.stdout should be(a[CloudStoragePath]) + jesBackend.jesCallPaths.stdout.toUri.toString shouldBe + "gs://path/to/gcs_root/wf_hello/e6236763-c518-41d0-9688-432549a8bf7c/call-hello/hello-stdout.log" + jesBackend.jesCallPaths.stderr should be(a[CloudStoragePath]) + jesBackend.jesCallPaths.stderr.toUri.toString shouldBe + "gs://path/to/gcs_root/wf_hello/e6236763-c518-41d0-9688-432549a8bf7c/call-hello/hello-stderr.log" + jesBackend.jesCallPaths.jesLogPath should be(a[CloudStoragePath]) + jesBackend.jesCallPaths.jesLogPath.toUri.toString shouldBe + "gs://path/to/gcs_root/wf_hello/e6236763-c518-41d0-9688-432549a8bf7c/call-hello/hello.log" } it should "return JES log paths for scattered call" in { val workflowDescriptor = BackendWorkflowDescriptor( WorkflowId(UUID.fromString("e6236763-c518-41d0-9688-432549a8bf7d")), - NamespaceWithWorkflow.load( - new SampleWdl.ScatterWdl().asWorkflowSources(""" runtime {docker: "ubuntu:latest"} """).wdlSource), + WdlNamespaceWithWorkflow.load( + new SampleWdl.ScatterWdl().asWorkflowSources(""" 
runtime {docker: "ubuntu:latest"} """).wdlSource, Seq.empty[ImportResolver]).workflow, Map.empty, WorkflowOptions.fromJsonString(""" {"jes_gcs_root": "gs://path/to/gcs_root"} """).get ) - val call = workflowDescriptor.workflowNamespace.workflow.findCallByName("B").get + val call = workflowDescriptor.workflow.findCallByName("B").get.asInstanceOf[TaskCall] val key = BackendJobDescriptorKey(call, Option(2), 1) val runtimeAttributes = makeRuntimeAttributes(call) val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, Map.empty) @@ -628,14 +623,14 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend val jesBackend = testActorRef.underlyingActor - jesBackend.jesCallPaths.stdoutPath should be(a[NioGcsPath]) - jesBackend.jesCallPaths.stdoutPath.toString shouldBe + jesBackend.jesCallPaths.stdout should be(a[CloudStoragePath]) + jesBackend.jesCallPaths.stdout.toUri.toString shouldBe "gs://path/to/gcs_root/w/e6236763-c518-41d0-9688-432549a8bf7d/call-B/shard-2/B-2-stdout.log" - jesBackend.jesCallPaths.stderrPath should be(a[NioGcsPath]) - jesBackend.jesCallPaths.stderrPath.toString shouldBe + jesBackend.jesCallPaths.stderr should be(a[CloudStoragePath]) + jesBackend.jesCallPaths.stderr.toUri.toString shouldBe "gs://path/to/gcs_root/w/e6236763-c518-41d0-9688-432549a8bf7d/call-B/shard-2/B-2-stderr.log" - jesBackend.jesCallPaths.jesLogPath should be(a[NioGcsPath]) - jesBackend.jesCallPaths.jesLogPath.toString shouldBe + jesBackend.jesCallPaths.jesLogPath should be(a[CloudStoragePath]) + jesBackend.jesCallPaths.jesLogPath.toUri.toString shouldBe "gs://path/to/gcs_root/w/e6236763-c518-41d0-9688-432549a8bf7d/call-B/shard-2/B-2.log" } @@ -662,7 +657,7 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend descriptorWithMax2AndKey2.preemptible shouldBe true } - private def makeRuntimeAttributes(job: Call) = { + private def makeRuntimeAttributes(job: TaskCall) = { val evaluatedAttributes = RuntimeAttributeDefinition.evaluateRuntimeAttributes(job.task.runtimeAttributes, TestableJesExpressionFunctions, Map.empty) RuntimeAttributeDefinition.addDefaultsToAttributes(JesBackendLifecycleActorFactory.staticRuntimeAttributeDefinitions, NoOptions)(evaluatedAttributes.get) // Fine to throw the exception if this "get" fails. This is a test after all! 
} diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesAttributesSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesAttributesSpec.scala index 17c33d0ded8..0eec23cbe40 100644 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesAttributesSpec.scala +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesAttributesSpec.scala @@ -16,18 +16,19 @@ class JesAttributesSpec extends FlatSpec with Matchers { it should "parse correct JES config" taggedAs IntegrationTest in { val googleConfig = GoogleConfiguration(JesGlobalConfig) - val backendConfig = ConfigFactory.parseString(configString.replace("[PREEMPTIBLE]", "")) + val backendConfig = ConfigFactory.parseString(configString()) val jesAttributes = JesAttributes(googleConfig, backendConfig) jesAttributes.endpointUrl should be(new URL("http://myEndpoint")) jesAttributes.project should be("myProject") jesAttributes.executionBucket should be("gs://myBucket") jesAttributes.maxPollingInterval should be(600) + jesAttributes.computeServiceAccount should be("default") } it should "parse correct preemptible config" taggedAs IntegrationTest in { val googleConfig = GoogleConfiguration(JesGlobalConfig) - val backendConfig = ConfigFactory.parseString(configString.replace("[PREEMPTIBLE]", "preemptible = 3")) + val backendConfig = ConfigFactory.parseString(configString(preemptible = "preemptible = 3")) val jesAttributes = JesAttributes(googleConfig, backendConfig) jesAttributes.endpointUrl should be(new URL("http://myEndpoint")) @@ -36,6 +37,14 @@ class JesAttributesSpec extends FlatSpec with Matchers { jesAttributes.maxPollingInterval should be(600) } + it should "parse compute service account" taggedAs IntegrationTest in { + val googleConfig = GoogleConfiguration(JesGlobalConfig) + val backendConfig = ConfigFactory.parseString(configString(genomics = """compute-service-account = "testing" """)) + + val jesAttributes = JesAttributes(googleConfig, backendConfig) + jesAttributes.computeServiceAccount should be("testing") + } + it should "not parse invalid config" taggedAs IntegrationTest in { val nakedConfig = ConfigFactory.parseString( @@ -60,17 +69,18 @@ class JesAttributesSpec extends FlatSpec with Matchers { errorsList should contain("no protocol: myEndpoint") } - val configString = - """ + def configString(preemptible: String = "", genomics: String = "") = + s""" |{ | project = "myProject" | root = "gs://myBucket" | maximum-polling-interval = 600 - | [PREEMPTIBLE] + | $preemptible | genomics { | // A reference to an auth defined in the `google` stanza at the top. This auth is used to create | // Pipelines and manipulate auth JSONs. 
| auth = "application-default" + | $genomics | endpoint-url = "http://myEndpoint" | } | diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesCallPathsSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesCallPathsSpec.scala index d2732870634..58841f75664 100644 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesCallPathsSpec.scala +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesCallPathsSpec.scala @@ -1,13 +1,12 @@ package cromwell.backend.impl.jes import cromwell.backend.BackendSpec +import cromwell.core.TestKitSuite import cromwell.util.SampleWdl -import org.scalatest.{FlatSpec, Matchers} +import org.scalatest.{FlatSpecLike, Matchers} import org.specs2.mock.Mockito -import scala.concurrent.ExecutionContext.Implicits.global -import cromwell.backend.impl.jes.MockObjects._ -class JesCallPathsSpec extends FlatSpec with Matchers with Mockito { +class JesCallPathsSpec extends TestKitSuite with FlatSpecLike with Matchers with Mockito { import BackendSpec._ import JesTestConfig._ @@ -19,8 +18,8 @@ class JesCallPathsSpec extends FlatSpec with Matchers with Mockito { val jobDescriptorKey = firstJobDescriptorKey(workflowDescriptor) val jesConfiguration = new JesConfiguration(JesBackendConfigurationDescriptor) - val callPaths = JesCallPaths(jobDescriptorKey, workflowDescriptor, - jesConfiguration, mockCredentials) + val callPaths = JesJobPaths(jobDescriptorKey, workflowDescriptor, + jesConfiguration) callPaths.returnCodeFilename should be("hello-rc.txt") callPaths.stderrFilename should be("hello-stderr.log") callPaths.stdoutFilename should be("hello-stdout.log") @@ -32,16 +31,15 @@ class JesCallPathsSpec extends FlatSpec with Matchers with Mockito { val jobDescriptorKey = firstJobDescriptorKey(workflowDescriptor) val jesConfiguration = new JesConfiguration(JesBackendConfigurationDescriptor) - val callPaths = JesCallPaths(jobDescriptorKey, workflowDescriptor, jesConfiguration, - mockCredentials) - callPaths.returnCodePath.toString should - be(s"gs://my-cromwell-workflows-bucket/hello/${workflowDescriptor.id}/call-hello/hello-rc.txt") - callPaths.stdoutPath.toString should - be(s"gs://my-cromwell-workflows-bucket/hello/${workflowDescriptor.id}/call-hello/hello-stdout.log") - callPaths.stderrPath.toString should - be(s"gs://my-cromwell-workflows-bucket/hello/${workflowDescriptor.id}/call-hello/hello-stderr.log") - callPaths.jesLogPath.toString should - be(s"gs://my-cromwell-workflows-bucket/hello/${workflowDescriptor.id}/call-hello/hello.log") + val callPaths = JesJobPaths(jobDescriptorKey, workflowDescriptor, jesConfiguration) + callPaths.returnCode.toUri.toString should + be(s"gs://my-cromwell-workflows-bucket/wf_hello/${workflowDescriptor.id}/call-hello/hello-rc.txt") + callPaths.stdout.toUri.toString should + be(s"gs://my-cromwell-workflows-bucket/wf_hello/${workflowDescriptor.id}/call-hello/hello-stdout.log") + callPaths.stderr.toUri.toString should + be(s"gs://my-cromwell-workflows-bucket/wf_hello/${workflowDescriptor.id}/call-hello/hello-stderr.log") + callPaths.jesLogPath.toUri.toString should + be(s"gs://my-cromwell-workflows-bucket/wf_hello/${workflowDescriptor.id}/call-hello/hello.log") } it should "map the correct call context" in { @@ -49,10 +47,9 @@ class JesCallPathsSpec extends FlatSpec with Matchers with Mockito { val jobDescriptorKey = firstJobDescriptorKey(workflowDescriptor) val jesConfiguration = new JesConfiguration(JesBackendConfigurationDescriptor) - val callPaths = 
JesCallPaths(jobDescriptorKey, workflowDescriptor, jesConfiguration, - mockCredentials) - callPaths.callContext.root.toString should - be(s"gs://my-cromwell-workflows-bucket/hello/${workflowDescriptor.id}/call-hello") + val callPaths = JesJobPaths(jobDescriptorKey, workflowDescriptor, jesConfiguration) + callPaths.callContext.root.toUri.toString should + be(s"gs://my-cromwell-workflows-bucket/wf_hello/${workflowDescriptor.id}/call-hello") callPaths.callContext.stdout should be("hello-stdout.log") callPaths.callContext.stderr should be("hello-stderr.log") } diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesConfigurationSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesConfigurationSpec.scala index c2b77d38cfb..46068343c3f 100644 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesConfigurationSpec.scala +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesConfigurationSpec.scala @@ -1,16 +1,25 @@ package cromwell.backend.impl.jes +import better.files.File import com.typesafe.config.{ConfigValueFactory, ConfigFactory} import cromwell.backend.BackendConfigurationDescriptor import org.scalatest.prop.TableDrivenPropertyChecks -import org.scalatest.{FlatSpec, Matchers} +import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers} -class JesConfigurationSpec extends FlatSpec with Matchers with TableDrivenPropertyChecks { +class JesConfigurationSpec extends FlatSpec with Matchers with TableDrivenPropertyChecks with BeforeAndAfterAll { behavior of "JesConfigurationSpec" + val mockFile = File.newTemporaryFile() + + override def afterAll(): Unit = { + mockFile.delete(true) + + () + } + val globalConfig = ConfigFactory.parseString( - """ + s""" |google { | | application-name = "cromwell" @@ -24,13 +33,13 @@ class JesConfigurationSpec extends FlatSpec with Matchers with TableDrivenProper | name = "user-via-refresh" | scheme = "refresh_token" | client-id = "secret_id" - | client-secret = "secret_secret" + | client-secret = "${mockFile.pathAsString}" | }, | { | name = "service-account" | scheme = "service_account" | service-account-id = "my-service-account" - | pem-file = "/path/to/file.pem" + | pem-file = "${mockFile.pathAsString}" | } | ] |} diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesInitializationActorSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesInitializationActorSpec.scala index 4699402bcb8..73b8cb6de04 100644 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesInitializationActorSpec.scala +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesInitializationActorSpec.scala @@ -7,14 +7,16 @@ import com.typesafe.config.{Config, ConfigFactory} import cromwell.backend.BackendWorkflowInitializationActor.{InitializationFailed, InitializationSuccess, Initialize} import cromwell.backend.impl.jes.authentication.GcsLocalizing import cromwell.backend.{BackendConfigurationDescriptor, BackendSpec, BackendWorkflowDescriptor} +import cromwell.core.Tags.IntegrationTest import cromwell.core.logging.LoggingTest._ import cromwell.core.{TestKitSuite, WorkflowOptions} -import cromwell.filesystems.gcs.{RefreshTokenMode, SimpleClientSecrets} +import cromwell.filesystems.gcs.GoogleConfiguration +import cromwell.filesystems.gcs.auth.{RefreshTokenMode, SimpleClientSecrets} import cromwell.util.{EncryptionSpec, SampleWdl} import org.scalatest.{FlatSpecLike, Matchers} import org.specs2.mock.Mockito import spray.json._ -import 
wdl4s.Call +import wdl4s.TaskCall import scala.concurrent.duration._ @@ -38,7 +40,7 @@ class JesInitializationActorSpec extends TestKitSuite("JesInitializationActorSpe | RUNTIME |} | - |workflow hello { + |workflow wf_hello { | call hello |} """.stripMargin @@ -136,17 +138,17 @@ class JesInitializationActorSpec extends TestKitSuite("JesInitializationActorSpe val refreshTokenConfig = ConfigFactory.parseString(refreshTokenConfigTemplate) - private def getJesBackend(workflowDescriptor: BackendWorkflowDescriptor, calls: Seq[Call], conf: BackendConfigurationDescriptor) = { + private def getJesBackend(workflowDescriptor: BackendWorkflowDescriptor, calls: Set[TaskCall], conf: BackendConfigurationDescriptor) = { system.actorOf(JesInitializationActor.props(workflowDescriptor, calls, new JesConfiguration(conf), emptyActor)) } behavior of "JesInitializationActor" - it should "log a warning message when there are unsupported runtime attributes" in { + it should "log a warning message when there are unsupported runtime attributes" taggedAs IntegrationTest in { within(Timeout) { val workflowDescriptor = buildWorkflowDescriptor(HelloWorld, runtime = """runtime { docker: "ubuntu/latest" test: true }""") - val backend = getJesBackend(workflowDescriptor, workflowDescriptor.workflowNamespace.workflow.calls, + val backend = getJesBackend(workflowDescriptor, workflowDescriptor.workflow.taskCalls, defaultBackendConfig) val eventPattern = "Key/s [test] is/are not supported by JesBackend. Unsupported attributes will not be part of jobs executions." @@ -163,7 +165,7 @@ class JesInitializationActorSpec extends TestKitSuite("JesInitializationActorSpe it should "return InitializationFailed when docker runtime attribute key is not present" in { within(Timeout) { val workflowDescriptor = buildWorkflowDescriptor(HelloWorld, runtime = """runtime { }""") - val backend = getJesBackend(workflowDescriptor, workflowDescriptor.workflowNamespace.workflow.calls, + val backend = getJesBackend(workflowDescriptor, workflowDescriptor.workflow.taskCalls, defaultBackendConfig) backend ! 
Initialize expectMsgPF() { @@ -182,7 +184,7 @@ class JesInitializationActorSpec extends TestKitSuite("JesInitializationActorSpe private def buildJesInitializationTestingBits(backendConfig: Config = dockerBackendConfig): TestingBits = { val workflowOptions = WorkflowOptions.fromMap(Map("refresh_token" -> "mytoken")).get val workflowDescriptor = buildWorkflowDescriptor(SampleWdl.HelloWorld.wdlSource(), options = workflowOptions) - val calls = workflowDescriptor.workflowNamespace.workflow.calls + val calls = workflowDescriptor.workflow.taskCalls val backendConfigurationDescriptor = BackendConfigurationDescriptor(backendConfig, globalConfig) val jesConfiguration = new JesConfiguration(backendConfigurationDescriptor) @@ -197,7 +199,7 @@ class JesInitializationActorSpec extends TestKitSuite("JesInitializationActorSpe val TestingBits(actorRef, _) = buildJesInitializationTestingBits(refreshTokenConfig) val actor = actorRef.underlyingActor - actor.refreshTokenAuth should be(Some(GcsLocalizing(RefreshTokenMode("user-via-refresh", "secret_id", "secret_secret"), "mytoken"))) + actor.refreshTokenAuth should be(Some(GcsLocalizing(RefreshTokenMode("user-via-refresh", "secret_id", "secret_secret", GoogleConfiguration.GoogleScopes), "mytoken"))) } it should "generate the correct json content for no docker token and no refresh token" in { diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesJobExecutionActorSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesJobExecutionActorSpec.scala new file mode 100644 index 00000000000..2c085371816 --- /dev/null +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesJobExecutionActorSpec.scala @@ -0,0 +1,111 @@ +package cromwell.backend.impl.jes + +import akka.actor.{Actor, ActorRef, Props} +import akka.testkit.{TestActorRef, TestProbe} +import cromwell.backend.BackendJobDescriptor +import cromwell.core.TestKitSuite +import org.scalatest.{FlatSpecLike, Matchers} +import org.specs2.mock.Mockito + +import scala.concurrent.duration._ +import akka.testkit._ +import cromwell.backend.BackendJobExecutionActor.{ExecuteJobCommand, JobFailedNonRetryableResponse} +import cromwell.backend.impl.jes.ControllableFailingJabjea.JabjeaExplode + +import scala.concurrent.{ExecutionContext, Promise} + +class JesJobExecutionActorSpec extends TestKitSuite("JesJobExecutionActorSpec") with FlatSpecLike with Matchers with Mockito { + + behavior of "JesJobExecutionActor" + + private val AwaitAlmostNothing = 100.milliseconds.dilated + private val TimeoutDuration = 10.seconds.dilated + implicit val ec: ExecutionContext = system.dispatcher + + it should "catch failures in JABJEA initialization and fail the job accordingly" in { + val jobDescriptor = mock[BackendJobDescriptor] + val jesWorkflowInfo = mock[JesConfiguration] + val initializationData = mock[JesBackendInitializationData] + val serviceRegistryActor = system.actorOf(Props.empty) + val jesBackendSingletonActor = Option(system.actorOf(Props.empty)) + + val parent = TestProbe() + val deathwatch = TestProbe() + val testJJEA = TestActorRef[TestJesJobExecutionActor]( + props = Props(new TestJesJobExecutionActor(jobDescriptor, jesWorkflowInfo, initializationData, serviceRegistryActor, jesBackendSingletonActor, Props(new ConstructorFailingJABJEA))), + supervisor = parent.ref) + deathwatch watch testJJEA + + // Nothing happens: + parent.expectNoMsg(max = AwaitAlmostNothing) + deathwatch.expectNoMsg(max = AwaitAlmostNothing) + + testJJEA.tell(msg = ExecuteJobCommand, sender = 
parent.ref) + + parent.expectMsgPF(max = TimeoutDuration) { + case JobFailedNonRetryableResponse(jobKey, e, errorCode) => + e.getMessage should be("JesAsyncBackendJobExecutionActor failed and didn't catch its exception.") + } + } + + it should "catch failures at a random point during JABJEA processing and fail the job accordingly" in { + val jobDescriptor = mock[BackendJobDescriptor] + val jesWorkflowInfo = mock[JesConfiguration] + val initializationData = mock[JesBackendInitializationData] + val serviceRegistryActor = system.actorOf(Props.empty) + val jesBackendSingletonActor = Option(system.actorOf(Props.empty)) + + val parent = TestProbe() + val deathwatch = TestProbe() + val jabjeaConstructionPromise = Promise[ActorRef]() + val testJJEA = TestActorRef[TestJesJobExecutionActor]( + props = Props(new TestJesJobExecutionActor(jobDescriptor, jesWorkflowInfo, initializationData, serviceRegistryActor, jesBackendSingletonActor, Props(new ControllableFailingJabjea(jabjeaConstructionPromise)))), + supervisor = parent.ref) + deathwatch watch testJJEA + + // Nothing happens: + parent.expectNoMsg(max = AwaitAlmostNothing) + deathwatch.expectNoMsg(max = AwaitAlmostNothing) + + testJJEA.tell(msg = ExecuteJobCommand, sender = parent.ref) + + // Wait for the JABJEA to be spawned. Then kill it: + parent.expectNoMsg(max = AwaitAlmostNothing) + deathwatch.expectNoMsg(max = AwaitAlmostNothing) + jabjeaConstructionPromise.future foreach { _ ! JabjeaExplode } + + parent.expectMsgPF(max = TimeoutDuration) { + case JobFailedNonRetryableResponse(jobKey, e, errorCode) => + e.getMessage should be("JesAsyncBackendJobExecutionActor failed and didn't catch its exception.") + } + } +} + +class TestJesJobExecutionActor(jobDescriptor: BackendJobDescriptor, + jesWorkflowInfo: JesConfiguration, + initializationData: JesBackendInitializationData, + serviceRegistryActor: ActorRef, + jesBackendSingletonActor: Option[ActorRef], + fakeJabjeaProps: Props) extends JesJobExecutionActor(jobDescriptor, jesWorkflowInfo, initializationData, serviceRegistryActor, jesBackendSingletonActor) { + override def jabjeaProps: Props = fakeJabjeaProps +} + +class ConstructorFailingJABJEA extends ControllableFailingJabjea(Promise[ActorRef]()) { + // Explode immediately in the constructor: + explode() +} + +class ControllableFailingJabjea(constructionPromise: Promise[ActorRef]) extends Actor { + def explode() = { + val boom = 1 == 1 + if (boom) throw new RuntimeException("Test Exception! 
Don't panic if this appears during a test run!") + } + constructionPromise.trySuccess(self) + override def receive = { + case JabjeaExplode => explode() + } +} + +object ControllableFailingJabjea { + case object JabjeaExplode +} diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesTestConfig.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesTestConfig.scala index 03f17d65be2..25f06138753 100644 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesTestConfig.scala +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesTestConfig.scala @@ -48,5 +48,5 @@ object JesTestConfig { val JesBackendConfig = ConfigFactory.parseString(JesBackendConfigString) val JesGlobalConfig = ConfigFactory.parseString(JesGlobalConfigString) - val JesBackendConfigurationDescriptor = new BackendConfigurationDescriptor(JesBackendConfig, JesGlobalConfig) + val JesBackendConfigurationDescriptor = BackendConfigurationDescriptor(JesBackendConfig, JesGlobalConfig) } diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesWorkflowPathsSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesWorkflowPathsSpec.scala index 3f1dea365a5..48dd3d74c6e 100644 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesWorkflowPathsSpec.scala +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesWorkflowPathsSpec.scala @@ -1,12 +1,12 @@ package cromwell.backend.impl.jes import cromwell.backend.BackendSpec +import cromwell.core.TestKitSuite import cromwell.util.SampleWdl -import org.scalatest.{FlatSpec, Matchers} +import org.scalatest.{FlatSpecLike, Matchers} import org.specs2.mock.Mockito -import cromwell.backend.impl.jes.MockObjects._ -class JesWorkflowPathsSpec extends FlatSpec with Matchers with Mockito { +class JesWorkflowPathsSpec extends TestKitSuite with FlatSpecLike with Matchers with Mockito { import BackendSpec._ import JesTestConfig._ @@ -16,11 +16,11 @@ class JesWorkflowPathsSpec extends FlatSpec with Matchers with Mockito { val workflowDescriptor = buildWorkflowDescriptor(SampleWdl.HelloWorld.wdlSource()) val jesConfiguration = new JesConfiguration(JesBackendConfigurationDescriptor) - val workflowPaths = JesWorkflowPaths(workflowDescriptor, jesConfiguration, mockCredentials)(scala.concurrent.ExecutionContext.global) - workflowPaths.rootPath.toString should be("gs://my-cromwell-workflows-bucket") - workflowPaths.workflowRootPath.toString should - be(s"gs://my-cromwell-workflows-bucket/hello/${workflowDescriptor.id}") - workflowPaths.gcsAuthFilePath.toString should - be(s"gs://my-cromwell-workflows-bucket/hello/${workflowDescriptor.id}/${workflowDescriptor.id}_auth.json") + val workflowPaths = JesWorkflowPaths(workflowDescriptor, jesConfiguration)(system) + workflowPaths.executionRoot.toUri.toString should be("gs://my-cromwell-workflows-bucket/") + workflowPaths.workflowRoot.toUri.toString should + be(s"gs://my-cromwell-workflows-bucket/wf_hello/${workflowDescriptor.id}/") + workflowPaths.gcsAuthFilePath.toUri.toString should + be(s"gs://my-cromwell-workflows-bucket/wf_hello/${workflowDescriptor.id}/${workflowDescriptor.id}_auth.json") } } diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/MockObjects.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/MockObjects.scala deleted file mode 100644 index 1cde38c4736..00000000000 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/MockObjects.scala +++ 
/dev/null @@ -1,9 +0,0 @@ -package cromwell.backend.impl.jes - -import com.google.api.client.googleapis.testing.auth.oauth2.MockGoogleCredential -import cromwell.backend.impl.jes.authentication.JesCredentials - -object MockObjects { - val mockCredential = new MockGoogleCredential.Builder().build() - val mockCredentials = JesCredentials(mockCredential, mockCredential) -} diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/RunSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/RunSpec.scala index 39430abb22b..5398ed66c35 100644 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/RunSpec.scala +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/RunSpec.scala @@ -3,13 +3,11 @@ package cromwell.backend.impl.jes import java.time.OffsetDateTime import java.util -import com.google.api.client.googleapis.testing.auth.oauth2.MockGoogleCredential import com.google.api.client.util.ArrayMap -import com.google.api.services.genomics.Genomics import com.google.api.services.genomics.model.Operation +import cromwell.core.ExecutionEvent import org.scalatest.{FlatSpec, Matchers} import org.specs2.mock.{Mockito => MockitoTrait} -import cromwell.core.ExecutionEvent import scala.collection.JavaConverters._ @@ -36,8 +34,6 @@ class RunSpec extends FlatSpec with Matchers with MockitoTrait { op.setMetadata(metadata.asJava) - val mockedCredentials = new MockGoogleCredential.Builder().build() - val genomics = new Genomics(mockedCredentials.getTransport, mockedCredentials.getJsonFactory, mockedCredentials) val list = Run.getEventList(op) list should contain theSameElementsAs List( ExecutionEvent("waiting for quota", OffsetDateTime.parse("2015-12-05T00:00:00+00:00")), diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/statuspolling/JesApiQueryManagerSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/statuspolling/JesApiQueryManagerSpec.scala index 1eaa422977a..c7434419a40 100644 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/statuspolling/JesApiQueryManagerSpec.scala +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/statuspolling/JesApiQueryManagerSpec.scala @@ -2,7 +2,7 @@ package cromwell.backend.impl.jes.statuspolling import akka.actor.{ActorRef, Props} import akka.testkit.{TestActorRef, TestProbe} -import cromwell.backend.impl.jes.Run +import cromwell.backend.impl.jes.{JesConfiguration, Run} import cromwell.core.TestKitSuite import org.scalatest.{FlatSpecLike, Matchers} @@ -65,49 +65,35 @@ class JesApiQueryManagerSpec extends TestKitSuite("JesApiQueryManagerSpec") with } AkkaTestUtil.actorDeathMethods(system) foreach { case (name, stopMethod) => - it should s"catch polling actors if they $name and then recreate them" in { - + /* + This test creates two statusPoller ActorRefs which are handed to the TestJesApiQueryManager. Work is added to that query + manager and then the first statusPoller requests work and is subsequently killed. 
The expectation is that: + + - The work will return to the workQueue of the query manager + - The query manager will have registered a new statusPoller + - That statusPoller is the second ActorRef (and artifact of TestJesApiQueryManager) + */ + it should s"catch polling actors if they $name, recreate them and add work back to the queue" in { val statusPoller1 = TestActorRef(Props(new AkkaTestUtil.DeathTestActor()), TestActorRef(new AkkaTestUtil.StoppingSupervisor())) - val statusPoller2 = TestActorRef(Props(new AkkaTestUtil.DeathTestActor())) + val statusPoller2 = TestActorRef(Props(new AkkaTestUtil.DeathTestActor()), TestActorRef(new AkkaTestUtil.StoppingSupervisor())) val jaqmActor: TestActorRef[TestJesApiQueryManager] = TestActorRef(TestJesApiQueryManager.props(statusPoller1, statusPoller2)) - val statusRequesters = ((0 until BatchSize * 2) map { i => i -> TestProbe(name = s"StatusRequester_$i") }).toMap + val emptyActor = system.actorOf(Props.empty) - // Send a few status poll requests: + // Send a few status poll requests: BatchSize indexedTimes { index => - val probe = statusRequesters(index) - jaqmActor.tell(msg = JesApiQueryManager.DoPoll(Run(index.toString, null)), sender = probe.ref) - } - BatchSize indexedTimes { i => - val index = i + BatchSize // For the second half of the statusRequester set - val probe = statusRequesters(index) - jaqmActor.tell(msg = JesApiQueryManager.DoPoll(Run(index.toString, null)), sender = probe.ref) + jaqmActor.tell(msg = JesApiQueryManager.DoPoll(Run(index.toString, null)), sender = emptyActor) } - // Request a set of work from the middle of the queue: - val batchOffset = 2 - jaqmActor.tell(msg = JesApiQueryManager.RequestJesPollingWork(batchOffset), sender = statusPoller1) jaqmActor.tell(msg = JesApiQueryManager.RequestJesPollingWork(BatchSize), sender = statusPoller1) - // Kill the original status poller: stopMethod(statusPoller1) - // Only the appropriate requesters get an error: - (0 until batchOffset) foreach { index => - val probe = statusRequesters(index) - probe.expectNoMsg(max = AwaitAlmostNothing) - } - (batchOffset until batchOffset + BatchSize) foreach { index => - val probe = statusRequesters(index) - probe.expectMsg(max = TestExecutionTimeout, hint = s"Polling error to requester #$index", obj = JesPollingActor.JesPollError) - } - (batchOffset + BatchSize until 2 * BatchSize) foreach { index => - val probe = statusRequesters(index) - probe.expectNoMsg(max = AwaitAlmostNothing) + eventually { + jaqmActor.underlyingActor.testPollerCreations should be (2) + jaqmActor.underlyingActor.queueSize should be (BatchSize) + jaqmActor.underlyingActor.statusPollerEquals(statusPoller2) should be (true) } - - // Check the next status poller gets created: - eventually { jaqmActor.underlyingActor.testPollerCreations should be(2) } } } } @@ -122,8 +108,7 @@ object JesApiQueryManagerSpec { /** * This test class allows us to hook into the JesApiQueryManager's makeStatusPoller and provide our own TestProbes instead */ -class TestJesApiQueryManager(statusPollerProbes: ActorRef*) extends JesApiQueryManager { - +class TestJesApiQueryManager(qps: Int, statusPollerProbes: ActorRef*) extends JesApiQueryManager(qps) { var testProbes: Queue[ActorRef] = _ var testPollerCreations: Int = _ @@ -133,7 +118,7 @@ class TestJesApiQueryManager(statusPollerProbes: ActorRef*) extends JesApiQueryM } override private[statuspolling] def makeStatusPoller(): ActorRef = { - // Initialise the queue, if necessary: + // Initialize the queue, if necessary: if (testProbes == null) { 
      init()
    }
@@ -146,8 +131,14 @@ class TestJesApiQueryManager(statusPollerProbes: ActorRef*) extends JesApiQueryM
     testProbes = newQueue
     probe
   }
+
+  def queueSize = workQueue.size
+  def statusPollerEquals(otherStatusPoller: ActorRef) = statusPoller == otherStatusPoller
 }
 
 object TestJesApiQueryManager {
-  def props(statusPollers: ActorRef*): Props = Props(new TestJesApiQueryManager(statusPollers: _*))
+  import cromwell.backend.impl.jes.JesTestConfig.JesBackendConfigurationDescriptor
+  val jesConfiguration = new JesConfiguration(JesBackendConfigurationDescriptor)
+
+  def props(statusPollers: ActorRef*): Props = Props(new TestJesApiQueryManager(jesConfiguration.qps, statusPollers: _*))
 }
diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/statuspolling/JesPollingActorSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/statuspolling/JesPollingActorSpec.scala
index b861cbf0f51..aa29b0df382 100644
--- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/statuspolling/JesPollingActorSpec.scala
+++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/statuspolling/JesPollingActorSpec.scala
@@ -13,7 +13,7 @@ import com.google.api.client.googleapis.batch.BatchRequest
 import com.google.api.client.googleapis.batch.json.JsonBatchCallback
 import com.google.api.client.googleapis.json.GoogleJsonError
 import com.google.api.services.genomics.model.Operation
-import cromwell.backend.impl.jes.{Run, RunStatus}
+import cromwell.backend.impl.jes.{JesConfiguration, Run, RunStatus}
 import cromwell.backend.impl.jes.statuspolling.JesApiQueryManager.JesStatusPollQuery
 import cromwell.backend.impl.jes.statuspolling.JesPollingActor.JesPollFailed
 import cromwell.backend.impl.jes.statuspolling.TestJesPollingActor.{CallbackFailure, CallbackSuccess, JesBatchCallbackResponse}
@@ -29,9 +29,17 @@ class JesPollingActorSpec extends TestKitSuite("JesPollingActor") with FlatSpecL
   implicit val DefaultPatienceConfig = PatienceConfig(TestExecutionTimeout)
   val AwaitAlmostNothing = 30.milliseconds.dilated
 
+  import cromwell.backend.impl.jes.JesTestConfig.JesBackendConfigurationDescriptor
+  val jesConfiguration = new JesConfiguration(JesBackendConfigurationDescriptor)
+
   var managerProbe: TestProbe = _
   var jpActor: TestActorRef[TestJesPollingActor] = _
 
+  it should "correctly calculate batch intervals" in {
+    JesPollingActor.determineBatchInterval(10) shouldBe 9.seconds
+    JesPollingActor.determineBatchInterval(100) shouldBe 1.second
+  }
+
   it should "query for work and wait for a reply" in {
     managerProbe.expectMsgClass(max = TestExecutionTimeout, c = classOf[JesApiQueryManager.RequestJesPollingWork])
     managerProbe.expectNoMsg(max = AwaitAlmostNothing)
@@ -77,7 +85,7 @@ class JesPollingActorSpec extends TestKitSuite("JesPollingActor") with FlatSpecL
 
   before {
     managerProbe = TestProbe()
-    jpActor = TestActorRef(TestJesPollingActor.props(managerProbe.ref), managerProbe.ref)
+    jpActor = TestActorRef(TestJesPollingActor.props(managerProbe.ref, jesConfiguration), managerProbe.ref)
   }
 }
 
@@ -94,8 +102,9 @@ object JesPollingActorSpec extends Mockito {
   * - Mocks out the methods which actually call out to JES, and allows the callbacks to be triggered in a testable way
   * - Also waits a **lot** less time before polls!
*/ -class TestJesPollingActor(manager: ActorRef) extends JesPollingActor(manager) with Mockito { - override val BatchInterval = 10.milliseconds +class TestJesPollingActor(manager: ActorRef, qps: Int) extends JesPollingActor(manager, qps) with Mockito { + + override lazy val batchInterval = 10.milliseconds var operationStatusResponses: Queue[RunStatus] = Queue.empty var resultHandlers: Queue[JsonBatchCallback[Operation]] = Queue.empty @@ -123,7 +132,7 @@ class TestJesPollingActor(manager: ActorRef) extends JesPollingActor(manager) wi } object TestJesPollingActor { - def props(manager: ActorRef) = Props(new TestJesPollingActor(manager)) + def props(manager: ActorRef, jesConfiguration: JesConfiguration) = Props(new TestJesPollingActor(manager, jesConfiguration.qps)) sealed trait JesBatchCallbackResponse case object CallbackSuccess extends JesBatchCallbackResponse diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigAsyncJobExecutionActor.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigAsyncJobExecutionActor.scala index ad9761bca10..a47209c20ea 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigAsyncJobExecutionActor.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigAsyncJobExecutionActor.scala @@ -48,15 +48,20 @@ sealed trait ConfigAsyncJobExecutionActor extends SharedFileSystemAsyncJobExecut * @param taskName The name of the task to retrieve from the precomputed wdl namespace. * @param inputs The customized inputs to this task. */ - def writeTaskScript(script: File, taskName: String, inputs: CallInputs): Unit = { + def writeTaskScript(script: File, taskName: String, inputs: WorkflowCoercedInputs): Unit = { val task = configInitializationData.wdlNamespace.findTask(taskName). getOrElse(throw new RuntimeException(s"Unable to find task $taskName")) - val command = task.instantiateCommand(inputs, NoFunctions).get + val inputsWithFqns = inputs map { case (k, v) => s"$taskName.$k" -> v } + val command = task.instantiateCommand(task.inputsFromMap(inputsWithFqns), NoFunctions).get jobLogger.info(s"executing: $command") - script.write( - s"""|#!/bin/bash - |$command - |""".stripMargin) + val scriptBody = + s""" + +#!/bin/bash +$command + +""".trim + "\n" + script.write(scriptBody) () } @@ -64,7 +69,7 @@ sealed trait ConfigAsyncJobExecutionActor extends SharedFileSystemAsyncJobExecut * The inputs that are not specified by the config, that will be passed into a command for both submit and * submit-docker. */ - private lazy val standardInputs: CallInputs = { + private lazy val standardInputs: WorkflowCoercedInputs = { Map( JobNameInput -> WdlString(jobName), CwdInput -> WdlString(jobPaths.callRoot.toString), @@ -77,7 +82,7 @@ sealed trait ConfigAsyncJobExecutionActor extends SharedFileSystemAsyncJobExecut /** * Extra arguments if this is a submit-docker command, or Map.empty. */ - private lazy val dockerInputs: CallInputs = { + private lazy val dockerInputs: WorkflowCoercedInputs = { if (isDockerRun) { Map( DockerCwdInput -> WdlString(jobPaths.callDockerRoot.toString) @@ -91,7 +96,7 @@ sealed trait ConfigAsyncJobExecutionActor extends SharedFileSystemAsyncJobExecut * The arguments generated from the backend config's list of attributes. These will include things like CPU, memory, * and other custom arguments like "backend_queue_name", "backend_billing_project", etc. 
*/ - private lazy val runtimeAttributeInputs: CallInputs = { + private lazy val runtimeAttributeInputs: WorkflowCoercedInputs = { val declarationValidations = configInitializationData.declarationValidations val inputOptions = declarationValidations map { declarationValidation => declarationValidation.extractWdlValueOption(validatedRuntimeAttributes) map { wdlValue => diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigBackendFileHashing.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigBackendFileHashing.scala index 0fc377aac55..f8ff92cfd3b 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigBackendFileHashing.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigBackendFileHashing.scala @@ -3,13 +3,21 @@ package cromwell.backend.impl.sfs.config import akka.event.LoggingAdapter import better.files._ import cromwell.backend.callcaching.FileHashingActor.SingleFileHashRequest +import cromwell.core.path.DefaultPathBuilder import cromwell.util.TryWithResource._ +import scala.language.postfixOps import scala.util.Try private[config] object ConfigBackendFileHashing { - def getMd5Result(request: SingleFileHashRequest, log: LoggingAdapter): Try[String] = - tryWithResource(() => File(request.file.valueString).newInputStream) { inputStream => + def getMd5Result(request: SingleFileHashRequest, log: LoggingAdapter): Try[String] ={ + val path = DefaultPathBuilder.build(request.file.valueString) recover { + case failure => throw new RuntimeException("Failed to construct path to hash", failure) + } get + + tryWithResource(() => File(path).newInputStream) { inputStream => org.apache.commons.codec.digest.DigestUtils.md5Hex(inputStream) } + } + } diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigHashingStrategy.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigHashingStrategy.scala index 6261e65c96d..f453cf315db 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigHashingStrategy.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigHashingStrategy.scala @@ -4,13 +4,15 @@ import akka.event.LoggingAdapter import better.files.File import com.typesafe.config.Config import cromwell.backend.callcaching.FileHashingActor.SingleFileHashRequest +import cromwell.backend.sfs.SharedFileSystemBackendInitializationData +import cromwell.core.path.PathFactory import cromwell.util.TryWithResource._ import cromwell.util.FileUtil._ import net.ceedubs.ficus.Ficus._ import org.apache.commons.codec.digest.DigestUtils import org.slf4j.LoggerFactory -import scala.util.Try +import scala.util.{Failure, Try} object ConfigHashingStrategy { val logger = LoggerFactory.getLogger(getClass) @@ -37,14 +39,22 @@ abstract class ConfigHashingStrategy { protected lazy val checkSiblingMessage = if (checkSiblingMd5) "Check first for sibling md5 and if not found " else "" def getHash(request: SingleFileHashRequest, log: LoggingAdapter): Try[String] = { - val file = File(request.file.valueString).followSymlinks + def usingSFSInitData(initData: SharedFileSystemBackendInitializationData) = { + val pathBuilders = initData.workflowPaths.pathBuilders + val file = PathFactory.buildFile(request.file.valueString, pathBuilders).followSymlinks - if (checkSiblingMd5) { - precomputedMd5(file) match { - case Some(md5) => Try(md5.contentAsString) - case None => hash(file) - } - } 
else hash(file) + if (checkSiblingMd5) { + precomputedMd5(file) match { + case Some(md5) => Try(md5.contentAsString) + case None => hash(file) + } + } else hash(file) + } + + request.initializationData match { + case Some(initData: SharedFileSystemBackendInitializationData) => usingSFSInitData(initData) + case _ => Failure(new IllegalArgumentException("Need SharedFileSystemBackendInitializationData to calculate hash.")) + } } private def precomputedMd5(file: File): Option[File] = { diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigWdlNamespace.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigWdlNamespace.scala index cb56e35a082..161768da1c6 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigWdlNamespace.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigWdlNamespace.scala @@ -44,7 +44,7 @@ class ConfigWdlNamespace(backendConfig: Config) { */ val wdlNamespace = { try { - WdlNamespace.load(wdlSource) + WdlNamespace.loadUsingSource(wdlSource, None, None) } catch { case exception: Exception => throw new RuntimeException(s"Error parsing generated wdl:\n$wdlSource".stripMargin, exception) @@ -74,7 +74,7 @@ object ConfigWdlNamespace { private def makeTask(taskName: String, command: String, declarations: String): Task = { val wdlSource = makeWdlSource(taskName, command, declarations) - val wdlNamespace = WdlNamespace.load(wdlSource) + val wdlNamespace = WdlNamespace.loadUsingSource(wdlSource, None, None) wdlNamespace.findTask(taskName).getOrElse(throw new RuntimeException(s"Couldn't find task $taskName")) } diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/DeclarationValidation.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/DeclarationValidation.scala index ec3aecbf76d..37a42a5c38a 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/DeclarationValidation.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/DeclarationValidation.scala @@ -24,7 +24,7 @@ object DeclarationValidation { * @return The DeclarationValidation object for the declaration. */ def fromDeclaration(declaration: Declaration): DeclarationValidation = { - declaration.name match { + declaration.unqualifiedName match { // Docker and CPU are special keys understood by cromwell. case DockerValidation.key => new DeclarationValidation(declaration, DockerValidation.instance) case CpuValidation.key => new DeclarationValidation(declaration, CpuValidation.default) @@ -34,11 +34,11 @@ object DeclarationValidation { // All other declarations must be a Boolean, Float, Integer, or String. 
case _ => val validator: PrimitiveRuntimeAttributesValidation[_] = declaration.wdlType match { - case WdlBooleanType => new BooleanRuntimeAttributesValidation(declaration.name) - case WdlFloatType => new FloatRuntimeAttributesValidation(declaration.name) - case WdlIntegerType => new IntRuntimeAttributesValidation(declaration.name) - case WdlStringType => new StringRuntimeAttributesValidation(declaration.name) - case other => throw new RuntimeException(s"Unsupported config runtime attribute $other ${declaration.name}") + case WdlBooleanType => new BooleanRuntimeAttributesValidation(declaration.unqualifiedName) + case WdlFloatType => new FloatRuntimeAttributesValidation(declaration.unqualifiedName) + case WdlIntegerType => new IntRuntimeAttributesValidation(declaration.unqualifiedName) + case WdlStringType => new StringRuntimeAttributesValidation(declaration.unqualifiedName) + case other => throw new RuntimeException(s"Unsupported config runtime attribute $other ${declaration.unqualifiedName}") } new DeclarationValidation(declaration, validator) } @@ -52,7 +52,7 @@ object DeclarationValidation { * @param instanceValidation A basic instance validation for the declaration. */ class DeclarationValidation(declaration: Declaration, instanceValidation: RuntimeAttributesValidation[_]) { - val key = declaration.name + val key = declaration.unqualifiedName /** * Creates a validation, by adding on defaults if they're specified in the declaration, and then making the @@ -74,7 +74,7 @@ class DeclarationValidation(declaration: Declaration, instanceValidation: Runtim val validationDefault = if (declaration.expression.isDefined) default(instanceValidation, declaration.expression.get) else instanceValidation - if (declaration.postfixQuantifier.contains("?")) validationDefault.optional else validationDefault + if (declaration.wdlType.isInstanceOf[WdlOptionalType]) validationDefault.optional else validationDefault } /** @@ -151,7 +151,7 @@ class MemoryDeclarationValidation(declaration: Declaration) } private lazy val declarationMemoryUnit: MemoryUnit = { - val suffix = memoryUnitSuffix(declaration.name) + val suffix = memoryUnitSuffix(declaration.unqualifiedName) val memoryUnitOption = MemoryUnit.values.find(_.suffixes.map(_.toLowerCase).contains(suffix.toLowerCase)) memoryUnitOption match { case Some(memoryUnit) => memoryUnit diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/GcsWorkflowFileSystemProvider.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/GcsWorkflowFileSystemProvider.scala deleted file mode 100644 index d96140014f3..00000000000 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/GcsWorkflowFileSystemProvider.scala +++ /dev/null @@ -1,36 +0,0 @@ -package cromwell.backend.sfs - -import cats.data.Validated.{Invalid, Valid} -import cromwell.backend.wfs.{WorkflowFileSystemProvider, WorkflowFileSystemProviderParams} -import cromwell.filesystems.gcs.GoogleAuthMode.GoogleAuthOptions -import cromwell.filesystems.gcs.{GcsFileSystem, GcsFileSystemProvider, GoogleConfiguration} -import net.ceedubs.ficus.Ficus._ -import wdl4s.ValidationException - -import scala.util.Try - -object GcsWorkflowFileSystemProvider extends WorkflowFileSystemProvider { - override def fileSystemOption(params: WorkflowFileSystemProviderParams): Option[GcsFileSystem] = { - params.fileSystemConfig.as[Option[String]]("gcs.auth") map gcsFileSystem(params) - } - - private def gcsFileSystem(params: WorkflowFileSystemProviderParams)(gcsAuthName: String): GcsFileSystem = { - val 
workflowOptions = params.workflowOptions - val globalConfig = params.globalConfig - val googleConfig = GoogleConfiguration(globalConfig) - val googleAuthModeValidation = googleConfig.auth(gcsAuthName) - - val gcsAuthMode = googleAuthModeValidation match { - case Valid(googleAuthMode) => googleAuthMode - case Invalid(errors) => - throw new ValidationException("Could not create gcs filesystem from configuration", errors) - } - - val authOptions = new GoogleAuthOptions { - override def get(key: String): Try[String] = workflowOptions.get(key) - } - - val storage = gcsAuthMode.buildStorage(authOptions, googleConfig.applicationName) - GcsFileSystem(GcsFileSystemProvider(storage)(params.fileSystemExecutionContext)) - } -} diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystem.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystem.scala index 20c8c3d39bc..188391c450b 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystem.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystem.scala @@ -1,22 +1,24 @@ package cromwell.backend.sfs -import java.nio.file.{FileSystem, Path, Paths} +import java.nio.file.{Path, Paths} import cats.instances.try_._ import cats.syntax.functor._ import com.typesafe.config.Config +import com.typesafe.scalalogging.StrictLogging import cromwell.backend.io.JobPaths import cromwell.core._ -import wdl4s.CallInputs +import cromwell.core.path.PathFactory +import cromwell.util.TryUtil +import wdl4s.EvaluatedTaskInputs import wdl4s.types.{WdlArrayType, WdlMapType} -import wdl4s.util.TryUtil import wdl4s.values._ import scala.collection.JavaConverters._ import scala.language.postfixOps import scala.util.{Failure, Success, Try} -object SharedFileSystem { +object SharedFileSystem extends StrictLogging { import better.files._ final case class AttemptedLookupResult(name: String, value: Try[WdlValue]) { @@ -42,31 +44,38 @@ object SharedFileSystem { } private def localizePathViaCopy(originalPath: File, executionPath: File): Try[Unit] = { - executionPath.parent.createDirectories() - val executionTmpPath = pathPlusSuffix(executionPath, ".tmp") - Try(originalPath.copyTo(executionTmpPath, overwrite = true).moveTo(executionPath, overwrite = true)).void + val action = Try { + executionPath.parent.createDirectories() + val executionTmpPath = pathPlusSuffix(executionPath, ".tmp") + originalPath.copyTo(executionTmpPath, overwrite = true).moveTo(executionPath, overwrite = true) + }.void + logOnFailure(action, "copy") } private def localizePathViaHardLink(originalPath: File, executionPath: File): Try[Unit] = { - executionPath.parent.createDirectories() - // link.linkTo(target) returns target, - // however we want to return the link, not the target, so map the result back to executionPath - - // -Ywarn-value-discard - // Try(executionPath.linkTo(originalPath, symbolic = false)) map { _ => executionPath } - Try { executionPath.linkTo(originalPath, symbolic = false) } void + val action = Try { + executionPath.parent.createDirectories() + executionPath.linkTo(originalPath, symbolic = false) + }.void + logOnFailure(action, "hard link") } private def localizePathViaSymbolicLink(originalPath: File, executionPath: File): Try[Unit] = { if (originalPath.isDirectory) Failure(new UnsupportedOperationException("Cannot localize directory with symbolic links")) else { - executionPath.parent.createDirectories() - // -Ywarn-value-discard - // Try(executionPath.linkTo(originalPath, symbolic = 
true)) map { _ => executionPath } - Try { executionPath.linkTo(originalPath, symbolic = true) } void + val action = Try { + executionPath.parent.createDirectories() + executionPath.linkTo(originalPath, symbolic = true) + }.void + logOnFailure(action, "symbolic link") } } + private def logOnFailure(action: Try[Unit], actionLabel: String): Try[Unit] = { + if (action.isFailure) logger.warn(s"Localization via $actionLabel has failed: ${action.failed.get.getMessage}") + action + } + private def duplicate(description: String, source: File, dest: File, strategies: Stream[DuplicationStrategy]) = { import cromwell.util.FileUtil._ @@ -134,6 +143,9 @@ trait SharedFileSystem extends PathFactory { case array: WdlArray => val mappedArray = array.value map outputMapper(job) TryUtil.sequence(mappedArray) map { WdlArray(array.wdlType, _) } + case map: WdlMap => + val mappedMap = map.value mapValues outputMapper(job) + TryUtil.sequenceMap(mappedMap) map { WdlMap(map.wdlType, _) } case other => Success(other) } } @@ -145,11 +157,8 @@ trait SharedFileSystem extends PathFactory { /** * Return a possibly altered copy of inputs reflecting any localization of input file paths that might have * been performed for this `Backend` implementation. - * NOTE: This ends up being a backdoor implementation of Backend.adjustInputPaths as both LocalBackend and SgeBackend - * end up with this implementation and thus use it to satisfy their contract with Backend. - * This is yuck-tastic and I consider this a FIXME, but not for this refactor */ - def localizeInputs(inputsRoot: Path, docker: Boolean, filesystems: List[FileSystem], inputs: CallInputs): Try[CallInputs] = { + def localizeInputs(inputsRoot: Path, docker: Boolean)(inputs: EvaluatedTaskInputs): Try[EvaluatedTaskInputs] = { val strategies = if (docker) DockerLocalizers else Localizers // Use URI to identify protocol scheme and strip it out @@ -161,12 +170,13 @@ trait SharedFileSystem extends PathFactory { host map { h => Paths.get(h, uriPath) } getOrElse Paths.get(uriPath) } - /** + /* * Transform an original input path to a path in the call directory. * The new path matches the original path, it only "moves" the root to be the call directory. 
*/ + def toCallPath(path: String): Try[PairOfFiles] = Try { - val src = buildFile(path, filesystems) + val src = buildFile(path) // Strip out potential prefix protocol val localInputPath = stripProtocolScheme(src.path) val dest = if (File(inputsRoot).isParentOf(localInputPath)) File(localInputPath) @@ -181,7 +191,7 @@ trait SharedFileSystem extends PathFactory { // Optional function to adjust the path to "docker path" if the call runs in docker val localizeFunction = localizeWdlValue(toCallPath, strategies.toStream) _ val localizedValues = inputs.toSeq map { - case (name, value) => localizeFunction(value) map { name -> _ } + case (declaration, value) => localizeFunction(value) map { declaration -> _ } } TryUtil.sequence(localizedValues, "Failures during localization").map(_.toMap) recover { diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemAsyncJobExecutionActor.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemAsyncJobExecutionActor.scala index 3013b12af94..ef5ef5e89f5 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemAsyncJobExecutionActor.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemAsyncJobExecutionActor.scala @@ -1,6 +1,6 @@ package cromwell.backend.sfs -import java.nio.file.{FileAlreadyExistsException, Path, Paths} +import java.nio.file.{FileAlreadyExistsException, Path} import akka.actor.{Actor, ActorLogging, ActorRef} import akka.event.LoggingReceive @@ -12,9 +12,11 @@ import cromwell.backend.async.{AbortedExecutionHandle, AsyncBackendJobExecutionA import cromwell.backend.io.WorkflowPathsBackendInitializationData import cromwell.backend.sfs.SharedFileSystem._ import cromwell.backend.validation._ -import cromwell.backend.{BackendConfigurationDescriptor, BackendInitializationData, BackendJobDescriptor, OutputEvaluator} -import cromwell.core.JobOutputs +import cromwell.backend.wdl.{OutputEvaluator, Command} +import cromwell.backend.{BackendConfigurationDescriptor, BackendInitializationData, BackendJobDescriptor} +import cromwell.core.CallOutputs import cromwell.core.logging.JobLogging +import cromwell.core.path.DefaultPathBuilder import cromwell.core.retry.SimpleExponentialBackoff import cromwell.services.keyvalue.KeyValueServiceActor._ import wdl4s.values.{WdlArray, WdlFile, WdlMap, WdlValue} @@ -135,11 +137,13 @@ trait SharedFileSystemAsyncJobExecutionActor override lazy val backendInitializationDataOption = params.backendInitializationDataOption - def toDockerPath(path: WdlValue): WdlValue = { + def toUnixPath(docker: Boolean)(path: WdlValue): WdlValue = { path match { - case file: WdlFile => WdlFile(jobPaths.toDockerPath(Paths.get(path.valueString)).toString) - case array: WdlArray => WdlArray(array.wdlType, array.value map toDockerPath) - case map: WdlMap => WdlMap(map.wdlType, map.value mapValues toDockerPath) + case file: WdlFile => + val cleanPath = DefaultPathBuilder.build(path.valueString).get + WdlFile(if (docker) jobPaths.toDockerPath(cleanPath).toString else cleanPath.toString) + case array: WdlArray => WdlArray(array.wdlType, array.value map toUnixPath(docker)) + case map: WdlMap => WdlMap(map.wdlType, map.value mapValues toUnixPath(docker)) case wdlValue => wdlValue } } @@ -150,8 +154,8 @@ trait SharedFileSystemAsyncJobExecutionActor lazy val workflowDescriptor = jobDescriptor.workflowDescriptor lazy val call = jobDescriptor.key.call - lazy val fileSystems = 
WorkflowPathsBackendInitializationData.fileSystems(backendInitializationDataOption) - lazy val callEngineFunction = SharedFileSystemExpressionFunctions(jobPaths, fileSystems) + lazy val pathBuilders = WorkflowPathsBackendInitializationData.pathBuilders(backendInitializationDataOption) + lazy val callEngineFunction = SharedFileSystemExpressionFunctions(jobPaths, pathBuilders) override lazy val workflowId = jobDescriptor.workflowDescriptor.id override lazy val jobTag = jobDescriptor.key.tag @@ -165,12 +169,18 @@ trait SharedFileSystemAsyncJobExecutionActor } def instantiatedScript: String = { - val pathTransformFunction: WdlValue => WdlValue = if (isDockerRun) toDockerPath else identity - val tryCommand = sharedFileSystem.localizeInputs(jobPaths.callInputsRoot, - isDockerRun, fileSystems, jobDescriptor.inputs) flatMap { localizedInputs => - call.task.instantiateCommand(localizedInputs, callEngineFunction, pathTransformFunction) + val pathTransformFunction = toUnixPath(isDockerRun) _ + val localizer = sharedFileSystem.localizeInputs(jobPaths.callInputsRoot, isDockerRun) _ + + Command.instantiate( + jobDescriptor, + callEngineFunction, + localizer, + pathTransformFunction + ) match { + case Success(command) => command + case Failure(ex) => throw new RuntimeException("Failed to instantiate command line", ex) } - tryCommand.get } override def executeOrRecover(mode: ExecutionMode)(implicit ec: ExecutionContext) = { @@ -244,14 +254,19 @@ trait SharedFileSystemAsyncJobExecutionActor val rcPath = if (isDockerRun) jobPaths.toDockerPath(jobPaths.returnCode) else jobPaths.returnCode val rcTmpPath = s"$rcPath.tmp" - File(jobPaths.script).write( - s"""#!/bin/sh - |( - | cd $cwd - | $instantiatedCommand - |) - |echo $$? > $rcTmpPath - |mv $rcTmpPath $rcPath""".stripMargin) + val scriptBody = s""" + +#!/bin/sh +( + cd $cwd + $instantiatedCommand +) +echo $$? 
> $rcTmpPath +mv $rcTmpPath $rcPath + +""".trim + "\n" + + File(jobPaths.script).write(scriptBody) } /** @@ -313,7 +328,7 @@ trait SharedFileSystemAsyncJobExecutionActor def processReturnCode()(implicit ec: ExecutionContext): Future[ExecutionHandle] = { val returnCodeTry = Try(File(jobPaths.returnCode).contentAsString.stripLineEnd.toInt) - lazy val badReturnCodeMessage = s"Call ${call.fullyQualifiedName}: return code was ${returnCodeTry.getOrElse("(none)")}" + lazy val badReturnCodeMessage = s"Call ${jobDescriptor.key.tag}: return code was ${returnCodeTry.getOrElse("(none)")}" lazy val badReturnCodeResponse = Future.successful( FailedNonRetryableExecutionHandle(new Exception(badReturnCodeMessage), returnCodeTry.toOption)) @@ -323,7 +338,7 @@ trait SharedFileSystemAsyncJobExecutionActor def processSuccess(returnCode: Int) = { val successfulFuture = for { outputs <- Future.fromTry(processOutputs()) - } yield SuccessfulExecutionHandle(outputs, returnCode, jobPaths.detritusPaths.mapValues(_.toString), Seq.empty) + } yield SuccessfulExecutionHandle(outputs, returnCode, jobPaths.detritusPaths, Seq.empty) successfulFuture recover { case failed: Throwable => @@ -371,7 +386,7 @@ trait SharedFileSystemAsyncJobExecutionActor } } - private def processOutputs(): Try[JobOutputs] = { + private def processOutputs(): Try[CallOutputs] = { OutputEvaluator.evaluateOutputs(jobDescriptor, callEngineFunction, sharedFileSystem.outputMapper(jobPaths)) } } diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemBackendLifecycleActorFactory.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemBackendLifecycleActorFactory.scala index edacd530311..1ac3093dce1 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemBackendLifecycleActorFactory.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemBackendLifecycleActorFactory.scala @@ -1,11 +1,16 @@ package cromwell.backend.sfs import akka.actor.{ActorRef, Props} +import cats.data.Validated.{Invalid, Valid} import cromwell.backend.BackendJobExecutionActor.BackendJobExecutionResponse import cromwell.backend.{BackendConfigurationDescriptor, BackendInitializationData, BackendJobDescriptor, BackendJobDescriptorKey, BackendLifecycleActorFactory, BackendWorkflowDescriptor} import cromwell.core.Dispatcher import cromwell.core.Dispatcher._ -import wdl4s.Call +import cromwell.core.path.{DefaultPathBuilderFactory, PathBuilderFactory} +import cromwell.filesystems.gcs.{GcsPathBuilderFactory, GoogleConfiguration} +import lenthall.exception.MessageAggregation +import net.ceedubs.ficus.Ficus._ +import wdl4s.TaskCall import wdl4s.expression.WdlStandardLibraryFunctions import scala.concurrent.Promise @@ -17,6 +22,23 @@ import scala.concurrent.Promise */ trait SharedFileSystemBackendLifecycleActorFactory extends BackendLifecycleActorFactory { + /** + * If the backend sets a gcs authentication mode, try to create a PathBuilderFactory with it. 
+ */ + lazy val gcsPathBuilderFactory: Option[GcsPathBuilderFactory] = { + configurationDescriptor.backendConfig.as[Option[String]]("filesystems.gcs.auth") map { configAuth => + GoogleConfiguration(configurationDescriptor.globalConfig).auth(configAuth) match { + case Valid(auth) => GcsPathBuilderFactory(auth) + case Invalid(error) => throw new MessageAggregation { + override def exceptionContext: String = "Failed to parse gcs auth configuration" + override def errorMessages: Traversable[String] = error.toList + } + } + } + } + + lazy val pathBuilderFactories: List[PathBuilderFactory] = List(gcsPathBuilderFactory, Option(DefaultPathBuilderFactory)).flatten + /** * Config values for the backend, and a pointer to the global config. * @@ -41,10 +63,10 @@ trait SharedFileSystemBackendLifecycleActorFactory extends BackendLifecycleActor */ def asyncJobExecutionActorClass: Class[_ <: SharedFileSystemAsyncJobExecutionActor] - override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, calls: Seq[Call], + override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, calls: Set[TaskCall], serviceRegistryActor: ActorRef) = { val params = SharedFileSystemInitializationActorParams(serviceRegistryActor, workflowDescriptor, - configurationDescriptor, calls) + configurationDescriptor, calls, pathBuilderFactories) Option(Props(initializationActorClass, params).withDispatcher(Dispatcher.BackendDispatcher)) } diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemCacheHitCopyingActor.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemCacheHitCopyingActor.scala index 62dc98b0781..6ba44914da9 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemCacheHitCopyingActor.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemCacheHitCopyingActor.scala @@ -1,10 +1,13 @@ package cromwell.backend.sfs -import java.nio.file.{Path, Paths} +import java.nio.file.Path import akka.actor.ActorRef import cromwell.backend.callcaching.CacheHitDuplicating import cromwell.backend.{BackendCacheHitCopyingActor, BackendConfigurationDescriptor, BackendInitializationData, BackendJobDescriptor} +import cromwell.core.path.PathFactory + +import scala.util.Try class SharedFileSystemCacheHitCopyingActor(override val jobDescriptor: BackendJobDescriptor, override val configurationDescriptor: BackendConfigurationDescriptor, @@ -17,7 +20,7 @@ class SharedFileSystemCacheHitCopyingActor(override val jobDescriptor: BackendJo override lazy val destinationJobDetritusPaths = jobPaths.detritusPaths - override protected def getPath(file: String) = Paths.get(file) + override protected def getPath(file: String) = Try(PathFactory.buildPath(file, jobPaths.pathBuilders)) override protected def duplicate(source: Path, destination: Path) = { // -Ywarn-value-discard diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemExpressionFunctions.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemExpressionFunctions.scala index 1f73cac3838..7dc5172ba91 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemExpressionFunctions.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemExpressionFunctions.scala @@ -1,12 +1,13 @@ package cromwell.backend.sfs -import java.nio.file.{FileSystem, Path} +import java.nio.file.Path -import cromwell.backend.io.{JobPaths, 
WorkflowPathsBackendInitializationData} +import cromwell.backend.io.{JobPaths, JobPathsWithDocker, WorkflowPathsBackendInitializationData} import cromwell.backend.wdl._ import cromwell.backend.{BackendConfigurationDescriptor, BackendInitializationData, BackendJobDescriptorKey, BackendWorkflowDescriptor} import cromwell.core.CallContext -import wdl4s.expression.WdlStandardLibraryFunctions +import cromwell.core.path.PathBuilder +import wdl4s.expression.PureStandardLibraryFunctionsLike import wdl4s.values.{WdlFile, WdlValue} import scala.language.postfixOps @@ -20,49 +21,50 @@ object SharedFileSystemExpressionFunctions { def apply(workflowDescriptor: BackendWorkflowDescriptor, jobKey: BackendJobDescriptorKey, configurationDescriptor: BackendConfigurationDescriptor, - fileSystems: List[FileSystem]): SharedFileSystemExpressionFunctions = { - val jobPaths = new JobPaths(workflowDescriptor, configurationDescriptor.backendConfig, jobKey) + pathBuilders: List[PathBuilder]): SharedFileSystemExpressionFunctions = { + val jobPaths = new JobPathsWithDocker(jobKey, workflowDescriptor, configurationDescriptor.backendConfig) val callContext = CallContext( jobPaths.callExecutionRoot, jobPaths.stdout.toString, jobPaths.stderr.toString ) - new SharedFileSystemExpressionFunctions(fileSystems, callContext) + new SharedFileSystemExpressionFunctions(pathBuilders, callContext) } - def apply(jobPaths: JobPaths, fileSystems: List[FileSystem]): SharedFileSystemExpressionFunctions = { + def apply(jobPaths: JobPaths, pathBuilders: List[PathBuilder]): SharedFileSystemExpressionFunctions = { val callContext = CallContext( jobPaths.callExecutionRoot, jobPaths.stdout.toString, jobPaths.stderr.toString ) - new SharedFileSystemExpressionFunctions(fileSystems, callContext) + new SharedFileSystemExpressionFunctions(pathBuilders, callContext) } def apply(workflowDescriptor: BackendWorkflowDescriptor, configurationDescriptor: BackendConfigurationDescriptor, jobKey: BackendJobDescriptorKey, initializationData: Option[BackendInitializationData]) = { - val jobPaths = new JobPaths(workflowDescriptor, configurationDescriptor.backendConfig, jobKey) + val jobPaths = new JobPathsWithDocker(jobKey, workflowDescriptor, configurationDescriptor.backendConfig) val callContext = CallContext( jobPaths.callExecutionRoot, jobPaths.stdout.toString, jobPaths.stderr.toString ) - new SharedFileSystemExpressionFunctions(WorkflowPathsBackendInitializationData.fileSystems(initializationData), callContext) + new SharedFileSystemExpressionFunctions(WorkflowPathsBackendInitializationData.pathBuilders(initializationData), callContext) } } -class SharedFileSystemExpressionFunctions(override val fileSystems: List[FileSystem], +class SharedFileSystemExpressionFunctions(override val pathBuilders: List[PathBuilder], context: CallContext - ) extends WdlStandardLibraryFunctions with PureFunctions with ReadLikeFunctions with WriteFunctions { + ) extends PureStandardLibraryFunctionsLike with ReadLikeFunctions with WriteFunctions { import SharedFileSystemExpressionFunctions._ import better.files._ + override def writeTempFile(path: String, prefix: String, suffix: String, content: String): String = super[WriteFunctions].writeTempFile(path, prefix, suffix, content) override def globPath(glob: String) = context.root.toString override def glob(path: String, pattern: String): Seq[String] = { - File(toPath(path)).glob(s"**/$pattern") map { _.pathAsString } toSeq + File(context.root).glob(s"**/$pattern") map { _.pathAsString } toSeq } override val writeDirectory = 
context.root diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemInitializationActor.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemInitializationActor.scala index 54a370b3993..52c8fbeaf72 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemInitializationActor.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemInitializationActor.scala @@ -4,10 +4,11 @@ import akka.actor.ActorRef import better.files._ import cromwell.backend.io.{WorkflowPaths, WorkflowPathsBackendInitializationData} import cromwell.backend.validation.RuntimeAttributesDefault -import cromwell.backend.wfs.{DefaultWorkflowFileSystemProvider, WorkflowFileSystemProvider} +import cromwell.backend.wfs.WorkflowPathBuilder import cromwell.backend.{BackendConfigurationDescriptor, BackendInitializationData, BackendWorkflowDescriptor, BackendWorkflowInitializationActor} -import cromwell.core.{Dispatcher, WorkflowOptions} -import wdl4s.Call +import cromwell.core.WorkflowOptions +import cromwell.core.path.PathBuilderFactory +import wdl4s.TaskCall import wdl4s.values.WdlValue import scala.concurrent.Future @@ -18,7 +19,8 @@ case class SharedFileSystemInitializationActorParams serviceRegistryActor: ActorRef, workflowDescriptor: BackendWorkflowDescriptor, configurationDescriptor: BackendConfigurationDescriptor, - calls: Seq[Call] + calls: Set[TaskCall], + pathBuilderFactories: List[PathBuilderFactory] ) class SharedFileSystemBackendInitializationData @@ -37,7 +39,7 @@ class SharedFileSystemInitializationActor(params: SharedFileSystemInitialization override lazy val workflowDescriptor: BackendWorkflowDescriptor = params.workflowDescriptor override lazy val configurationDescriptor: BackendConfigurationDescriptor = params.configurationDescriptor - override lazy val calls: Seq[Call] = params.calls + override lazy val calls: Set[TaskCall] = params.calls override lazy val serviceRegistryActor: ActorRef = params.serviceRegistryActor def runtimeAttributesBuilder: SharedFileSystemValidatedRuntimeAttributesBuilder = @@ -49,11 +51,9 @@ class SharedFileSystemInitializationActor(params: SharedFileSystemInitialization ).toMap } - val providers = Seq(GcsWorkflowFileSystemProvider, DefaultWorkflowFileSystemProvider) - val ioDispatcher = context.system.dispatchers.lookup(Dispatcher.IoDispatcher) + val pathBuilders = params.pathBuilderFactories map { _.withOptions(workflowDescriptor.workflowOptions)(context.system) } - val workflowPaths = WorkflowFileSystemProvider.workflowPaths(configurationDescriptor, workflowDescriptor, - providers, ioDispatcher) + val workflowPaths = WorkflowPathBuilder.workflowPaths(configurationDescriptor, workflowDescriptor, pathBuilders) override def beforeAll(): Future[Option[BackendInitializationData]] = { Future.fromTry(Try { diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemJobCachingActorHelper.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemJobCachingActorHelper.scala index d9d4f42137d..54453ab756e 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemJobCachingActorHelper.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemJobCachingActorHelper.scala @@ -4,7 +4,7 @@ import akka.actor.{Actor, ActorRef} import com.typesafe.config.{Config, ConfigFactory} import cromwell.backend.BackendInitializationData import 
cromwell.backend.callcaching.JobCachingActorHelper -import cromwell.backend.io.JobPaths +import cromwell.backend.io.{JobPathsWithDocker, WorkflowPathsBackendInitializationData} import cromwell.backend.validation.{RuntimeAttributesValidation, ValidatedRuntimeAttributes} import cromwell.core.logging.JobLogging import net.ceedubs.ficus.Ficus._ @@ -17,7 +17,7 @@ trait SharedFileSystemJobCachingActorHelper extends JobCachingActorHelper { def serviceRegistryActor: ActorRef lazy val jobPaths = - new JobPaths(jobDescriptor.workflowDescriptor, configurationDescriptor.backendConfig, jobDescriptor.key) + new JobPathsWithDocker(jobDescriptor.key, jobDescriptor.workflowDescriptor, configurationDescriptor.backendConfig) lazy val initializationData = BackendInitializationData. as[SharedFileSystemBackendInitializationData](backendInitializationDataOption) @@ -37,6 +37,7 @@ trait SharedFileSystemJobCachingActorHelper extends JobCachingActorHelper { } lazy val sharedFileSystem = new SharedFileSystem { + override val pathBuilders = WorkflowPathsBackendInitializationData.pathBuilders(backendInitializationDataOption) override lazy val sharedFileSystemConfig = { configurationDescriptor.backendConfig.as[Option[Config]]("filesystems.local").getOrElse(ConfigFactory.empty()) } diff --git a/supportedBackends/sfs/src/test/scala/cromwell/backend/impl/sfs/config/ConfigHashingStrategySpec.scala b/supportedBackends/sfs/src/test/scala/cromwell/backend/impl/sfs/config/ConfigHashingStrategySpec.scala index 81edb6f6062..bd25dfc5183 100644 --- a/supportedBackends/sfs/src/test/scala/cromwell/backend/impl/sfs/config/ConfigHashingStrategySpec.scala +++ b/supportedBackends/sfs/src/test/scala/cromwell/backend/impl/sfs/config/ConfigHashingStrategySpec.scala @@ -6,6 +6,9 @@ import akka.event.LoggingAdapter import better.files._ import com.typesafe.config.{ConfigFactory, ConfigValueFactory} import cromwell.backend.callcaching.FileHashingActor.SingleFileHashRequest +import cromwell.backend.io.WorkflowPaths +import cromwell.backend.sfs.SharedFileSystemBackendInitializationData +import cromwell.core.path.DefaultPathBuilder import org.apache.commons.codec.digest.DigestUtils import org.scalatest.prop.TableDrivenPropertyChecks import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers} @@ -43,7 +46,14 @@ class ConfigHashingStrategySpec extends FlatSpec with Matchers with TableDrivenP symLink } else file + val workflowPaths = mock[WorkflowPaths] + workflowPaths.pathBuilders returns List(DefaultPathBuilder) + + val initData = mock[SharedFileSystemBackendInitializationData] + initData.workflowPaths returns workflowPaths + request.file returns WdlFile(requestFile.pathAsString) + request.initializationData returns Option(initData) request } diff --git a/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemInitializationActorSpec.scala b/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemInitializationActorSpec.scala index 64dbfa9d73b..1cbfd2cfeee 100644 --- a/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemInitializationActorSpec.scala +++ b/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemInitializationActorSpec.scala @@ -8,7 +8,7 @@ import cromwell.backend.{BackendConfigurationDescriptor, BackendWorkflowDescript import cromwell.core.TestKitSuite import cromwell.core.logging.LoggingTest._ import org.scalatest.{Matchers, WordSpecLike} -import wdl4s.Call +import wdl4s.TaskCall import scala.concurrent.duration._ @@ -30,14 +30,14 @@ class 
SharedFileSystemInitializationActorSpec extends TestKitSuite("SharedFileSy | RUNTIME |} | - |workflow hello { + |workflow wf_hello { | call hello |} """.stripMargin - private def getActorRef(workflowDescriptor: BackendWorkflowDescriptor, calls: Seq[Call], + private def getActorRef(workflowDescriptor: BackendWorkflowDescriptor, calls: Set[TaskCall], conf: BackendConfigurationDescriptor) = { - val params = SharedFileSystemInitializationActorParams(emptyActor, workflowDescriptor, conf, calls) + val params = SharedFileSystemInitializationActorParams(emptyActor, workflowDescriptor, conf, calls, List.empty) val props = Props(new SharedFileSystemInitializationActor(params)) system.actorOf(props, "SharedFileSystemInitializationActor") } @@ -47,7 +47,7 @@ class SharedFileSystemInitializationActorSpec extends TestKitSuite("SharedFileSy within(Timeout) { val workflowDescriptor = buildWorkflowDescriptor(HelloWorld, runtime = """runtime { unsupported: 1 }""") val conf = emptyBackendConfig - val backend = getActorRef(workflowDescriptor, workflowDescriptor.workflowNamespace.workflow.calls, conf) + val backend = getActorRef(workflowDescriptor, workflowDescriptor.workflow.taskCalls, conf) val pattern = "Key/s [unsupported] is/are not supported by backend. " + "Unsupported attributes will not be part of jobs executions." EventFilter.warning(pattern = escapePattern(pattern), occurrences = 1) intercept { diff --git a/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemJobExecutionActorSpec.scala b/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemJobExecutionActorSpec.scala index f697a7f4b11..cd573576620 100644 --- a/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemJobExecutionActorSpec.scala +++ b/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemJobExecutionActorSpec.scala @@ -5,17 +5,16 @@ import java.nio.file.{Files, Paths} import akka.testkit.TestDuration import better.files._ import com.typesafe.config.ConfigFactory -import cromwell.backend.BackendJobExecutionActor.{AbortedResponse, FailedNonRetryableResponse, SucceededResponse} +import cromwell.backend.BackendJobExecutionActor.{AbortedResponse, JobFailedNonRetryableResponse, JobSucceededResponse} import cromwell.backend.BackendLifecycleActor.AbortJobCommand import cromwell.backend.io.TestWorkflows._ -import cromwell.backend.io.{JobPaths, TestWorkflows} +import cromwell.backend.io.{JobPathsWithDocker, TestWorkflows} import cromwell.backend.sfs.TestLocalAsyncJobExecutionActor._ import cromwell.backend.{BackendConfigurationDescriptor, BackendJobDescriptor, BackendJobDescriptorKey, BackendSpec, RuntimeAttributeDefinition} import cromwell.core.Tags._ import cromwell.core._ import cromwell.services.keyvalue.KeyValueServiceActor.{KvJobKey, KvPair, ScopedKey} import org.scalatest.concurrent.PatienceConfiguration.Timeout -import org.scalatest.mockito.MockitoSugar import org.scalatest.prop.TableDrivenPropertyChecks import org.scalatest.{FlatSpecLike, OptionValues} import wdl4s.types._ @@ -25,17 +24,17 @@ import wdl4s.values._ import scala.concurrent.duration._ class SharedFileSystemJobExecutionActorSpec extends TestKitSuite("SharedFileSystemJobExecutionActorSpec") - with FlatSpecLike with BackendSpec with MockitoSugar with TableDrivenPropertyChecks with OptionValues { + with FlatSpecLike with BackendSpec with TableDrivenPropertyChecks with OptionValues { behavior of "SharedFileSystemJobExecutionActor" lazy val runtimeAttributeDefinitions = 
SharedFileSystemValidatedRuntimeAttributesBuilder.default.definitions.toSet def executeSpec(docker: Boolean) = { - val expectedOutputs: JobOutputs = Map( + val expectedOutputs: CallOutputs = Map( "salutation" -> JobOutput(WdlString("Hello you !")) ) - val expectedResponse = SucceededResponse(mock[BackendJobDescriptorKey], Some(0), expectedOutputs, None, Seq.empty) + val expectedResponse = JobSucceededResponse(mock[BackendJobDescriptorKey], Some(0), expectedOutputs, None, Seq.empty) val runtime = if (docker) """runtime { docker: "ubuntu:latest" }""" else "" val workflowDescriptor = buildWorkflowDescriptor(HelloWorld, runtime = runtime) val workflow = TestWorkflow(workflowDescriptor, emptyBackendConfig, expectedResponse) @@ -52,7 +51,7 @@ class SharedFileSystemJobExecutionActorSpec extends TestKitSuite("SharedFileSyst } it should "send back an execution failure if the task fails" in { - val expectedResponse = FailedNonRetryableResponse(mock[BackendJobDescriptorKey], new Exception(""), Option(1)) + val expectedResponse = JobFailedNonRetryableResponse(mock[BackendJobDescriptorKey], new Exception(""), Option(1)) val workflow = TestWorkflow(buildWorkflowDescriptor(GoodbyeWorld), emptyBackendConfig, expectedResponse) val backend = createBackend(jobDescriptorFromSingleCallWorkflow(workflow.workflowDescriptor, Map.empty, WorkflowOptions.empty, runtimeAttributeDefinitions), workflow.config) testWorkflow(workflow, backend) @@ -82,11 +81,11 @@ class SharedFileSystemJobExecutionActorSpec extends TestKitSuite("SharedFileSyst val jsonInputFile = createCannedFile("localize", "content from json inputs").pathAsString val callInputFile = createCannedFile("localize", "content from call inputs").pathAsString val inputs = Map( - "inputFileFromCallInputs" -> WdlFile(callInputFile), - "inputFileFromJson" -> WdlFile(jsonInputFile) + "wf_localize.localize.inputFileFromCallInputs" -> WdlFile(callInputFile), + "wf_localize.localize.inputFileFromJson" -> WdlFile(jsonInputFile) ) - val expectedOutputs: JobOutputs = Map( + val expectedOutputs: CallOutputs = Map( "out" -> JobOutput(WdlArray(WdlArrayType(WdlStringType), Array( WdlString("content from json inputs"), @@ -106,10 +105,10 @@ class SharedFileSystemJobExecutionActorSpec extends TestKitSuite("SharedFileSyst val runtime = if (docker) """runtime { docker: "ubuntu:latest" } """ else "" val workflowDescriptor = buildWorkflowDescriptor(InputFiles, inputs, runtime = runtime) val backend = createBackend(jobDescriptorFromSingleCallWorkflow(workflowDescriptor, inputs, WorkflowOptions.empty, runtimeAttributeDefinitions), conf) - val jobDescriptor: BackendJobDescriptor = jobDescriptorFromSingleCallWorkflow(workflowDescriptor, Map.empty, WorkflowOptions.empty, runtimeAttributeDefinitions) - val expectedResponse = SucceededResponse(jobDescriptor.key, Some(0), expectedOutputs, None, Seq.empty) + val jobDescriptor: BackendJobDescriptor = jobDescriptorFromSingleCallWorkflow(workflowDescriptor, inputs, WorkflowOptions.empty, runtimeAttributeDefinitions) + val expectedResponse = JobSucceededResponse(jobDescriptor.key, Some(0), expectedOutputs, None, Seq.empty) - val jobPaths = new JobPaths(workflowDescriptor, conf.backendConfig, jobDescriptor.key) + val jobPaths = new JobPathsWithDocker(jobDescriptor.key, workflowDescriptor, conf.backendConfig) whenReady(backend.execute) { executionResponse => assertResponse(executionResponse, expectedResponse) @@ -157,7 +156,7 @@ class SharedFileSystemJobExecutionActorSpec extends TestKitSuite("SharedFileSyst val backendRef = 
createBackendRef(jobDescriptor, emptyBackendConfig) val backend = backendRef.underlyingActor - val jobPaths = new JobPaths(workflowDescriptor, ConfigFactory.empty, jobDescriptor.key) + val jobPaths = new JobPathsWithDocker(jobDescriptor.key, workflowDescriptor, ConfigFactory.empty) File(jobPaths.callExecutionRoot).createDirectories() File(jobPaths.stdout).write("Hello stubby ! ") File(jobPaths.stderr).touch() @@ -189,13 +188,13 @@ class SharedFileSystemJobExecutionActorSpec extends TestKitSuite("SharedFileSyst whenReady(execute, Timeout(10.seconds.dilated)) { executionResponse => if (writeReturnCode) { - executionResponse should be(a[SucceededResponse]) - val succeededResponse = executionResponse.asInstanceOf[SucceededResponse] + executionResponse should be(a[JobSucceededResponse]) + val succeededResponse = executionResponse.asInstanceOf[JobSucceededResponse] succeededResponse.returnCode.value should be(0) succeededResponse.jobOutputs should be(Map("salutation" -> JobOutput(WdlString("Hello stubby !")))) } else { - executionResponse should be(a[FailedNonRetryableResponse]) - val failedResponse = executionResponse.asInstanceOf[FailedNonRetryableResponse] + executionResponse should be(a[JobFailedNonRetryableResponse]) + val failedResponse = executionResponse.asInstanceOf[JobFailedNonRetryableResponse] failedResponse.returnCode should be(empty) failedResponse.throwable should be(a[RuntimeException]) failedResponse.throwable.getMessage should startWith("Unable to determine that 0 is alive, and") @@ -219,20 +218,20 @@ class SharedFileSystemJobExecutionActorSpec extends TestKitSuite("SharedFileSyst it should "execute shards from a scatter" in { val workflowDescriptor = buildWorkflowDescriptor(TestWorkflows.Scatter) - val call = workflowDescriptor.workflowNamespace.workflow.calls.head + val call = workflowDescriptor.workflow.taskCalls.head 0 to 2 foreach { shard => // This assumes that engine will give us the evaluated value of the scatter item at the correct index // If this is not the case, more context/logic will need to be moved to the backend so it can figure it out by itself - val symbolMaps: Map[LocallyQualifiedName, WdlInteger] = Map("intNumber" -> WdlInteger(shard)) + val symbolMaps: Map[LocallyQualifiedName, WdlInteger] = Map("scattering.intNumber" -> WdlInteger(shard)) val runtimeAttributes = RuntimeAttributeDefinition.addDefaultsToAttributes(runtimeAttributeDefinitions, WorkflowOptions.empty)(call.task.runtimeAttributes.attrs) val jobDescriptor: BackendJobDescriptor = - BackendJobDescriptor(workflowDescriptor, BackendJobDescriptorKey(call, Option(shard), 1), runtimeAttributes, symbolMaps) + BackendJobDescriptor(workflowDescriptor, BackendJobDescriptorKey(call, Option(shard), 1), runtimeAttributes, fqnMapToDeclarationMap(symbolMaps)) val backend = createBackend(jobDescriptor, emptyBackendConfig) val response = - SucceededResponse(mock[BackendJobDescriptorKey], Some(0), Map("out" -> JobOutput(WdlInteger(shard))), None, Seq.empty) + JobSucceededResponse(mock[BackendJobDescriptorKey], Some(0), Map("out" -> JobOutput(WdlInteger(shard))), None, Seq.empty) executeJobAndAssertOutputs(backend, response) } } @@ -240,12 +239,12 @@ class SharedFileSystemJobExecutionActorSpec extends TestKitSuite("SharedFileSyst it should "post process outputs" in { val inputFile = createCannedFile("localize", "content from json inputs").pathAsString val inputs = Map { - "inputFile" -> WdlFile(inputFile) + "wf_localize.localize.inputFile" -> WdlFile(inputFile) } val workflowDescriptor = 
buildWorkflowDescriptor(OutputProcess, inputs) val jobDescriptor: BackendJobDescriptor = jobDescriptorFromSingleCallWorkflow(workflowDescriptor, inputs, WorkflowOptions.empty, runtimeAttributeDefinitions) val backend = createBackend(jobDescriptor, emptyBackendConfig) - val jobPaths = new JobPaths(workflowDescriptor, emptyBackendConfig.backendConfig, jobDescriptor.key) + val jobPaths = new JobPathsWithDocker(jobDescriptor.key, workflowDescriptor, emptyBackendConfig.backendConfig) val expectedA = WdlFile(jobPaths.callExecutionRoot.resolve("a").toAbsolutePath.toString) val expectedB = WdlFile(jobPaths.callExecutionRoot.resolve("dir").toAbsolutePath.resolve("b").toString) val expectedOutputs = Map( @@ -253,13 +252,13 @@ class SharedFileSystemJobExecutionActorSpec extends TestKitSuite("SharedFileSyst "o2" -> JobOutput(WdlArray(WdlArrayType(WdlFileType), Seq(expectedA, expectedB))), "o3" -> JobOutput(WdlFile(inputFile)) ) - val expectedResponse = SucceededResponse(jobDescriptor.key, Some(0), expectedOutputs, None, Seq.empty) + val expectedResponse = JobSucceededResponse(jobDescriptor.key, Some(0), expectedOutputs, None, Seq.empty) executeJobAndAssertOutputs(backend, expectedResponse) } - it should "fail post processing if an output fail is not found" in { - val expectedResponse = FailedNonRetryableResponse(mock[BackendJobDescriptorKey], + it should "fail post processing if an output file is not found" in { + val expectedResponse = JobFailedNonRetryableResponse(mock[BackendJobDescriptorKey], AggregatedException(Seq.empty, "Could not process output, file not found"), Option(0)) val workflow = TestWorkflow(buildWorkflowDescriptor(MissingOutputProcess), emptyBackendConfig, expectedResponse) val backend = createBackend(jobDescriptorFromSingleCallWorkflow(workflow.workflowDescriptor, Map.empty, WorkflowOptions.empty, runtimeAttributeDefinitions), workflow.config) diff --git a/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemSpec.scala b/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemSpec.scala index c3874a14ae8..d7c39924ce7 100644 --- a/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemSpec.scala +++ b/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemSpec.scala @@ -1,22 +1,24 @@ package cromwell.backend.sfs -import java.nio.file.{FileSystems, Files} +import java.nio.file.Files import better.files._ import com.typesafe.config.{Config, ConfigFactory} +import cromwell.core.path.DefaultPathBuilder +import cromwell.backend.BackendSpec import org.scalatest.prop.TableDrivenPropertyChecks import org.scalatest.{FlatSpec, Matchers} import org.specs2.mock.Mockito import wdl4s.values.WdlFile -class SharedFileSystemSpec extends FlatSpec with Matchers with Mockito with TableDrivenPropertyChecks { +class SharedFileSystemSpec extends FlatSpec with Matchers with Mockito with TableDrivenPropertyChecks with BackendSpec { behavior of "SharedFileSystem" val defaultLocalization = ConfigFactory.parseString(""" localization: [copy, hard-link, soft-link] """) val hardLinkLocalization = ConfigFactory.parseString(""" localization: [hard-link] """) val softLinkLocalization = ConfigFactory.parseString(""" localization: [soft-link] """) - val localFS = List(FileSystems.getDefault) + val localPathBuilder = List(DefaultPathBuilder) def localizationTest(config: Config, @@ -34,12 +36,16 @@ class SharedFileSystemSpec extends FlatSpec with Matchers with Mockito with Tabl dest.touch() } - val inputs = Map("input" -> 
WdlFile(orig.pathAsString)) - val sharedFS = new SharedFileSystem { override val sharedFileSystemConfig = config } - val result = sharedFS.localizeInputs(callDir.path, docker = docker, localFS, inputs) + val inputs = fqnMapToDeclarationMap(Map("input" -> WdlFile(orig.pathAsString))) + val sharedFS = new SharedFileSystem { + override val pathBuilders = localPathBuilder + override val sharedFileSystemConfig = config + } + val localizedinputs = Map(inputs.head._1 -> WdlFile(dest.pathAsString)) + val result = sharedFS.localizeInputs(callDir.path, docker = docker)(inputs) result.isSuccess shouldBe true - result.get should contain theSameElementsAs Map("input" -> WdlFile(dest.pathAsString)) + result.get should contain theSameElementsAs localizedinputs dest.exists shouldBe true countLinks(dest) should be(linkNb) diff --git a/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/TestLocalAsyncJobExecutionActor.scala b/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/TestLocalAsyncJobExecutionActor.scala index 0ae09136744..72d2c6e9993 100644 --- a/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/TestLocalAsyncJobExecutionActor.scala +++ b/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/TestLocalAsyncJobExecutionActor.scala @@ -3,7 +3,7 @@ package cromwell.backend.sfs import akka.actor.{ActorSystem, Props} import akka.testkit.TestActorRef import cromwell.backend.BackendJobExecutionActor.BackendJobExecutionResponse -import cromwell.backend.io.WorkflowPaths +import cromwell.backend.io.WorkflowPathsWithDocker import cromwell.backend.validation.{DockerValidation, RuntimeAttributesValidation} import cromwell.backend.{BackendConfigurationDescriptor, BackendJobDescriptor} @@ -34,7 +34,7 @@ object TestLocalAsyncJobExecutionActor { def createBackendRef(jobDescriptor: BackendJobDescriptor, configurationDescriptor: BackendConfigurationDescriptor) (implicit system: ActorSystem): TestActorRef[SharedFileSystemJobExecutionActor] = { val emptyActor = system.actorOf(Props.empty) - val workflowPaths = new WorkflowPaths(jobDescriptor.workflowDescriptor, configurationDescriptor.backendConfig) + val workflowPaths = new WorkflowPathsWithDocker(jobDescriptor.workflowDescriptor, configurationDescriptor.backendConfig) val initializationData = new SharedFileSystemBackendInitializationData(workflowPaths, SharedFileSystemValidatedRuntimeAttributesBuilder.default.withValidation(DockerValidation.optional)) diff --git a/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkBackendFactory.scala b/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkBackendFactory.scala index ef01c9985f9..6fd75323e4a 100644 --- a/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkBackendFactory.scala +++ b/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkBackendFactory.scala @@ -2,14 +2,14 @@ package cromwell.backend.impl.spark import akka.actor.{ActorRef, ActorSystem, Props} import cromwell.backend._ +import cromwell.backend.io.JobPathsWithDocker import cromwell.backend.sfs.SharedFileSystemExpressionFunctions -import cromwell.backend.io.JobPaths import cromwell.core.CallContext -import wdl4s.Call +import wdl4s.TaskCall import wdl4s.expression.WdlStandardLibraryFunctions case class SparkBackendFactory(name: String, configurationDescriptor: BackendConfigurationDescriptor, actorSystem: ActorSystem) extends BackendLifecycleActorFactory { - override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, calls: 
Seq[Call], serviceRegistryActor: ActorRef): Option[Props] = { + override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, calls: Set[TaskCall], serviceRegistryActor: ActorRef): Option[Props] = { Option(SparkInitializationActor.props(workflowDescriptor, calls, configurationDescriptor, serviceRegistryActor)) } @@ -22,13 +22,13 @@ case class SparkBackendFactory(name: String, configurationDescriptor: BackendCon override def expressionLanguageFunctions(workflowDescriptor: BackendWorkflowDescriptor, jobKey: BackendJobDescriptorKey, initializationData: Option[BackendInitializationData]): WdlStandardLibraryFunctions = { - val jobPaths = new JobPaths(workflowDescriptor, configurationDescriptor.backendConfig, jobKey) + val jobPaths = new JobPathsWithDocker(jobKey, workflowDescriptor, configurationDescriptor.backendConfig) val callContext = new CallContext( jobPaths.callExecutionRoot, jobPaths.stdout.toAbsolutePath.toString, jobPaths.stderr.toAbsolutePath.toString ) - new SharedFileSystemExpressionFunctions(SparkJobExecutionActor.DefaultFileSystems, callContext) + new SharedFileSystemExpressionFunctions(SparkJobExecutionActor.DefaultPathBuilders, callContext) } } diff --git a/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkInitializationActor.scala b/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkInitializationActor.scala index 2c4b5f94f76..1ff66aa2474 100644 --- a/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkInitializationActor.scala +++ b/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkInitializationActor.scala @@ -6,7 +6,7 @@ import cromwell.backend.validation.RuntimeAttributesDefault import cromwell.backend.validation.RuntimeAttributesKeys._ import cromwell.backend.{BackendConfigurationDescriptor, BackendInitializationData, BackendWorkflowDescriptor, BackendWorkflowInitializationActor} import cromwell.core.WorkflowOptions -import wdl4s.Call +import wdl4s.TaskCall import wdl4s.types.{WdlBooleanType, WdlIntegerType, WdlStringType} import wdl4s.values.WdlValue @@ -18,14 +18,14 @@ object SparkInitializationActor { SparkRuntimeAttributes.NumberOfExecutorsKey, SparkRuntimeAttributes.AppMainClassKey) def props(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], + calls: Set[TaskCall], configurationDescriptor: BackendConfigurationDescriptor, serviceRegistryActor: ActorRef): Props = Props(new SparkInitializationActor(workflowDescriptor, calls, configurationDescriptor, serviceRegistryActor)) } class SparkInitializationActor(override val workflowDescriptor: BackendWorkflowDescriptor, - override val calls: Seq[Call], + override val calls: Set[TaskCall], override val configurationDescriptor: BackendConfigurationDescriptor, override val serviceRegistryActor: ActorRef) extends BackendWorkflowInitializationActor { diff --git a/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkJobExecutionActor.scala b/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkJobExecutionActor.scala index c793338eb0b..927ac7d45ec 100644 --- a/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkJobExecutionActor.scala +++ b/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkJobExecutionActor.scala @@ -1,15 +1,16 @@ package cromwell.backend.impl.spark -import java.nio.file.FileSystems import java.nio.file.attribute.PosixFilePermission import akka.actor.Props -import 
cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, FailedNonRetryableResponse, SucceededResponse} +import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, JobFailedNonRetryableResponse, JobSucceededResponse} import cromwell.backend.impl.spark.SparkClusterProcess._ -import cromwell.backend.io.JobPaths +import cromwell.backend.io.JobPathsWithDocker import cromwell.backend.sfs.{SharedFileSystem, SharedFileSystemExpressionFunctions} +import cromwell.backend.wdl.Command import cromwell.backend.{BackendConfigurationDescriptor, BackendJobDescriptor, BackendJobExecutionActor} -import cromwell.core.{TailedWriter, UntailedWriter} +import cromwell.core.path.JavaWriterImplicits._ +import cromwell.core.path.{DefaultPathBuilder, TailedWriter, UntailedWriter} import wdl4s.parser.MemoryUnit import wdl4s.util.TryUtil @@ -18,7 +19,7 @@ import scala.sys.process.ProcessLogger import scala.util.{Failure, Success, Try} object SparkJobExecutionActor { - val DefaultFileSystems = List(FileSystems.getDefault) + val DefaultPathBuilders = List(DefaultPathBuilder) def props(jobDescriptor: BackendJobDescriptor, configurationDescriptor: BackendConfigurationDescriptor): Props = Props(new SparkJobExecutionActor(jobDescriptor, configurationDescriptor)) @@ -29,8 +30,8 @@ class SparkJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, import SparkJobExecutionActor._ import better.files._ - import cromwell.core.PathFactory._ + override val pathBuilders = DefaultPathBuilders private val tag = s"SparkJobExecutionActor-${jobDescriptor.key.tag}:" lazy val cmds = new SparkCommands @@ -43,7 +44,7 @@ class SparkJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, private val sparkDeployMode = configurationDescriptor.backendConfig.getString("deployMode").toLowerCase override val sharedFileSystemConfig = fileSystemsConfig.getConfig("local") private val workflowDescriptor = jobDescriptor.workflowDescriptor - private val jobPaths = new JobPaths(workflowDescriptor, configurationDescriptor.backendConfig, jobDescriptor.key) + private val jobPaths = new JobPathsWithDocker(jobDescriptor.key, workflowDescriptor, configurationDescriptor.backendConfig) // Files private val executionDir = jobPaths.callExecutionRoot @@ -58,9 +59,9 @@ class SparkJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, private lazy val isClusterMode = isSparkClusterMode(sparkDeployMode, sparkMaster) private val call = jobDescriptor.key.call - private val callEngineFunction = SharedFileSystemExpressionFunctions(jobPaths, DefaultFileSystems) + private val callEngineFunction = SharedFileSystemExpressionFunctions(jobPaths, DefaultPathBuilders) - private val lookup = jobDescriptor.inputs.apply _ + private val lookup = jobDescriptor.fullyQualifiedInputs.apply _ private val executionResponse = Promise[BackendJobExecutionResponse]() @@ -107,12 +108,12 @@ class SparkJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, private def resolveExecutionResult(jobReturnCode: Try[Int], failedOnStderr: Boolean): Future[BackendJobExecutionResponse] = { (jobReturnCode, failedOnStderr) match { case (Success(0), true) if File(jobPaths.stderr).lines.toList.nonEmpty => - Future.successful(FailedNonRetryableResponse(jobDescriptor.key, + Future.successful(JobFailedNonRetryableResponse(jobDescriptor.key, new IllegalStateException(s"Execution process failed although return code is zero but stderr is not empty"), Option(0))) case (Success(0), _) => resolveExecutionProcess - case (Success(rc), _) 
=> Future.successful(FailedNonRetryableResponse(jobDescriptor.key, + case (Success(rc), _) => Future.successful(JobFailedNonRetryableResponse(jobDescriptor.key, new IllegalStateException(s"Execution process failed. Spark returned non zero status code: $rc"), Option(rc))) - case (Failure(error), _) => Future.successful(FailedNonRetryableResponse(jobDescriptor.key, error, None)) + case (Failure(error), _) => Future.successful(JobFailedNonRetryableResponse(jobDescriptor.key, error, None)) } } @@ -122,9 +123,9 @@ class SparkJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, case true => clusterExtProcess.startMonitoringSparkClusterJob(jobPaths.callExecutionRoot, SubmitJobJson.format(sparkDeployMode)) collect { case Finished => processSuccess(0) - case Failed(error: Throwable) => FailedNonRetryableResponse(jobDescriptor.key, error, None) + case Failed(error: Throwable) => JobFailedNonRetryableResponse(jobDescriptor.key, error, None) } recover { - case error: Throwable => FailedNonRetryableResponse(jobDescriptor.key, error, None) + case error: Throwable => JobFailedNonRetryableResponse(jobDescriptor.key, error, None) } case false => Future.successful(processSuccess(0)) } @@ -132,12 +133,12 @@ class SparkJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, private def processSuccess(rc: Int) = { evaluateOutputs(callEngineFunction, outputMapper(jobPaths)) match { - case Success(outputs) => SucceededResponse(jobDescriptor.key, Some(rc), outputs, None, Seq.empty) + case Success(outputs) => JobSucceededResponse(jobDescriptor.key, Some(rc), outputs, None, Seq.empty) case Failure(e) => val message = Option(e.getMessage) map { ": " + _ } getOrElse "" - FailedNonRetryableResponse(jobDescriptor.key, new Throwable("Failed post processing of outputs" + message, e), Option(rc)) + JobFailedNonRetryableResponse(jobDescriptor.key, new Throwable("Failed post processing of outputs" + message, e), Option(rc)) } } @@ -155,9 +156,12 @@ class SparkJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, executionDir.toString.toFile.createIfNotExists(asDirectory = true, createParents = true) log.debug("{} Resolving job command", tag) - val command = localizeInputs(jobPaths.callInputsRoot, docker = false, DefaultFileSystems, jobDescriptor.inputs) flatMap { - localizedInputs => call.task.instantiateCommand(localizedInputs, callEngineFunction, identity) - } + + val command = Command.instantiate( + jobDescriptor, + callEngineFunction, + localizeInputs(jobPaths.callInputsRoot, docker = false) + ) log.debug("{} Creating bash script for executing command: {}", tag, command) // TODO: we should use shapeless Heterogeneous list here not good to have generic map @@ -202,7 +206,7 @@ class SparkJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, case false => executionResponse completeWith executeTask(extProcess, stdoutWriter, stderrWriter) } } recover { - case exception => executionResponse success FailedNonRetryableResponse(jobDescriptor.key, exception, None) + case exception => executionResponse success JobFailedNonRetryableResponse(jobDescriptor.key, exception, None) } } diff --git a/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkProcess.scala b/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkProcess.scala index 60f3992186f..01d22116108 100644 --- a/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkProcess.scala +++ 
b/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkProcess.scala @@ -2,12 +2,12 @@ package cromwell.backend.impl.spark import java.nio.file.Path +import better.files._ import com.typesafe.scalalogging.StrictLogging -import cromwell.core.{TailedWriter, UntailedWriter} -import cromwell.core.PathFactory.EnhancedPath +import cromwell.core.path.PathImplicits._ +import cromwell.core.path.{TailedWriter, UntailedWriter} import scala.sys.process._ -import better.files._ import scala.util.{Failure, Success, Try} object SparkCommands { @@ -29,12 +29,17 @@ class SparkCommands extends StrictLogging { * as some extra shell code for monitoring jobs */ def writeScript(instantiatedCommand: String, filePath: Path, containerRoot: Path) = { - File(filePath).write( - s"""#!/bin/sh - |cd $containerRoot - |$instantiatedCommand - |echo $$? > rc - |""".stripMargin) + + val scriptBody = + s""" + +#!/bin/sh +cd $containerRoot +$instantiatedCommand +echo $$? > rc + + """.trim + "\n" + File(filePath).write(scriptBody) } def sparkSubmitCommand(attributes: Map[String, Any]): String = { diff --git a/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkInitializationActorSpec.scala b/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkInitializationActorSpec.scala index f8aba1a4bb4..de58b7c0633 100644 --- a/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkInitializationActorSpec.scala +++ b/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkInitializationActorSpec.scala @@ -7,6 +7,7 @@ import cromwell.backend.{BackendConfigurationDescriptor, BackendWorkflowDescript import cromwell.core.TestKitSuite import org.scalatest.{BeforeAndAfterAll, Matchers, WordSpecLike} import wdl4s._ + import scala.concurrent.duration._ class SparkInitializationActorSpec extends TestKitSuite("SparkInitializationActorSpec") @@ -27,12 +28,12 @@ class SparkInitializationActorSpec extends TestKitSuite("SparkInitializationAc | RUNTIME |} | - |workflow hello { + |workflow wf_hello { | call hello |} """.stripMargin - private def getSparkBackend(workflowDescriptor: BackendWorkflowDescriptor, calls: Seq[Call], conf: BackendConfigurationDescriptor) = { + private def getSparkBackend(workflowDescriptor: BackendWorkflowDescriptor, calls: Set[TaskCall], conf: BackendConfigurationDescriptor) = { system.actorOf(SparkInitializationActor.props(workflowDescriptor, calls, conf, emptyActor)) } @@ -41,7 +42,7 @@ class SparkInitializationActorSpec extends TestKitSuite("SparkInitializationAc within(Timeout) { EventFilter.warning(message = s"Key/s [memory] is/are not supported by SparkBackend. Unsupported attributes will not be part of jobs executions.", occurrences = 1) intercept { val workflowDescriptor = buildWorkflowDescriptor(HelloWorld, runtime = """runtime { memory: 1 %s: "%s"}""".format("appMainClass", "test")) - val backend = getSparkBackend(workflowDescriptor, workflowDescriptor.workflowNamespace.workflow.calls, emptyBackendConfig) + val backend = getSparkBackend(workflowDescriptor, workflowDescriptor.workflow.taskCalls, emptyBackendConfig) backend ! 
Initialize } } diff --git a/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkJobExecutionActorSpec.scala b/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkJobExecutionActorSpec.scala index aa4400ac8fa..3a7873fc5fa 100644 --- a/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkJobExecutionActorSpec.scala +++ b/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkJobExecutionActorSpec.scala @@ -6,12 +6,12 @@ import java.nio.file.Path import akka.testkit.{ImplicitSender, TestActorRef} import better.files._ import com.typesafe.config.ConfigFactory -import cromwell.backend.BackendJobExecutionActor.{FailedNonRetryableResponse, SucceededResponse} +import cromwell.backend.BackendJobExecutionActor.{JobFailedNonRetryableResponse, JobSucceededResponse} import cromwell.backend.impl.spark.SparkClusterProcess._ import cromwell.backend.io._ import cromwell.backend.{BackendConfigurationDescriptor, BackendJobDescriptor, BackendSpec} -import cromwell.core.{PathWriter, TailedWriter, TestKitSuite, UntailedWriter, _} -import org.mockito.Matchers._ +import cromwell.core.{TestKitSuite, WorkflowOptions} +import cromwell.core.path.{PathWriter, TailedWriter, UntailedWriter} import org.mockito.Mockito import org.mockito.Mockito._ import org.scalatest.concurrent.PatienceConfiguration.Timeout @@ -50,7 +50,7 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") | RUNTIME |} | - |workflow hello { + |workflow wf_hello { | call hello |} """.stripMargin @@ -68,7 +68,7 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") | RUNTIME |} | - |workflow helloClusterMode { + |workflow wf_helloClusterMode { | call helloClusterMode |} """.stripMargin @@ -171,7 +171,7 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") when(sparkClusterProcess.startMonitoringSparkClusterJob(any[Path], any[String])).thenReturn(Future.successful(Finished)) whenReady(backend.execute, timeout) { response => - response shouldBe a[SucceededResponse] + response shouldBe a[JobSucceededResponse] verify(sparkClusterProcess, times(1)).externalProcess(any[Seq[String]], any[ProcessLogger]) verify(sparkClusterProcess, times(1)).tailedWriter(any[Int], any[Path]) verify(sparkClusterProcess, times(1)).untailedWriter(any[Path]) @@ -201,8 +201,8 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") when(sparkClusterProcess.startMonitoringSparkClusterJob(any[Path], any[String])).thenReturn(Future.successful(Failed(new Throwable("failed to monitor")))) whenReady(backend.execute, timeout) { response => - response shouldBe a[FailedNonRetryableResponse] - assert(response.asInstanceOf[FailedNonRetryableResponse].throwable.getMessage.contains("failed to monitor")) + response shouldBe a[JobFailedNonRetryableResponse] + assert(response.asInstanceOf[JobFailedNonRetryableResponse].throwable.getMessage.contains("failed to monitor")) verify(sparkClusterProcess, times(1)).externalProcess(any[Seq[String]], any[ProcessLogger]) verify(sparkClusterProcess, times(1)).tailedWriter(any[Int], any[Path]) verify(sparkClusterProcess, times(1)).untailedWriter(any[Path]) @@ -232,8 +232,8 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") when(sparkClusterProcess.startMonitoringSparkClusterJob(any[Path], any[String])).thenReturn(Future.failed(new IllegalStateException("failed to start monitoring process"))) whenReady(backend.execute, timeout) { response => - 
response shouldBe a[FailedNonRetryableResponse] - assert(response.asInstanceOf[FailedNonRetryableResponse].throwable.getMessage.contains("failed to start monitoring process")) + response shouldBe a[JobFailedNonRetryableResponse] + assert(response.asInstanceOf[JobFailedNonRetryableResponse].throwable.getMessage.contains("failed to start monitoring process")) verify(sparkClusterProcess, times(1)).externalProcess(any[Seq[String]], any[ProcessLogger]) verify(sparkClusterProcess, times(1)).tailedWriter(any[Int], any[Path]) verify(sparkClusterProcess, times(1)).untailedWriter(any[Path]) @@ -263,8 +263,8 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") when(sparkClusterProcess.processStderr).thenReturn(sampleSubmissionResponse) whenReady(backend.execute, timeout) { response => - response shouldBe a[FailedNonRetryableResponse] - assert(response.asInstanceOf[FailedNonRetryableResponse].throwable.getMessage.contains(s"Execution process failed although return code is zero but stderr is not empty")) + response shouldBe a[JobFailedNonRetryableResponse] + assert(response.asInstanceOf[JobFailedNonRetryableResponse].throwable.getMessage.contains(s"Execution process failed although return code is zero but stderr is not empty")) verify(sparkClusterProcess, times(1)).externalProcess(any[Seq[String]], any[ProcessLogger]) verify(sparkClusterProcess, times(1)).tailedWriter(any[Int], any[Path]) verify(sparkClusterProcess, times(1)).untailedWriter(any[Path]) @@ -292,8 +292,8 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") when(sparkClusterProcess.processStderr).thenReturn(stderrResult) whenReady(backend.execute, timeout) { response => - response shouldBe a[FailedNonRetryableResponse] - assert(response.asInstanceOf[FailedNonRetryableResponse].throwable.getMessage.contains(s"Execution process failed. Spark returned non zero status code:")) + response shouldBe a[JobFailedNonRetryableResponse] + assert(response.asInstanceOf[JobFailedNonRetryableResponse].throwable.getMessage.contains(s"Execution process failed. 
Spark returned non zero status code:")) } cleanUpJob(jobPaths) } @@ -318,8 +318,8 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") when(sparkClusterProcess.processStderr).thenReturn(stderrResult) whenReady(backend.execute, timeout) { response => - response shouldBe a[FailedNonRetryableResponse] - assert(response.asInstanceOf[FailedNonRetryableResponse].throwable.getMessage.contains(s"submit job process exitValue method failed")) + response shouldBe a[JobFailedNonRetryableResponse] + assert(response.asInstanceOf[JobFailedNonRetryableResponse].throwable.getMessage.contains(s"submit job process exitValue method failed")) } cleanUpJob(jobPaths) } @@ -347,7 +347,7 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") }).underlyingActor whenReady(backend.execute, timeout) { response => - response shouldBe a[SucceededResponse] + response shouldBe a[JobSucceededResponse] verify(sparkProcess, times(1)).externalProcess(any[Seq[String]], any[ProcessLogger]) verify(sparkProcess, times(1)).tailedWriter(any[Int], any[Path]) verify(sparkProcess, times(1)).untailedWriter(any[Path]) @@ -376,8 +376,8 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") when(sparkProcess.processStderr).thenReturn(stderrResult) whenReady(backend.execute, timeout) { response => - response shouldBe a[FailedNonRetryableResponse] - assert(response.asInstanceOf[FailedNonRetryableResponse].throwable.getMessage.contains(s"Execution process failed. Spark returned non zero status code:")) + response shouldBe a[JobFailedNonRetryableResponse] + assert(response.asInstanceOf[JobFailedNonRetryableResponse].throwable.getMessage.contains(s"Execution process failed. Spark returned non zero status code:")) } cleanUpJob(jobPaths) @@ -402,8 +402,8 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") when(sparkProcess.untailedWriter(any[Path])).thenReturn(stubUntailed) whenReady(backend.execute, timeout) { response => - response shouldBe a[FailedNonRetryableResponse] - assert(response.asInstanceOf[FailedNonRetryableResponse].throwable.getMessage.contains(s"Execution process failed although return code is zero but stderr is not empty")) + response shouldBe a[JobFailedNonRetryableResponse] + assert(response.asInstanceOf[JobFailedNonRetryableResponse].throwable.getMessage.contains(s"Execution process failed although return code is zero but stderr is not empty")) } cleanUpJob(jobPaths) @@ -427,7 +427,7 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") when(sparkProcess.untailedWriter(any[Path])).thenReturn(stubUntailed) whenReady(backend.execute, timeout) { response => - response shouldBe a[SucceededResponse] + response shouldBe a[JobSucceededResponse] verify(sparkProcess, times(1)).externalProcess(any[Seq[String]], any[ProcessLogger]) verify(sparkProcess, times(1)).tailedWriter(any[Int], any[Path]) verify(sparkProcess, times(1)).untailedWriter(any[Path]) @@ -438,7 +438,7 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") } - private def cleanUpJob(jobPaths: JobPaths): Unit = { + private def cleanUpJob(jobPaths: JobPathsWithDocker): Unit = { File(jobPaths.workflowRoot).delete(true) () } @@ -447,7 +447,7 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") val backendWorkflowDescriptor = buildWorkflowDescriptor(wdl = wdlSource, inputs = inputFiles.getOrElse(Map.empty), runtime = runtimeString) val backendConfigurationDescriptor = if 
(isCluster) BackendConfigurationDescriptor(backendClusterConfig, ConfigFactory.load) else BackendConfigurationDescriptor(backendClientConfig, ConfigFactory.load) val jobDesc = jobDescriptorFromSingleCallWorkflow(backendWorkflowDescriptor, inputFiles.getOrElse(Map.empty), WorkflowOptions.empty, Set.empty) - val jobPaths = if (isCluster) new JobPaths(backendWorkflowDescriptor, backendClusterConfig, jobDesc.key) else new JobPaths(backendWorkflowDescriptor, backendClientConfig, jobDesc.key) + val jobPaths = if (isCluster) new JobPathsWithDocker(jobDesc.key, backendWorkflowDescriptor, backendClusterConfig) else new JobPathsWithDocker(jobDesc.key, backendWorkflowDescriptor, backendClientConfig) val executionDir = jobPaths.callExecutionRoot val stdout = File(executionDir.toString, "stdout") stdout.createIfNotExists(asDirectory = false, createParents = true) @@ -456,7 +456,7 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") TestJobDescriptor(jobDesc, jobPaths, backendConfigurationDescriptor) } - private case class TestJobDescriptor(jobDescriptor: BackendJobDescriptor, jobPaths: JobPaths, backendConfigurationDescriptor: BackendConfigurationDescriptor) + private case class TestJobDescriptor(jobDescriptor: BackendJobDescriptor, jobPaths: JobPathsWithDocker, backendConfigurationDescriptor: BackendConfigurationDescriptor) trait MockWriter extends Writer { var closed = false diff --git a/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkRuntimeAttributesSpec.scala b/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkRuntimeAttributesSpec.scala index 33724cb6dac..6167f0fd75a 100644 --- a/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkRuntimeAttributesSpec.scala +++ b/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkRuntimeAttributesSpec.scala @@ -9,7 +9,7 @@ import wdl4s.WdlExpression._ import wdl4s.expression.NoFunctions import wdl4s.util.TryUtil import wdl4s.values.WdlValue -import wdl4s.{Call, WdlExpression, _} +import wdl4s.{Call, _} class SparkRuntimeAttributesSpec extends WordSpecLike with Matchers { @@ -26,7 +26,7 @@ class SparkRuntimeAttributesSpec extends WordSpecLike with Matchers { | RUNTIME |} | - |workflow hello { + |workflow wf_hello { | call hello |} """.stripMargin @@ -90,7 +90,7 @@ class SparkRuntimeAttributesSpec extends WordSpecLike with Matchers { runtime: String) = { BackendWorkflowDescriptor( WorkflowId.randomId(), - NamespaceWithWorkflow.load(wdl.replaceAll("RUNTIME", runtime.format("appMainClass", "com.test.spark"))), + WdlNamespaceWithWorkflow.load(wdl.replaceAll("RUNTIME", runtime.format("appMainClass", "com.test.spark")), Seq.empty[ImportResolver]).workflow, inputs, options ) @@ -100,12 +100,11 @@ class SparkRuntimeAttributesSpec extends WordSpecLike with Matchers { val workflowDescriptor = buildWorkflowDescriptor(wdlSource, runtime = runtimeAttributes) def createLookup(call: Call): ScopedLookupFunction = { - val declarations = workflowDescriptor.workflowNamespace.workflow.declarations ++ call.task.declarations val knownInputs = workflowDescriptor.inputs - WdlExpression.standardLookupFunction(knownInputs, declarations, NoFunctions) + call.lookupFunction(knownInputs, NoFunctions) } - workflowDescriptor.workflowNamespace.workflow.calls map { + workflowDescriptor.workflow.taskCalls map { call => val ra = call.task.runtimeAttributes.attrs mapValues { _.evaluate(createLookup(call), NoFunctions) } TryUtil.sequenceMap(ra, "Runtime attributes evaluation").get From 
22fe86033fb7d7ef7589ad3f3e2c60dbed1b22e0 Mon Sep 17 00:00:00 2001 From: Ruchi Munshi Date: Tue, 11 Apr 2017 16:19:10 -0400 Subject: [PATCH 010/326] Pin release to centaur branch --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 1be54097a31..44ffb177240 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,7 +19,7 @@ before_install: - openssl aes-256-cbc -K "$encrypted_5ebd3ff04788_key" -iv "$encrypted_5ebd3ff04788_iv" -in src/bin/travis/resources/jesConf.tar.enc -out jesConf.tar -d || true env: global: - - CENTAUR_BRANCH=develop + - CENTAUR_BRANCH=40dff9cade6170d691dae383003d60fecfe24bd2 matrix: # Setting this variable twice will cause the 'script' section to run twice with the respective env var invoked - BUILD_TYPE=sbt From a6bcebb091de9da092603eebfe8a0e288b4c8175 Mon Sep 17 00:00:00 2001 From: Thibault Jeandet Date: Tue, 30 May 2017 14:03:37 -0400 Subject: [PATCH 011/326] Pin release to centaur branch --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 44ffb177240..d617bdb877a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,7 +19,7 @@ before_install: - openssl aes-256-cbc -K "$encrypted_5ebd3ff04788_key" -iv "$encrypted_5ebd3ff04788_iv" -in src/bin/travis/resources/jesConf.tar.enc -out jesConf.tar -d || true env: global: - - CENTAUR_BRANCH=40dff9cade6170d691dae383003d60fecfe24bd2 + - CENTAUR_BRANCH=989e3d73ca72567398fd2c9fb03dad57d9cb69f5 matrix: # Setting this variable twice will cause the 'script' section to run twice with the respective env var invoked - BUILD_TYPE=sbt From 4480026ca849cc06962381b24062d1b701747ee5 Mon Sep 17 00:00:00 2001 From: Thibault Jeandet Date: Fri, 30 Jun 2017 14:21:29 -0400 Subject: [PATCH 012/326] Point centaur to develop --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index d617bdb877a..1be54097a31 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,7 +19,7 @@ before_install: - openssl aes-256-cbc -K "$encrypted_5ebd3ff04788_key" -iv "$encrypted_5ebd3ff04788_iv" -in src/bin/travis/resources/jesConf.tar.enc -out jesConf.tar -d || true env: global: - - CENTAUR_BRANCH=989e3d73ca72567398fd2c9fb03dad57d9cb69f5 + - CENTAUR_BRANCH=develop matrix: # Setting this variable twice will cause the 'script' section to run twice with the respective env var invoked - BUILD_TYPE=sbt From 4521fd490dedf5ed287df7ed7e0147f8d7da6680 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Tue, 2 Oct 2018 12:10:33 -0400 Subject: [PATCH 013/326] Fix release mishap - version number --- project/Version.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/Version.scala b/project/Version.scala index c7885ad009d..e56538c415a 100644 --- a/project/Version.scala +++ b/project/Version.scala @@ -5,7 +5,7 @@ import sbt._ object Version { // Upcoming release, or current if we're on a master / hotfix branch - val cromwellVersion = "36" + val cromwellVersion = "35" // Adapted from SbtGit.versionWithGit def cromwellVersionWithGit: Seq[Setting[_]] = From ee8cfd4aa6e2e3c1ce0a77416f21886be8e78826 Mon Sep 17 00:00:00 2001 From: Louis Bergelson Date: Fri, 28 Feb 2020 13:04:10 -0500 Subject: [PATCH 014/326] Fix missing pig in docs (#5435) [no JIRA] --- docs/tutorials/MetadataEndpoint.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorials/MetadataEndpoint.md b/docs/tutorials/MetadataEndpoint.md index 2883c1e0528..fd463f270f4 100644 --- a/docs/tutorials/MetadataEndpoint.md +++ 
b/docs/tutorials/MetadataEndpoint.md @@ -25,4 +25,4 @@ After completing this tutorial you might find the following page interesting: _Drop us a line in the [Forum](https://gatkforums.broadinstitute.org/wdl/categories/ask-the-wdl-team) if you have a question._ \*\*\* **UNDER CONSTRUCTION** \*\*\* -[![Pennywell pig in red wellies - Richard Austin Images](http://www.richardaustinimages.com/wp-content/uploads/2015/04/fluffyAustin_Pigets_Wellies-500x395.jpg)](http://www.richardaustinimages.com/product/pennywell-pigs-under-umbrella-2/) +[![Pennywell pig in red wellies - Richard Austin Images](https://static.wixstatic.com/media/b0d56a_6b627e45766d44fa8b2714f5d7860c84~mv2.jpg/v1/fill/w_787,h_551,al_c,q_50,usm_0.66_1.00_0.01/b0d56a_6b627e45766d44fa8b2714f5d7860c84~mv2.jpg) From fa44f36a53234929deed13ad51edff8a568edb23 Mon Sep 17 00:00:00 2001 From: Marissa <30879508+mepowers@users.noreply.github.com> Date: Wed, 3 Jun 2020 15:55:29 -0700 Subject: [PATCH 015/326] Update Google.md (#5529) Thanks! --- docs/backends/Google.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/backends/Google.md b/docs/backends/Google.md index 7c0c492b829..82320340a55 100644 --- a/docs/backends/Google.md +++ b/docs/backends/Google.md @@ -11,7 +11,7 @@ The instructions below assume you have created a Google Cloud Storage bucket and **Configuring Authentication** -The `google` stanza in the Cromwell configuration file defines how to authenticate to Google. There are four different +The `google` stanza in the Cromwell configuration file defines how to authenticate to Google. There are five different authentication schemes that might be used: * `application_default` (default, recommended) - Use [application default](https://developers.google.com/identity/protocols/application-default-credentials) credentials. @@ -55,7 +55,7 @@ the `genomics` and `filesystems.gcs` sections within a Google configuration bloc The auth for the `genomics` section governs the interactions with Google itself, while `filesystems.gcs` governs the localization of data into and out of GCE VMs. -**Application Default Credentials** +***Application Default Credentials*** By default, application default credentials will be used. Only `name` and `scheme` are required for application default credentials. @@ -66,7 +66,7 @@ $ gcloud auth login $ gcloud config set project my-project ``` -**Service Account** +***Service Account*** First create a new service account through the [API Credentials](https://console.developers.google.com/apis/credentials) page. Go to **Create credentials -> Service account key**. Then in the **Service account** dropdown select **New service account**. Fill in a name (e.g. `my-account`), and select key type of JSON. From 0963109d8fffae1b567d2b4f74a4fb896d69d0dc Mon Sep 17 00:00:00 2001 From: Kyle Vernest <34135227+kv076@users.noreply.github.com> Date: Tue, 15 Dec 2020 13:51:26 -0500 Subject: [PATCH 016/326] Update LanguageSupport.md reference our Jira documentation rather than github with the provided link --- docs/LanguageSupport.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/LanguageSupport.md b/docs/LanguageSupport.md index 74f5ab238f6..d14d6751a42 100644 --- a/docs/LanguageSupport.md +++ b/docs/LanguageSupport.md @@ -26,7 +26,7 @@ As well as the changes to the WDL spec between draft-2 and 1.0, Cromwell also su ### CWL 1.0 Cromwell provides support for Common Workflow Language (CWL), beginning with the core spec, and most heavily used requirements. 
-If you spot a CWL feature that Cromwell doesn't support, please notify us using an issue on our github page! +If you spot a CWL feature that Cromwell doesn't support, please notify us using an issue on our [Jira page](https://broadworkbench.atlassian.net/secure/RapidBoard.jspa?rapidView=39&view=planning.nodetail&issueLimit=100)! ## Future Language Support From ef30230346c318bc60e95ef0c023e97a92ed8fb8 Mon Sep 17 00:00:00 2001 From: Mark Schreiber Date: Tue, 13 Jul 2021 16:18:22 -0400 Subject: [PATCH 017/326] check buckets for region and ensure client has correct region --- .../java/org/lerch/s3fs/AmazonS3Factory.java | 14 +++---- .../org/lerch/s3fs/S3AccessControlList.java | 8 ++-- .../java/org/lerch/s3fs/S3FileChannel.java | 20 ++++----- .../main/java/org/lerch/s3fs/S3FileStore.java | 42 ++++++++++++++----- .../java/org/lerch/s3fs/S3FileSystem.java | 30 ++++++------- .../org/lerch/s3fs/S3FileSystemProvider.java | 29 +++++++------ .../main/java/org/lerch/s3fs/S3Iterator.java | 22 ++++------ .../src/main/java/org/lerch/s3fs/S3Path.java | 4 +- .../org/lerch/s3fs/S3SeekableByteChannel.java | 28 ++++++------- .../s3fs/attribute/S3BasicFileAttributes.java | 4 +- .../s3fs/attribute/S3PosixFileAttributes.java | 2 - .../java/org/lerch/s3fs/util/S3Utils.java | 25 ++++------- 12 files changed, 114 insertions(+), 114 deletions(-) diff --git a/filesystems/s3/src/main/java/org/lerch/s3fs/AmazonS3Factory.java b/filesystems/s3/src/main/java/org/lerch/s3fs/AmazonS3Factory.java index 987e8d71c6a..9d525587642 100644 --- a/filesystems/s3/src/main/java/org/lerch/s3fs/AmazonS3Factory.java +++ b/filesystems/s3/src/main/java/org/lerch/s3fs/AmazonS3Factory.java @@ -1,20 +1,16 @@ package org.lerch.s3fs; - + import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; -import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; -import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider; -import software.amazon.awssdk.auth.credentials.AwsCredentials; -import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.*; +import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration; +import software.amazon.awssdk.http.SdkHttpClient; +import software.amazon.awssdk.http.apache.ApacheHttpClient; import software.amazon.awssdk.regions.Region; import software.amazon.awssdk.regions.providers.DefaultAwsRegionProviderChain; import software.amazon.awssdk.services.s3.S3Client; import software.amazon.awssdk.services.s3.S3ClientBuilder; import software.amazon.awssdk.services.s3.S3Configuration; -import software.amazon.awssdk.http.SdkHttpClient; -import software.amazon.awssdk.http.apache.ApacheHttpClient; -import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration; import java.net.URI; import java.util.Properties; diff --git a/filesystems/s3/src/main/java/org/lerch/s3fs/S3AccessControlList.java b/filesystems/s3/src/main/java/org/lerch/s3fs/S3AccessControlList.java index 0d908001ed3..1f219164707 100644 --- a/filesystems/s3/src/main/java/org/lerch/s3fs/S3AccessControlList.java +++ b/filesystems/s3/src/main/java/org/lerch/s3fs/S3AccessControlList.java @@ -1,14 +1,14 @@ package org.lerch.s3fs; -import static java.lang.String.format; +import software.amazon.awssdk.services.s3.model.Grant; +import software.amazon.awssdk.services.s3.model.Owner; +import software.amazon.awssdk.services.s3.model.Permission; import java.nio.file.AccessDeniedException; import 
java.nio.file.AccessMode; import java.util.EnumSet; -import software.amazon.awssdk.services.s3.model.Grant; -import software.amazon.awssdk.services.s3.model.Owner; -import software.amazon.awssdk.services.s3.model.Permission; +import static java.lang.String.format; public class S3AccessControlList { private String fileStoreName; diff --git a/filesystems/s3/src/main/java/org/lerch/s3fs/S3FileChannel.java b/filesystems/s3/src/main/java/org/lerch/s3fs/S3FileChannel.java index 2f8bd41ff8c..e849294791d 100644 --- a/filesystems/s3/src/main/java/org/lerch/s3fs/S3FileChannel.java +++ b/filesystems/s3/src/main/java/org/lerch/s3fs/S3FileChannel.java @@ -1,8 +1,15 @@ package org.lerch.s3fs; import org.apache.tika.Tika; +import software.amazon.awssdk.core.ResponseInputStream; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; +import software.amazon.awssdk.services.s3.model.PutObjectRequest; -import java.io.*; +import java.io.BufferedInputStream; +import java.io.IOException; +import java.io.InputStream; import java.nio.ByteBuffer; import java.nio.MappedByteBuffer; import java.nio.channels.FileChannel; @@ -14,13 +21,6 @@ import java.util.HashSet; import java.util.Set; -import software.amazon.awssdk.core.sync.RequestBody; -import software.amazon.awssdk.core.ResponseInputStream; -import software.amazon.awssdk.services.s3.model.GetObjectRequest; -import software.amazon.awssdk.services.s3.model.GetObjectResponse; -import software.amazon.awssdk.services.s3.model.PutObjectRequest; -import software.amazon.awssdk.services.s3.model.S3Object; - import static java.lang.String.format; public class S3FileChannel extends FileChannel implements S3Channel { @@ -46,7 +46,7 @@ else if (!exists && !this.options.contains(StandardOpenOption.CREATE_NEW) && boolean removeTempFile = true; try { if (exists) { - try (ResponseInputStream byteStream = path.getFileSystem() + try (ResponseInputStream byteStream = path.getFileStore() .getClient() .getObject(GetObjectRequest .builder() @@ -171,7 +171,7 @@ protected void sync() throws IOException { .contentLength(length) .contentType(new Tika().detect(stream, path.getFileName().toString())); - path.getFileSystem().getClient().putObject(builder.build(), RequestBody.fromInputStream(stream, length)); + path.getFileStore().getClient().putObject(builder.build(), RequestBody.fromInputStream(stream, length)); } } } diff --git a/filesystems/s3/src/main/java/org/lerch/s3fs/S3FileStore.java b/filesystems/s3/src/main/java/org/lerch/s3fs/S3FileStore.java index d8082d0605b..6f3c879bc4d 100644 --- a/filesystems/s3/src/main/java/org/lerch/s3fs/S3FileStore.java +++ b/filesystems/s3/src/main/java/org/lerch/s3fs/S3FileStore.java @@ -1,28 +1,35 @@ package org.lerch.s3fs; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.S3Configuration; +import software.amazon.awssdk.services.s3.model.*; + import java.io.IOException; +import java.net.URI; import java.nio.file.FileStore; import java.nio.file.attribute.FileAttributeView; import java.nio.file.attribute.FileStoreAttributeView; import java.util.Date; -import software.amazon.awssdk.services.s3.S3Client; -import software.amazon.awssdk.services.s3.model.Bucket; -import software.amazon.awssdk.services.s3.model.GetBucketAclRequest; -import 
software.amazon.awssdk.services.s3.model.HeadBucketRequest; -import software.amazon.awssdk.services.s3.model.ListBucketsRequest; -import software.amazon.awssdk.services.s3.model.NoSuchBucketException; -import software.amazon.awssdk.services.s3.model.Owner; -import com.google.common.collect.ImmutableList; - +/** + * In S3 a filestore translates to a bucket + */ public class S3FileStore extends FileStore implements Comparable { private S3FileSystem fileSystem; private String name; + private S3Client defaultClient; + private S3Client bucketSpecificClient; + private Logger logger = LoggerFactory.getLogger("S3FileStore"); public S3FileStore(S3FileSystem s3FileSystem, String name) { this.fileSystem = s3FileSystem; this.name = name; + // the default client can be used for getBucketLocation operations + this.defaultClient = S3Client.builder().endpointOverride(URI.create("https://s3.us-east-1.amazonaws.com")).region(Region.US_EAST_1).build(); } @Override @@ -121,8 +128,21 @@ public S3Path getRootDirectory() { return new S3Path(fileSystem, "/" + this.name()); } - private S3Client getClient() { - return fileSystem.getClient(); + /** + * Gets a client suitable for this FileStore (bucket) including configuring the correct region endpoint. If no client + * exists one will be constructed and cached. + * @return a client + */ + public S3Client getClient() { + if (bucketSpecificClient == null) { + String bucketLocation = defaultClient.getBucketLocation(builder -> builder.bucket(this.name)).locationConstraintAsString(); + logger.debug("Bucket location is '{}'", bucketLocation); + + Region region = bucketLocation.trim().equals("") ? Region.US_EAST_1 : Region.of(bucketLocation); + bucketSpecificClient = S3Client.builder().region(region).build(); + } + + return bucketSpecificClient; } public Owner getOwner() { diff --git a/filesystems/s3/src/main/java/org/lerch/s3fs/S3FileSystem.java b/filesystems/s3/src/main/java/org/lerch/s3fs/S3FileSystem.java index 4fadae9518b..0038a787c64 100644 --- a/filesystems/s3/src/main/java/org/lerch/s3fs/S3FileSystem.java +++ b/filesystems/s3/src/main/java/org/lerch/s3fs/S3FileSystem.java @@ -1,20 +1,16 @@ package org.lerch.s3fs; -import static org.lerch.s3fs.S3Path.PATH_SEPARATOR; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.Bucket; import java.io.IOException; -import java.nio.file.FileStore; -import java.nio.file.FileSystem; -import java.nio.file.Path; -import java.nio.file.PathMatcher; -import java.nio.file.WatchService; +import java.nio.file.*; import java.nio.file.attribute.UserPrincipalLookupService; import java.util.Set; -import software.amazon.awssdk.services.s3.S3Client; -import software.amazon.awssdk.services.s3.model.Bucket; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; +import static org.lerch.s3fs.S3Path.PATH_SEPARATOR; /** * S3FileSystem with a concrete client configured and ready to use. @@ -63,7 +59,7 @@ public boolean isReadOnly() { @Override public String getSeparator() { - return S3Path.PATH_SEPARATOR; + return PATH_SEPARATOR; } @Override @@ -113,9 +109,15 @@ public WatchService newWatchService() throws IOException { throw new UnsupportedOperationException(); } - public S3Client getClient() { - return client; - } +// /** +// * Deprecated: since SDKv2 many S3 operations need to be signed with a client using the same Region as the location +// * of the bucket. 
Prefer S3Path.client() instead. +// * @return +// */ +// @Deprecated +// public S3Client getClient() { +// return client; +// } /** * get the endpoint associated with this fileSystem. diff --git a/filesystems/s3/src/main/java/org/lerch/s3fs/S3FileSystemProvider.java b/filesystems/s3/src/main/java/org/lerch/s3fs/S3FileSystemProvider.java index 0095e372a43..e3fdf0065a5 100644 --- a/filesystems/s3/src/main/java/org/lerch/s3fs/S3FileSystemProvider.java +++ b/filesystems/s3/src/main/java/org/lerch/s3fs/S3FileSystemProvider.java @@ -1,11 +1,11 @@ package org.lerch.s3fs; -import org.apache.commons.lang3.tuple.ImmutablePair; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Sets; +import org.apache.commons.lang3.tuple.ImmutablePair; import org.lerch.s3fs.attribute.S3BasicFileAttributeView; import org.lerch.s3fs.attribute.S3BasicFileAttributes; import org.lerch.s3fs.attribute.S3PosixFileAttributeView; @@ -36,7 +36,8 @@ import static com.google.common.collect.Sets.difference; import static java.lang.String.format; -import static java.lang.Thread.*; +import static java.lang.Thread.currentThread; +import static java.lang.Thread.sleep; import static org.lerch.s3fs.AmazonS3Factory.*; /** @@ -286,7 +287,10 @@ public S3FileSystem getFileSystem(URI uri) { if (fileSystems.containsKey(key)) { return fileSystems.get(key); } else { - throw new FileSystemNotFoundException("S3 filesystem not yet created. Use newFileSystem() instead"); + final String scheme = uri.getScheme(); + final String uriString = uri.toString(); + uriString.replace(scheme, "https://"); + return (S3FileSystem) newFileSystem(uri, Collections.emptyMap()); } } @@ -337,7 +341,7 @@ public InputStream newInputStream(Path path, OpenOption... options) throws IOExc try { ResponseInputStream res = s3Path - .getFileSystem() + .getFileStore() .getClient() .getObject(GetObjectRequest .builder() @@ -384,7 +388,7 @@ public void createDirectory(Path dir, FileAttribute... attrs) throws IOExcept Bucket bucket = s3Path.getFileStore().getBucket(); String bucketName = s3Path.getFileStore().name(); if (bucket == null) { - s3Path.getFileSystem().getClient().createBucket(CreateBucketRequest.builder().bucket(bucketName).build()); + s3Path.getFileStore().getClient().createBucket(CreateBucketRequest.builder().bucket(bucketName).build()); } // create the object as directory PutObjectRequest.Builder builder = PutObjectRequest.builder(); @@ -392,7 +396,7 @@ public void createDirectory(Path dir, FileAttribute... 
attrs) throws IOExcept builder.bucket(bucketName) .key(directoryKey) .contentLength(0L); - s3Path.getFileSystem().getClient().putObject(builder.build(), RequestBody.fromBytes(new byte[0])); + s3Path.getFileStore().getClient().putObject(builder.build(), RequestBody.fromBytes(new byte[0])); } @Override @@ -405,9 +409,9 @@ public void delete(Path path) throws IOException { String key = s3Path.getKey(); String bucketName = s3Path.getFileStore().name(); - s3Path.getFileSystem().getClient().deleteObject(DeleteObjectRequest.builder().bucket(bucketName).key(key).build()); + s3Path.getFileStore().getClient().deleteObject(DeleteObjectRequest.builder().bucket(bucketName).key(key).build()); // we delete the two objects (sometimes exists the key '/' and sometimes not) - s3Path.getFileSystem().getClient().deleteObject(DeleteObjectRequest.builder().bucket(bucketName).key(key + "/").build()); + s3Path.getFileStore().getClient().deleteObject(DeleteObjectRequest.builder().bucket(bucketName).key(key + "/").build()); } @Override @@ -437,7 +441,8 @@ public void copy(Path source, Path target, CopyOption... options) throws IOExcep String keySource = s3Source.getKey(); String bucketNameTarget = s3Target.getFileStore().name(); String keyTarget = s3Target.getKey(); - s3Source.getFileSystem() + // for a cross region copy the client must be for the target (region) not the source region + s3Target.getFileStore() .getClient() .copyObject(CopyObjectRequest.builder() .copySource(bucketNameOrigin + "/" + keySource) @@ -458,7 +463,7 @@ public void copy(Path source, Path target, CopyOption... options) throws IOExcep private void multiPartCopy(S3Path source, long objectSize, S3Path target, CopyOption... options) { log.info(() -> "Attempting multipart copy as part of call cache hit: source = " + source + ", objectSize = " + objectSize + ", target = " + target + ", options = " + Arrays.deepToString(options)); - S3Client s3Client = target.getFileSystem().getClient(); + S3Client s3Client = target.getFileStore().getClient(); final CreateMultipartUploadRequest createMultipartUploadRequest = CreateMultipartUploadRequest.builder() .bucket(target.getFileStore().name()) @@ -594,7 +599,7 @@ private void multiPartCopy(S3Path source, long objectSize, S3Path target, CopyOp */ private long objectSize(S3Path object) { - S3Client s3Client = object.getFileSystem().getClient(); + S3Client s3Client = object.getFileStore().getClient(); final String bucket = object.getFileStore().name(); final String key = object.getKey(); final HeadObjectResponse headObjectResponse = s3Client.headObject(HeadObjectRequest.builder() @@ -656,7 +661,7 @@ public void checkAccess(Path path, AccessMode... 
modes) throws IOException { String key = s3Utils.getS3ObjectSummary(s3Path).key(); String bucket = s3Path.getFileStore().name(); S3AccessControlList accessControlList = - new S3AccessControlList(bucket, key, s3Path.getFileSystem().getClient().getObjectAcl(GetObjectAclRequest.builder().bucket(bucket).key(key).build()).grants(), s3Path.getFileStore().getOwner()); + new S3AccessControlList(bucket, key, s3Path.getFileStore().getClient().getObjectAcl(GetObjectAclRequest.builder().bucket(bucket).key(key).build()).grants(), s3Path.getFileStore().getOwner()); accessControlList.checkAccess(modes); } diff --git a/filesystems/s3/src/main/java/org/lerch/s3fs/S3Iterator.java b/filesystems/s3/src/main/java/org/lerch/s3fs/S3Iterator.java index 3d8f5cb8072..803b5275892 100644 --- a/filesystems/s3/src/main/java/org/lerch/s3fs/S3Iterator.java +++ b/filesystems/s3/src/main/java/org/lerch/s3fs/S3Iterator.java @@ -1,21 +1,15 @@ package org.lerch.s3fs; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.NoSuchElementException; -import java.util.Set; - +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; +import org.lerch.s3fs.util.S3Utils; import software.amazon.awssdk.services.s3.model.CommonPrefix; import software.amazon.awssdk.services.s3.model.ListObjectsRequest; import software.amazon.awssdk.services.s3.model.ListObjectsResponse; import software.amazon.awssdk.services.s3.model.S3Object; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; -import org.lerch.s3fs.util.S3Utils; + +import java.nio.file.Path; +import java.util.*; /** * S3 iterator over folders at first level. @@ -50,7 +44,7 @@ public S3Iterator(S3FileStore fileStore, String key, boolean incremental) { this.fileStore = fileStore; this.fileSystem = fileStore.getFileSystem(); this.key = key; - this.current = fileSystem.getClient().listObjects(listObjectsRequest); + this.current = fileStore.getClient().listObjects(listObjectsRequest); this.incremental = incremental; loadObjects(); } @@ -69,7 +63,7 @@ public S3Path next() { .marker(current.nextMarker()) .build(); - this.current = fileSystem.getClient().listObjects(request); + this.current = fileStore.getClient().listObjects(request); loadObjects(); } if (cursor == size) diff --git a/filesystems/s3/src/main/java/org/lerch/s3fs/S3Path.java b/filesystems/s3/src/main/java/org/lerch/s3fs/S3Path.java index 271a1b31e72..1382c16c3ad 100644 --- a/filesystems/s3/src/main/java/org/lerch/s3fs/S3Path.java +++ b/filesystems/s3/src/main/java/org/lerch/s3fs/S3Path.java @@ -4,19 +4,19 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; import org.lerch.s3fs.attribute.S3BasicFileAttributes; +import software.amazon.awssdk.services.s3.S3Client; import java.io.File; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.URI; -import java.net.URL; import java.net.URLDecoder; import java.nio.file.*; import java.util.Iterator; import java.util.List; -import java.util.Map; import static com.google.common.collect.Iterables.*; +import static com.google.common.collect.Iterables.concat; import static java.lang.String.format; public class S3Path implements Path { diff --git a/filesystems/s3/src/main/java/org/lerch/s3fs/S3SeekableByteChannel.java b/filesystems/s3/src/main/java/org/lerch/s3fs/S3SeekableByteChannel.java index 31318d85c25..5f0ff571521 100644 --- 
a/filesystems/s3/src/main/java/org/lerch/s3fs/S3SeekableByteChannel.java +++ b/filesystems/s3/src/main/java/org/lerch/s3fs/S3SeekableByteChannel.java @@ -1,8 +1,11 @@ package org.lerch.s3fs; -import static java.lang.String.format; +import org.apache.tika.Tika; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.PutObjectRequest; -import java.io.ByteArrayInputStream; import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; @@ -13,21 +16,14 @@ import java.util.HashSet; import java.util.Set; -import org.apache.tika.Tika; - -import software.amazon.awssdk.core.sync.RequestBody; -import software.amazon.awssdk.services.s3.S3Client; -import software.amazon.awssdk.services.s3.model.GetObjectResponse; -import software.amazon.awssdk.services.s3.model.GetObjectRequest; -import software.amazon.awssdk.services.s3.model.PutObjectRequest; -import software.amazon.awssdk.services.s3.model.S3Object; +import static java.lang.String.format; public class S3SeekableByteChannel implements SeekableByteChannel, S3Channel { - private S3Path path; - private Set options; - private SeekableByteChannel seekable; - private Path tempFile; + private final S3Path path; + private final Set options; + private final SeekableByteChannel seekable; + private final Path tempFile; /** * Open or creates a file, returning a seekable byte channel @@ -52,7 +48,7 @@ else if (!exists && !this.options.contains(StandardOpenOption.CREATE_NEW) && boolean removeTempFile = true; try { if (exists) { - try (InputStream byteStream = path.getFileSystem().getClient() + try (InputStream byteStream = path.getFileStore().getClient() .getObject(GetObjectRequest.builder().bucket(path.getFileStore().getBucket().name()).key(key).build())) { Files.copy(byteStream, tempFile, StandardCopyOption.REPLACE_EXISTING); } @@ -115,7 +111,7 @@ protected void sync() throws IOException { builder.bucket(path.getFileStore().name()); builder.key(path.getKey()); - S3Client client = path.getFileSystem().getClient(); + S3Client client = path.getFileStore().getClient(); client.putObject(builder.build(), RequestBody.fromInputStream(stream, length)); } diff --git a/filesystems/s3/src/main/java/org/lerch/s3fs/attribute/S3BasicFileAttributes.java b/filesystems/s3/src/main/java/org/lerch/s3fs/attribute/S3BasicFileAttributes.java index 40e02b17abb..895f5109fb2 100644 --- a/filesystems/s3/src/main/java/org/lerch/s3fs/attribute/S3BasicFileAttributes.java +++ b/filesystems/s3/src/main/java/org/lerch/s3fs/attribute/S3BasicFileAttributes.java @@ -1,10 +1,10 @@ package org.lerch.s3fs.attribute; -import static java.lang.String.format; - import java.nio.file.attribute.BasicFileAttributes; import java.nio.file.attribute.FileTime; +import static java.lang.String.format; + public class S3BasicFileAttributes implements BasicFileAttributes { private final FileTime lastModifiedTime; private final long size; diff --git a/filesystems/s3/src/main/java/org/lerch/s3fs/attribute/S3PosixFileAttributes.java b/filesystems/s3/src/main/java/org/lerch/s3fs/attribute/S3PosixFileAttributes.java index fbe5efac57c..6ffdee62f20 100644 --- a/filesystems/s3/src/main/java/org/lerch/s3fs/attribute/S3PosixFileAttributes.java +++ b/filesystems/s3/src/main/java/org/lerch/s3fs/attribute/S3PosixFileAttributes.java @@ -3,8 +3,6 @@ import java.nio.file.attribute.*; import java.util.Set; -import static 
java.lang.String.format; - public class S3PosixFileAttributes extends S3BasicFileAttributes implements PosixFileAttributes { private UserPrincipal userPrincipal; diff --git a/filesystems/s3/src/main/java/org/lerch/s3fs/util/S3Utils.java b/filesystems/s3/src/main/java/org/lerch/s3fs/util/S3Utils.java index e8586ef4d97..73e25a53b04 100644 --- a/filesystems/s3/src/main/java/org/lerch/s3fs/util/S3Utils.java +++ b/filesystems/s3/src/main/java/org/lerch/s3fs/util/S3Utils.java @@ -1,22 +1,12 @@ package org.lerch.s3fs.util; -import software.amazon.awssdk.services.s3.S3Client; -import software.amazon.awssdk.services.s3.model.HeadObjectRequest; -import software.amazon.awssdk.services.s3.model.HeadObjectResponse; -import software.amazon.awssdk.services.s3.model.GetObjectAclRequest; -import software.amazon.awssdk.services.s3.model.GetObjectAclResponse; -import software.amazon.awssdk.services.s3.model.Grant; -import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; -import software.amazon.awssdk.services.s3.model.ListObjectsV2Response; -import software.amazon.awssdk.services.s3.model.Owner; -import software.amazon.awssdk.services.s3.model.Permission; -import software.amazon.awssdk.services.s3.model.S3Object; -import software.amazon.awssdk.services.s3.model.S3Exception; import com.google.common.collect.Sets; -import org.lerch.s3fs.attribute.S3BasicFileAttributes; import org.lerch.s3fs.S3Path; +import org.lerch.s3fs.attribute.S3BasicFileAttributes; import org.lerch.s3fs.attribute.S3PosixFileAttributes; import org.lerch.s3fs.attribute.S3UserPrincipal; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.*; import java.nio.file.NoSuchFileException; import java.nio.file.attribute.FileTime; @@ -24,7 +14,6 @@ import java.util.HashSet; import java.util.List; import java.util.Set; -import java.util.concurrent.TimeUnit; /** * Utilities to work with Amazon S3 Objects. @@ -41,7 +30,7 @@ public class S3Utils { public S3Object getS3ObjectSummary(S3Path s3Path) throws NoSuchFileException { String key = s3Path.getKey(); String bucketName = s3Path.getFileStore().name(); - S3Client client = s3Path.getFileSystem().getClient(); + S3Client client = s3Path.getFileStore().getClient(); // try to find the element with the current key (maybe with end slash or maybe not.) try { HeadObjectResponse metadata = client.headObject(HeadObjectRequest.builder().bucket(bucketName).key(key).build()); @@ -63,9 +52,9 @@ public S3Object getS3ObjectSummary(S3Path s3Path) throws NoSuchFileException { } // if not found (404 err) with the original key. - // try to find the elment as a directory. + // try to find the element as a directory. 
try { - // is a virtual directory + // is a virtual directory (S3 prefix) ListObjectsV2Request.Builder request = ListObjectsV2Request.builder(); request.bucket(bucketName); String keyFolder = key; @@ -111,7 +100,7 @@ public S3PosixFileAttributes getS3PosixFileAttributes(S3Path s3Path) throws NoSu Set permissions = null; if (!attrs.isDirectory()) { - S3Client client = s3Path.getFileSystem().getClient(); + S3Client client = s3Path.getFileStore().getClient(); GetObjectAclResponse acl = client.getObjectAcl(GetObjectAclRequest.builder().bucket(bucketName).key(key).build()); Owner owner = acl.owner(); From 7675fd94fd79ab93222376af21b26a9bbf579aac Mon Sep 17 00:00:00 2001 From: Mark Schreiber Date: Thu, 22 Jul 2021 14:34:27 -0400 Subject: [PATCH 018/326] handle redirects and objects that don't allow read access to their ACL --- .../main/java/org/lerch/s3fs/S3FileStore.java | 39 +++++++++++++++---- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/filesystems/s3/src/main/java/org/lerch/s3fs/S3FileStore.java b/filesystems/s3/src/main/java/org/lerch/s3fs/S3FileStore.java index 6f3c879bc4d..5a38a882ee3 100644 --- a/filesystems/s3/src/main/java/org/lerch/s3fs/S3FileStore.java +++ b/filesystems/s3/src/main/java/org/lerch/s3fs/S3FileStore.java @@ -2,6 +2,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.core.exception.SdkClientException; import software.amazon.awssdk.regions.Region; import software.amazon.awssdk.services.s3.S3Client; import software.amazon.awssdk.services.s3.S3Configuration; @@ -118,8 +120,8 @@ private boolean hasBucket(String bucketName) { // model as HeadBucket is now required boolean bucket = false; try { - getClient().headBucket(HeadBucketRequest.builder().bucket(bucketName).build()); - bucket = true; + getClient().headBucket(HeadBucketRequest.builder().bucket(bucketName).build()); + bucket = true; }catch(NoSuchBucketException nsbe) {} return bucket; } @@ -135,16 +137,39 @@ public S3Path getRootDirectory() { */ public S3Client getClient() { if (bucketSpecificClient == null) { - String bucketLocation = defaultClient.getBucketLocation(builder -> builder.bucket(this.name)).locationConstraintAsString(); - logger.debug("Bucket location is '{}'", bucketLocation); - - Region region = bucketLocation.trim().equals("") ? Region.US_EAST_1 : Region.of(bucketLocation); - bucketSpecificClient = S3Client.builder().region(region).build(); + try { + logger.debug("Determining bucket location with getBucketLocation"); + String bucketLocation = defaultClient.getBucketLocation(builder -> builder.bucket(this.name)).locationConstraintAsString(); + + bucketSpecificClient = this.clientForRegion(bucketLocation); + + } catch (S3Exception e) { + if(e.statusCode() == 403) { + logger.info("Cannot determine bucket location directly. 
Attempting to obtain bucket location with headBucket operation"); + try { + final HeadBucketResponse headBucketResponse = defaultClient.headBucket(builder -> builder.bucket(this.name)); + bucketSpecificClient = this.clientForRegion(headBucketResponse.sdkHttpResponse().firstMatchingHeader("x-amz-bucket-region").orElseThrow()); + } catch (S3Exception e2) { + if (e2.statusCode() == 301) { + bucketSpecificClient = this.clientForRegion(e2.awsErrorDetails().sdkHttpResponse().firstMatchingHeader("x-amz-bucket-region").orElseThrow()); + } + } + } else { + logger.warn("Cannot determine location of {}, falling back to default s3 client for the current profile", this.name); + bucketSpecificClient = S3Client.create(); + } + } } return bucketSpecificClient; } + private S3Client clientForRegion(String regionString){ + Region region = regionString.equals("") ? Region.US_EAST_1 : Region.of(regionString); + logger.debug("Bucket region is: '{}'", region.id()); + return S3Client.builder().region(region).build(); + } + public Owner getOwner() { if (hasBucket(name)) return getClient().getBucketAcl(GetBucketAclRequest.builder().bucket(name).build()).owner(); From 08d95b8a0c6ecbb53337433b6c1c095d74f022ae Mon Sep 17 00:00:00 2001 From: Mark Schreiber Date: Thu, 22 Jul 2021 14:34:59 -0400 Subject: [PATCH 019/326] added retry and size verification to s3 object localization --- .../backend/impl/aws/AwsBatchJob.scala | 71 ++++++++++++------- 1 file changed, 45 insertions(+), 26 deletions(-) diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala index 2378c48c8ef..3903fe8a786 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala @@ -30,8 +30,6 @@ */ package cromwell.backend.impl.aws -import java.security.MessageDigest - import cats.data.ReaderT._ import cats.data.{Kleisli, ReaderT} import cats.effect.{Async, Timer} @@ -55,10 +53,11 @@ import software.amazon.awssdk.services.s3.S3Client import software.amazon.awssdk.services.s3.model.{GetObjectRequest, HeadObjectRequest, NoSuchKeyException, PutObjectRequest} import wdl4s.parser.MemoryUnit +import java.security.MessageDigest import scala.collection.JavaConverters._ import scala.concurrent.duration._ import scala.language.higherKinds -import scala.util.{Random, Try} +import scala.util.Try /** * The actual job for submission in AWS batch. `AwsBatchJob` is the primary interface to AWS Batch. 
It creates the @@ -85,18 +84,12 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL optAwsAuthMode: Option[AwsAuthMode] = None ) { - // values for container environment - val AWS_MAX_ATTEMPTS: String = "AWS_MAX_ATTEMPTS" - val AWS_MAX_ATTEMPTS_DEFAULT_VALUE: String = "14" - val AWS_RETRY_MODE: String = "AWS_RETRY_MODE" - val AWS_RETRY_MODE_DEFAULT_VALUE: String = "adaptive" val Log: Logger = LoggerFactory.getLogger(AwsBatchJob.getClass) //this will be the "folder" that scripts will live in (underneath the script bucket) val scriptKeyPrefix = "scripts/" - // TODO: Auth, endpoint lazy val batchClient: BatchClient = { val builder = BatchClient.builder() configureClient(builder, optAwsAuthMode, configRegion) @@ -119,26 +112,25 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL */ lazy val reconfiguredScript: String = { //this is the location of the aws cli mounted into the container by the ec2 launch template - val s3Cmd = "/usr/local/aws-cli/v2/current/bin/aws s3" + val awsCmd = "/usr/local/aws-cli/v2/current/bin/aws " //internal to the container, therefore not mounted val workDir = "/tmp/scratch" //working in a mount will cause collisions in long running workers val replaced = commandScript.replaceAllLiterally(AwsBatchWorkingDisk.MountPoint.pathAsString, workDir) val insertionPoint = replaced.indexOf("\n", replaced.indexOf("#!")) +1 //just after the new line after the shebang! - /* generate a series of s3 copy statements to copy any s3 files into the container. We randomize the order - so that large scatters don't all attempt to copy the same thing at the same time. */ - val inputCopyCommand = Random.shuffle(inputs.map { + /* generate a series of s3 copy statements to copy any s3 files into the container. */ + val inputCopyCommand = inputs.map { case input: AwsBatchFileInput if input.s3key.startsWith("s3://") && input.s3key.endsWith(".tmp") => //we are localizing a tmp file which may contain workdirectory paths that need to be reconfigured s""" - |$s3Cmd cp --no-progress ${input.s3key} $workDir/${input.local} + |_s3_localize_with_retry ${input.s3key} $workDir/${input.local} |sed -i 's#${AwsBatchWorkingDisk.MountPoint.pathAsString}#$workDir#g' $workDir/${input.local} |""".stripMargin case input: AwsBatchFileInput if input.s3key.startsWith("s3://") => - s"$s3Cmd cp --no-progress ${input.s3key} ${input.mount.mountPoint.pathAsString}/${input.local}" + s"_s3_localize_with_retry ${input.s3key} ${input.mount.mountPoint.pathAsString}/${input.local}" .replaceAllLiterally(AwsBatchWorkingDisk.MountPoint.pathAsString, workDir) case input: AwsBatchFileInput => @@ -149,11 +141,40 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL s"test -e $filePath || echo 'input file: $filePath does not exist' && exit 1" case _ => "" - }.toList).mkString("\n") + }.toList.mkString("\n") // this goes at the start of the script after the #! val preamble = s""" + |function _s3_localize_with_retry() { + | local s3_path=$$1 + | # destination must be the path to a file and not just the directory you want the file in + | local destination=$$2 + | + | if [[ $$s3_path =~ s3://([^/]+)/(.+) ]]; then + | bucket="$${BASH_REMATCH[1]}" + | key="$${BASH_REMATCH[2]}" + | content_length=$$($awsCmd s3api head-object --bucket "$$bucket" --key "$$key" --query 'ContentLength') + | else + | echo "$$s3_path is not an S3 path with a bucket and key. 
aborting" + | exit 1 + | fi + | + | + | for i in {1..5}; + | do + | $awsCmd s3 cp --no-progress "$$s3_path" "$$destination" && + | [[ $$(LC_ALL=C ls -dn -- "$$destination" | awk '{print $$5; exit}') -eq "$$content_length" ]] && break || + | echo "attempt $$i to copy $$s3_path failed"; + | + | if [ "$$i" -eq 5 ]; then + | echo "failed to copy $$s3_path after $$i attempts. aborting" + | exit 2 + | fi + | sleep $$((7 * "$$i")) + | done + |} + | |{ |set -e |echo '*** LOCALIZING INPUTS ***' @@ -183,24 +204,24 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL */ s""" |touch ${output.name} - |$s3Cmd cp --no-progress ${output.name} ${output.s3key} - |if [ -e $globDirectory ]; then $s3Cmd cp --no-progress $globDirectory $s3GlobOutDirectory --recursive --exclude "cromwell_glob_control_file"; fi + |$awsCmd s3 cp --no-progress ${output.name} ${output.s3key} + |if [ -e $globDirectory ]; then $awsCmd s3 cp --no-progress $globDirectory $s3GlobOutDirectory --recursive --exclude "cromwell_glob_control_file"; fi |""".stripMargin case output: AwsBatchFileOutput if output.s3key.startsWith("s3://") && output.mount.mountPoint.pathAsString == AwsBatchWorkingDisk.MountPoint.pathAsString => //output is on working disk mount s""" - |$s3Cmd cp --no-progress $workDir/${output.local.pathAsString} ${output.s3key} + |$awsCmd s3 cp --no-progress $workDir/${output.local.pathAsString} ${output.s3key} |""".stripMargin case output: AwsBatchFileOutput => //output on a different mount - s"$s3Cmd cp --no-progress ${output.mount.mountPoint.pathAsString}/${output.local.pathAsString} ${output.s3key}" + s"$awsCmd s3 cp --no-progress ${output.mount.mountPoint.pathAsString}/${output.local.pathAsString} ${output.s3key}" case _ => "" }.mkString("\n") + "\n" + s""" - |if [ -f $workDir/${jobPaths.returnCodeFilename} ]; then $s3Cmd cp --no-progress $workDir/${jobPaths.returnCodeFilename} ${jobPaths.callRoot.pathAsString}/${jobPaths.returnCodeFilename} ; fi\n - |if [ -f $stdErr ]; then $s3Cmd cp --no-progress $stdErr ${jobPaths.standardPaths.error.pathAsString}; fi - |if [ -f $stdOut ]; then $s3Cmd cp --no-progress $stdOut ${jobPaths.standardPaths.output.pathAsString}; fi + |if [ -f $workDir/${jobPaths.returnCodeFilename} ]; then $awsCmd s3 cp --no-progress $workDir/${jobPaths.returnCodeFilename} ${jobPaths.callRoot.pathAsString}/${jobPaths.returnCodeFilename} ; fi\n + |if [ -f $stdErr ]; then $awsCmd s3 cp --no-progress $stdErr ${jobPaths.standardPaths.error.pathAsString}; fi + |if [ -f $stdOut ]; then $awsCmd s3 cp --no-progress $stdOut ${jobPaths.standardPaths.output.pathAsString}; fi |""".stripMargin @@ -221,8 +242,7 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL } private def generateEnvironmentKVPairs(scriptBucketName: String, scriptKeyPrefix: String, scriptKey: String): List[KeyValuePair] = { - List(buildKVPair(AWS_MAX_ATTEMPTS, AWS_MAX_ATTEMPTS_DEFAULT_VALUE), - buildKVPair(AWS_RETRY_MODE, AWS_RETRY_MODE_DEFAULT_VALUE), + List( buildKVPair("BATCH_FILE_TYPE", "script"), buildKVPair("BATCH_FILE_S3_URL",batch_file_s3_url(scriptBucketName,scriptKeyPrefix,scriptKey))) } @@ -442,7 +462,6 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL } yield runStatus def detail(jobId: String): JobDetail = { - //TODO: This client call should be wrapped in a cats Effect val describeJobsResponse = batchClient.describeJobs(DescribeJobsRequest.builder.jobs(jobId).build) val jobDetail = describeJobsResponse.jobs.asScala.headOption. 
From 986ab318221148443c5ba51ea52b9f1b5b4055d1 Mon Sep 17 00:00:00 2001 From: Mark Schreiber Date: Fri, 23 Jul 2021 16:33:14 -0400 Subject: [PATCH 020/326] removed need to have object acl access --- .../src/main/java/org/lerch/s3fs/util/S3Utils.java | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/filesystems/s3/src/main/java/org/lerch/s3fs/util/S3Utils.java b/filesystems/s3/src/main/java/org/lerch/s3fs/util/S3Utils.java index 73e25a53b04..4f4a3d24996 100644 --- a/filesystems/s3/src/main/java/org/lerch/s3fs/util/S3Utils.java +++ b/filesystems/s3/src/main/java/org/lerch/s3fs/util/S3Utils.java @@ -5,6 +5,8 @@ import org.lerch.s3fs.attribute.S3BasicFileAttributes; import org.lerch.s3fs.attribute.S3PosixFileAttributes; import org.lerch.s3fs.attribute.S3UserPrincipal; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import software.amazon.awssdk.services.s3.S3Client; import software.amazon.awssdk.services.s3.model.*; @@ -19,6 +21,7 @@ * Utilities to work with Amazon S3 Objects. */ public class S3Utils { + Logger log = LoggerFactory.getLogger("S3Utils"); /** * Get the {@link S3Object} that represent this Path or her first child if this path not exists @@ -34,14 +37,20 @@ public S3Object getS3ObjectSummary(S3Path s3Path) throws NoSuchFileException { // try to find the element with the current key (maybe with end slash or maybe not.) try { HeadObjectResponse metadata = client.headObject(HeadObjectRequest.builder().bucket(bucketName).key(key).build()); - GetObjectAclResponse acl = client.getObjectAcl(GetObjectAclRequest.builder().bucket(bucketName).key(key).build()); + Owner objectOwner = Owner.builder().build(); + try { + GetObjectAclResponse acl = client.getObjectAcl(GetObjectAclRequest.builder().bucket(bucketName).key(key).build()); + objectOwner = acl.owner(); + } catch (S3Exception e2){ + log.warn("Unable to determine the owner of object: '{}', setting owner as empty", s3Path); + } S3Object.Builder builder = S3Object.builder(); builder .key(key) .lastModified(metadata.lastModified()) .eTag(metadata.eTag()) - .owner(acl.owner()) + .owner(objectOwner) .size(metadata.contentLength()) .storageClass(metadata.storageClassAsString()); From e171e9139273f50117e969e0acb34144786f1b55 Mon Sep 17 00:00:00 2001 From: Mark Schreiber Date: Sat, 31 Jul 2021 15:23:42 -0400 Subject: [PATCH 021/326] cache s3 clients and vend from S3ClientStore --- .../main/java/org/lerch/s3fs/S3FileStore.java | 34 +------- .../org/lerch/s3fs/util/S3ClientStore.java | 87 +++++++++++++++++++ 2 files changed, 89 insertions(+), 32 deletions(-) create mode 100644 filesystems/s3/src/main/java/org/lerch/s3fs/util/S3ClientStore.java diff --git a/filesystems/s3/src/main/java/org/lerch/s3fs/S3FileStore.java b/filesystems/s3/src/main/java/org/lerch/s3fs/S3FileStore.java index 5a38a882ee3..93f9faf3cba 100644 --- a/filesystems/s3/src/main/java/org/lerch/s3fs/S3FileStore.java +++ b/filesystems/s3/src/main/java/org/lerch/s3fs/S3FileStore.java @@ -1,5 +1,6 @@ package org.lerch.s3fs; +import org.lerch.s3fs.util.S3ClientStore; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import software.amazon.awssdk.awscore.exception.AwsServiceException; @@ -136,38 +137,7 @@ public S3Path getRootDirectory() { * @return a client */ public S3Client getClient() { - if (bucketSpecificClient == null) { - try { - logger.debug("Determining bucket location with getBucketLocation"); - String bucketLocation = defaultClient.getBucketLocation(builder -> builder.bucket(this.name)).locationConstraintAsString(); - - 
bucketSpecificClient = this.clientForRegion(bucketLocation); - - } catch (S3Exception e) { - if(e.statusCode() == 403) { - logger.info("Cannot determine bucket location directly. Attempting to obtain bucket location with headBucket operation"); - try { - final HeadBucketResponse headBucketResponse = defaultClient.headBucket(builder -> builder.bucket(this.name)); - bucketSpecificClient = this.clientForRegion(headBucketResponse.sdkHttpResponse().firstMatchingHeader("x-amz-bucket-region").orElseThrow()); - } catch (S3Exception e2) { - if (e2.statusCode() == 301) { - bucketSpecificClient = this.clientForRegion(e2.awsErrorDetails().sdkHttpResponse().firstMatchingHeader("x-amz-bucket-region").orElseThrow()); - } - } - } else { - logger.warn("Cannot determine location of {}, falling back to default s3 client for the current profile", this.name); - bucketSpecificClient = S3Client.create(); - } - } - } - - return bucketSpecificClient; - } - - private S3Client clientForRegion(String regionString){ - Region region = regionString.equals("") ? Region.US_EAST_1 : Region.of(regionString); - logger.debug("Bucket region is: '{}'", region.id()); - return S3Client.builder().region(region).build(); + return S3ClientStore.getInstance().getClientForBucketName(this.name); } public Owner getOwner() { diff --git a/filesystems/s3/src/main/java/org/lerch/s3fs/util/S3ClientStore.java b/filesystems/s3/src/main/java/org/lerch/s3fs/util/S3ClientStore.java new file mode 100644 index 00000000000..61c0286e229 --- /dev/null +++ b/filesystems/s3/src/main/java/org/lerch/s3fs/util/S3ClientStore.java @@ -0,0 +1,87 @@ +package org.lerch.s3fs.util; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.HeadBucketResponse; +import software.amazon.awssdk.services.s3.model.S3Exception; + +import java.net.URI; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +/** + * A Singleton cache of clients for buckets configured for the region of those buckets + */ +public class S3ClientStore { + + private static final S3ClientStore instance = new S3ClientStore(); + + public static final S3Client DEFAULT_CLIENT = S3Client.builder().endpointOverride(URI.create("https://s3.us-east-1.amazonaws.com")).region(Region.US_EAST_1).build(); + + private final Map bucketToClientMap = Collections.synchronizedMap(new HashMap<>()); + + Logger logger = LoggerFactory.getLogger("S3ClientStore"); + + + private S3ClientStore(){} + + public static S3ClientStore getInstance(){ + return instance; + } + + public S3Client getClientForBucketName( String bucketName ) { + logger.info("obtaining client for bucket '{}'", bucketName); + if (bucketName == null || bucketName.trim().equals("")) { + return DEFAULT_CLIENT; + } + + return bucketToClientMap.computeIfAbsent(bucketName, this::generateClient); + } + + private S3Client generateClient (String name) { + logger.info("generating client for bucket: '{}'", name); + S3Client bucketSpecificClient; + try { + logger.info("determining bucket location with getBucketLocation"); + String bucketLocation = DEFAULT_CLIENT.getBucketLocation(builder -> builder.bucket(name)).locationConstraintAsString(); + + bucketSpecificClient = this.clientForRegion(bucketLocation); + + } catch (S3Exception e) { + if(e.statusCode() == 403) { + logger.info("Cannot determine bucket location directly. 
Attempting to obtain bucket location with headBucket operation"); + try { + final HeadBucketResponse headBucketResponse = DEFAULT_CLIENT.headBucket(builder -> builder.bucket(name)); + bucketSpecificClient = this.clientForRegion(headBucketResponse.sdkHttpResponse().firstMatchingHeader("x-amz-bucket-region").orElseThrow()); + } catch (S3Exception e2) { + if (e2.statusCode() == 301) { + bucketSpecificClient = this.clientForRegion(e2.awsErrorDetails().sdkHttpResponse().firstMatchingHeader("x-amz-bucket-region").orElseThrow()); + } else { + throw e2; + } + } + } else { + throw e; + } + } + + if (bucketSpecificClient == null) { + logger.warn("Unable to determine the region of bucket: '{}'", name); + logger.warn("Generating a client for the current region"); + bucketSpecificClient = S3Client.create(); + } + + return bucketSpecificClient; + } + + private S3Client clientForRegion(String regionString){ + // It may be useful to further cache clients for regions although at some point clients for buckets may need to be + // specialized beyond just region end points. + Region region = regionString.equals("") ? Region.US_EAST_1 : Region.of(regionString); + logger.info("bucket region is: '{}'", region.id()); + return S3Client.builder().region(region).build(); + } +} From 48257ac7b37d7004ac7462bcc7e551eb46c587d3 Mon Sep 17 00:00:00 2001 From: Mark Schreiber Date: Mon, 2 Aug 2021 16:55:13 -0400 Subject: [PATCH 022/326] updated _s3_localize_with_retry so that the attempt to obtain the size of the object is moved within the retry loop --- .../backend/impl/aws/AwsBatchJob.scala | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala index 3903fe8a786..64228132fb9 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala @@ -151,18 +151,16 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL | # destination must be the path to a file and not just the directory you want the file in | local destination=$$2 | - | if [[ $$s3_path =~ s3://([^/]+)/(.+) ]]; then - | bucket="$${BASH_REMATCH[1]}" - | key="$${BASH_REMATCH[2]}" - | content_length=$$($awsCmd s3api head-object --bucket "$$bucket" --key "$$key" --query 'ContentLength') - | else - | echo "$$s3_path is not an S3 path with a bucket and key. aborting" - | exit 1 - | fi - | - | | for i in {1..5}; | do + | if [[ $$s3_path =~ s3://([^/]+)/(.+) ]]; then + | bucket="$${BASH_REMATCH[1]}" + | key="$${BASH_REMATCH[2]}" + | content_length=$$($awsCmd s3api head-object --bucket "$$bucket" --key "$$key" --query 'ContentLength') + | else + | echo "$$s3_path is not an S3 path with a bucket and key. 
aborting" + | exit 1 + | fi | $awsCmd s3 cp --no-progress "$$s3_path" "$$destination" && | [[ $$(LC_ALL=C ls -dn -- "$$destination" | awk '{print $$5; exit}') -eq "$$content_length" ]] && break || | echo "attempt $$i to copy $$s3_path failed"; From 4fcc41658894f07633dd469aee3ce184555c06e3 Mon Sep 17 00:00:00 2001 From: Mark Schreiber Date: Tue, 3 Aug 2021 15:07:03 -0400 Subject: [PATCH 023/326] Increased metadata endpoint retries --- .../s3/src/main/java/org/lerch/s3fs/util/S3ClientStore.java | 5 ++--- .../main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala | 3 +++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/filesystems/s3/src/main/java/org/lerch/s3fs/util/S3ClientStore.java b/filesystems/s3/src/main/java/org/lerch/s3fs/util/S3ClientStore.java index 61c0286e229..a3720d3bab8 100644 --- a/filesystems/s3/src/main/java/org/lerch/s3fs/util/S3ClientStore.java +++ b/filesystems/s3/src/main/java/org/lerch/s3fs/util/S3ClientStore.java @@ -52,7 +52,7 @@ private S3Client generateClient (String name) { } catch (S3Exception e) { if(e.statusCode() == 403) { - logger.info("Cannot determine bucket location directly. Attempting to obtain bucket location with headBucket operation"); + logger.info("Cannot determine location of '{}' bucket directly. Attempting to obtain bucket location with headBucket operation", name); try { final HeadBucketResponse headBucketResponse = DEFAULT_CLIENT.headBucket(builder -> builder.bucket(name)); bucketSpecificClient = this.clientForRegion(headBucketResponse.sdkHttpResponse().firstMatchingHeader("x-amz-bucket-region").orElseThrow()); @@ -69,8 +69,7 @@ private S3Client generateClient (String name) { } if (bucketSpecificClient == null) { - logger.warn("Unable to determine the region of bucket: '{}'", name); - logger.warn("Generating a client for the current region"); + logger.warn("Unable to determine the region of bucket: '{}'. Generating a client for the current region.", name); bucketSpecificClient = S3Client.create(); } diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala index 64228132fb9..7df82c99ea7 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala @@ -146,6 +146,9 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL // this goes at the start of the script after the #! 
val preamble = s""" + |export AWS_METADATA_SERVICE_TIMEOUT=10 + |export AWS_METADATA_SERVICE_NUM_ATTEMPTS=10 + | |function _s3_localize_with_retry() { | local s3_path=$$1 | # destination must be the path to a file and not just the directory you want the file in From 2eb86cf95ad9a8313b2575a9e9e86064777752bd Mon Sep 17 00:00:00 2001 From: henriqueribeiro Date: Wed, 4 Aug 2021 18:56:35 +0100 Subject: [PATCH 024/326] feat: implement aws batch retries (#1) * add awsBatchRetryAttempts as a runtime attribute * exit reconfigure-script with same exit code as rc file * add awsBatchRetryAttempts to AwsBatchJobSpec test * add documentation for awsBatchRetryAttempts * create ulimits runtime attribute * add 'ulimits' runtime attribute * add ulimits to docs * add ulimits to test; missing proper tests --- .../main/scala/cromwell/backend/backend.scala | 2 + docs/RuntimeAttributes.md | 53 +++++++- .../metadata/impl/MetadataServiceActor.scala | 2 +- ...wsBatchAsyncBackendJobExecutionActor.scala | 16 ++- .../backend/impl/aws/AwsBatchAttributes.scala | 9 +- .../backend/impl/aws/AwsBatchJob.scala | 11 +- .../impl/aws/AwsBatchJobDefinition.scala | 107 ++++++++++------ .../impl/aws/AwsBatchRuntimeAttributes.scala | 118 +++++++++++++++++- .../backend/impl/aws/AwsBatchJobSpec.scala | 2 + .../aws/AwsBatchRuntimeAttributesSpec.scala | 33 ++++- .../backend/impl/aws/AwsBatchTestConfig.scala | 2 + .../values/EngineFunctionEvaluators.scala | 2 +- 12 files changed, 301 insertions(+), 56 deletions(-) diff --git a/backend/src/main/scala/cromwell/backend/backend.scala b/backend/src/main/scala/cromwell/backend/backend.scala index 601207362ef..30317d0684c 100644 --- a/backend/src/main/scala/cromwell/backend/backend.scala +++ b/backend/src/main/scala/cromwell/backend/backend.scala @@ -141,6 +141,8 @@ object CommonBackendConfigurationAttributes { "default-runtime-attributes.noAddress", "default-runtime-attributes.docker", "default-runtime-attributes.queueArn", + "default-runtime-attributes.awsBatchRetryAttempts", + "default-runtime-attributes.ulimits", "default-runtime-attributes.failOnStderr", "slow-job-warning-time", "dockerhub", diff --git a/docs/RuntimeAttributes.md b/docs/RuntimeAttributes.md index 7675cc767f4..1bce8a7b306 100644 --- a/docs/RuntimeAttributes.md +++ b/docs/RuntimeAttributes.md @@ -56,6 +56,9 @@ There are a number of additional runtime attributes that apply to the Google Clo - [useDockerImageCache](#usedockerimagecache) +### AWS Specific Attributes +- [awsBatchRetryAttempts](#awsBatchRetryAttempts) +- [ulimits](#ulimits) ## Expression support @@ -323,8 +326,6 @@ runtime { ``` - - ### `bootDiskSizeGb` In addition to working disks, Google Cloud allows specification of a boot disk size. This is the disk where the docker image itself is booted (**not the working directory of your task on the VM**). @@ -373,6 +374,54 @@ runtime { } ``` + +### `awsBatchRetryAttempts` + +*Default: _0_* + +This runtime attribute adds support to [*AWS Batch Automated Job Retries*](https://docs.aws.amazon.com/batch/latest/userguide/job_retries.html) which makes it possible to tackle transient job failures. For example, if a task fails due to a timeout from accessing an external service, then this option helps re-run the failed the task without having to re-run the entire workflow. It takes an Int, between 1 and 10, as a value that indicates the maximum number of times AWS Batch should retry a failed task. 
If the value 0 is passed, the [*Retry Strategy*](https://docs.aws.amazon.com/batch/latest/userguide/job_definition_parameters.html#retryStrategy) will not be added to the job definiton and the task will run just once. + +``` +runtime { + awsBatchRetryAttempts: integer +} +``` + + +### `ulimits` + +*Default: _empty_* + +A list of [`ulimits`](https://docs.aws.amazon.com/batch/latest/userguide/job_definition_parameters.html#containerProperties) values to set in the container. This parameter maps to `Ulimits` in the [Create a container](https://docs.docker.com/engine/api/v1.38/) section of the [Docker Remote API](https://docs.docker.com/engine/api/v1.38/) and the `--ulimit` option to [docker run](https://docs.docker.com/engine/reference/commandline/run/). + +``` +"ulimits": [ + { + "name": string, + "softLimit": integer, + "hardLimit": integer + } + ... +] +``` +Parameter description: + +- `name` + - The `type` of the `ulimit`. + - Type: String + - Required: Yes, when `ulimits` is used. + +- `softLimit` + - The soft limit for the `ulimit` type. + - Type: Integer + - Required: Yes, when `ulimits` is used. + +- `hardLimit` + - The hard limit for the `ulimit` type. + - Type: Integer + - Required: Yes, when `ulimits` is used. + + #### How to Setup Configure your Google network to use "Private Google Access". This will allow your VMs to access Google Services including Google Container Registry, as well as Dockerhub images. diff --git a/services/src/main/scala/cromwell/services/metadata/impl/MetadataServiceActor.scala b/services/src/main/scala/cromwell/services/metadata/impl/MetadataServiceActor.scala index a3c4f5fa569..954f5f355f8 100644 --- a/services/src/main/scala/cromwell/services/metadata/impl/MetadataServiceActor.scala +++ b/services/src/main/scala/cromwell/services/metadata/impl/MetadataServiceActor.scala @@ -56,7 +56,7 @@ case class MetadataServiceActor(serviceConfig: Config, globalConfig: Config, ser private val metadataReadTimeout: Duration = serviceConfig.getOrElse[Duration]("metadata-read-query-timeout", Duration.Inf) private val metadataReadRowNumberSafetyThreshold: Int = - serviceConfig.getOrElse[Int]("metadata-read-row-number-safety-threshold", 1000000) + serviceConfig.getOrElse[Int]("metadata-read-row-number-safety-threshold", 3000000) def readMetadataWorkerActorProps(): Props = ReadDatabaseMetadataWorkerActor diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala index 7a36946e78d..4ceb0c8295e 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala @@ -202,10 +202,18 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar private def inputsFromWomFiles(namePrefix: String, remotePathArray: Seq[WomFile], localPathArray: Seq[WomFile], - jobDescriptor: BackendJobDescriptor): Iterable[AwsBatchInput] = { + jobDescriptor: BackendJobDescriptor, + flag: Boolean): Iterable[AwsBatchInput] = { + (remotePathArray zip localPathArray zipWithIndex) flatMap { case ((remotePath, localPath), index) => - Seq(AwsBatchFileInput(s"$namePrefix-$index", remotePath.valueString, DefaultPathBuilder.get(localPath.valueString), workingDisk)) + var localPathString = localPath.valueString + if (localPathString.startsWith("s3://")){ + 
localPathString = localPathString.replace("s3://", "") + }else if (localPathString.startsWith("s3:/")) { + localPathString = localPathString.replace("s3:/", "") + } + Seq(AwsBatchFileInput(s"$namePrefix-$index", remotePath.valueString, DefaultPathBuilder.get(localPathString), workingDisk)) } } @@ -237,7 +245,7 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar val writeFunctionFiles = instantiatedCommand.createdFiles map { f => f.file.value.md5SumShort -> List(f) } toMap val writeFunctionInputs = writeFunctionFiles flatMap { - case (name, files) => inputsFromWomFiles(name, files.map(_.file), files.map(localizationPath), jobDescriptor) + case (name, files) => inputsFromWomFiles(name, files.map(_.file), files.map(localizationPath), jobDescriptor, false) } // Collect all WomFiles from inputs to the call. @@ -257,7 +265,7 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar } val callInputInputs = callInputFiles flatMap { - case (name, files) => inputsFromWomFiles(name, files, files.map(relativeLocalizationPath), jobDescriptor) + case (name, files) => inputsFromWomFiles(name, files, files.map(relativeLocalizationPath), jobDescriptor, true) } val scriptInput: AwsBatchInput = AwsBatchFileInput( diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAttributes.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAttributes.scala index 26f69c4e79a..dffcb983235 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAttributes.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAttributes.scala @@ -71,7 +71,14 @@ object AwsBatchAttributes { "filesystems.local.auth", "filesystems.s3.auth", "filesystems.s3.caching.duplication-strategy", - "filesystems.local.caching.duplication-strategy" + "filesystems.local.caching.duplication-strategy", + "auth", + "numCreateDefinitionAttempts", + "filesystems.s3.duplication-strategy", + "numSubmitAttempts", + "default-runtime-attributes.scriptBucketName", + "awsBatchRetryAttempts", + "ulimits" ) private val deprecatedAwsBatchKeys: Map[String, String] = Map( diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala index 7df82c99ea7..1e2efaa2d5c 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala @@ -234,6 +234,8 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL |echo '*** DELOCALIZING OUTPUTS ***' |$outputCopyCommand |echo '*** COMPLETED DELOCALIZATION ***' + |echo '*** EXITING WITH RC CODE ***' + |exit $$(head -n 1 $workDir/${jobPaths.returnCodeFilename}) |} |""".stripMargin } @@ -409,16 +411,19 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL // See: // // http://aws-java-sdk-javadoc.s3-website-us-west-2.amazonaws.com/latest/software/amazon/awssdk/services/batch/model/RegisterJobDefinitionRequest.Builder.html - val definitionRequest = RegisterJobDefinitionRequest.builder + var definitionRequest = RegisterJobDefinitionRequest.builder .containerProperties(jobDefinition.containerProperties) .jobDefinitionName(jobDefinitionName) // See https://stackoverflow.com/questions/24349517/scala-method-named-type .`type`(JobDefinitionType.CONTAINER) - .build + + if 
(jobDefinitionContext.runtimeAttributes.awsBatchRetryAttempts != 0){ + definitionRequest = definitionRequest.retryStrategy(jobDefinition.retryStrategy) + } Log.debug(s"Submitting definition request: $definitionRequest") - val response: RegisterJobDefinitionResponse = batchClient.registerJobDefinition(definitionRequest) + val response: RegisterJobDefinitionResponse = batchClient.registerJobDefinition(definitionRequest.build) Log.info(s"Definition created: $response") response.jobDefinitionArn() } diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobDefinition.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobDefinition.scala index 137cce9a4ef..876a23c32c0 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobDefinition.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobDefinition.scala @@ -35,7 +35,7 @@ import scala.language.postfixOps import scala.collection.mutable.ListBuffer import cromwell.backend.BackendJobDescriptor import cromwell.backend.io.JobPaths -import software.amazon.awssdk.services.batch.model.{ContainerProperties, Host, KeyValuePair, MountPoint, Volume} +import software.amazon.awssdk.services.batch.model.{ContainerProperties, Host, KeyValuePair, MountPoint, RetryStrategy, Volume, Ulimit} import cromwell.backend.impl.aws.io.AwsBatchVolume import scala.collection.JavaConverters._ @@ -62,12 +62,14 @@ import wdl4s.parser.MemoryUnit */ sealed trait AwsBatchJobDefinition { def containerProperties: ContainerProperties + def retryStrategy: RetryStrategy def name: String override def toString: String = { new ToStringBuilder(this, ToStringStyle.JSON_STYLE) .append("name", name) .append("containerProperties", containerProperties) + .append("retryStrategy", retryStrategy) .build } } @@ -78,23 +80,13 @@ trait AwsBatchJobDefinitionBuilder { /** Gets a builder, seeded with appropriate portions of the container properties * - * @param dockerImage docker image with which to run - * @return ContainerProperties builder ready for modification + * @param context AwsBatchJobDefinitionContext with all the runtime attributes + * @return ContainerProperties builder ready for modification and name * */ - def builder(dockerImage: String): ContainerProperties.Builder = - ContainerProperties.builder().image(dockerImage) - - - def buildResources(builder: ContainerProperties.Builder, - context: AwsBatchJobDefinitionContext): (ContainerProperties.Builder, String) = { - // The initial buffer should only contain one item - the hostpath of the - // local disk mount point, which will be needed by the docker container - // that copies data around - - val environment = List.empty[KeyValuePair] - - + def containerPropertiesBuilder(context: AwsBatchJobDefinitionContext): (ContainerProperties.Builder, String) = { + + def buildVolumes(disks: Seq[AwsBatchVolume]): List[Volume] = { //all the configured disks plus the fetch and run volume and the aws-cli volume @@ -111,6 +103,7 @@ trait AwsBatchJobDefinitionBuilder { ) } + def buildMountPoints(disks: Seq[AwsBatchVolume]): List[MountPoint] = { //all the configured disks plus the fetch and run mount point and the AWS cli mount point @@ -130,45 +123,63 @@ trait AwsBatchJobDefinitionBuilder { ) } - def buildName(imageName: String, packedCommand: String, volumes: List[Volume], mountPoints: List[MountPoint], env: Seq[KeyValuePair]): String = { - val str = 
s"$imageName:$packedCommand:${volumes.map(_.toString).mkString(",")}:${mountPoints.map(_.toString).mkString(",")}:${env.map(_.toString).mkString(",")}" - val sha1 = MessageDigest.getInstance("SHA-1") - .digest( str.getBytes("UTF-8") ) - .map("%02x".format(_)).mkString - - val prefix = s"cromwell_$imageName".slice(0,88) // will be joined to a 40 character SHA1 for total length of 128 + def buildUlimits(ulimits: Seq[Map[String, String]]): List[Ulimit] = { - sanitize(prefix + sha1) + ulimits.filter(_.nonEmpty).map(u => + Ulimit.builder() + .name(u("name")) + .softLimit(u("softLimit").toInt) + .hardLimit(u("hardLimit").toInt) + .build() + ).toList } + def buildName(imageName: String, packedCommand: String, volumes: List[Volume], mountPoints: List[MountPoint], env: Seq[KeyValuePair], ulimits: List[Ulimit]): String = { + s"$imageName:$packedCommand:${volumes.map(_.toString).mkString(",")}:${mountPoints.map(_.toString).mkString(",")}:${env.map(_.toString).mkString(",")}:${ulimits.map(_.toString).mkString(",")}" + } + + + val environment = List.empty[KeyValuePair] val cmdName = context.runtimeAttributes.fileSystem match { - case AWSBatchStorageSystems.s3 => "/var/scratch/fetch_and_run.sh" - case _ => context.commandText + case AWSBatchStorageSystems.s3 => "/var/scratch/fetch_and_run.sh" + case _ => context.commandText } val packedCommand = packCommand("/bin/bash", "-c", cmdName) val volumes = buildVolumes( context.runtimeAttributes.disks ) val mountPoints = buildMountPoints( context.runtimeAttributes.disks) - val jobDefinitionName = buildName( + val ulimits = buildUlimits( context.runtimeAttributes.ulimits) + val containerPropsName = buildName( context.runtimeAttributes.dockerImage, packedCommand.mkString(","), volumes, mountPoints, - environment + environment, + ulimits ) - (builder - .command(packedCommand.asJava) - .memory(context.runtimeAttributes.memory.to(MemoryUnit.MB).amount.toInt) - .vcpus(context.runtimeAttributes.cpu##) - .volumes( volumes.asJava) - .mountPoints( mountPoints.asJava) - .environment(environment.asJava), + (ContainerProperties.builder() + .image(context.runtimeAttributes.dockerImage) + .command(packedCommand.asJava) + .memory(context.runtimeAttributes.memory.to(MemoryUnit.MB).amount.toInt) + .vcpus(context.runtimeAttributes.cpu##) + .volumes(volumes.asJava) + .mountPoints(mountPoints.asJava) + .environment(environment.asJava) + .ulimits(ulimits.asJava), + containerPropsName) + } - jobDefinitionName) + def retryStrategyBuilder(context: AwsBatchJobDefinitionContext): (RetryStrategy.Builder, String) = { + // We can add here the 'evaluateOnExit' statement + + (RetryStrategy.builder() + .attempts(context.runtimeAttributes.awsBatchRetryAttempts), + context.runtimeAttributes.awsBatchRetryAttempts.toString) } + private def packCommand(shell: String, options: String, mainCommand: String): Seq[String] = { val rc = new ListBuffer[String]() val lim = 1024 @@ -189,15 +200,29 @@ trait AwsBatchJobDefinitionBuilder { object StandardAwsBatchJobDefinitionBuilder extends AwsBatchJobDefinitionBuilder { def build(context: AwsBatchJobDefinitionContext): AwsBatchJobDefinition = { - //instantiate a builder with the name of the docker image - val builderInst = builder(context.runtimeAttributes.dockerImage) - val (b, name) = buildResources(builderInst, context) + + val (containerPropsInst, containerPropsName) = containerPropertiesBuilder(context) + val (retryStrategyInst, retryStrategyName) = retryStrategyBuilder(context) - new StandardAwsBatchJobDefinitionBuilder(b.build, name) + val name = 
buildName(context.runtimeAttributes.dockerImage, containerPropsName, retryStrategyName) + + new StandardAwsBatchJobDefinitionBuilder(containerPropsInst.build, retryStrategyInst.build, name) } + + def buildName(imageName: String, containerPropsName: String, retryStrategyName: String): String = { + val str = s"$imageName:$containerPropsName:$retryStrategyName" + + val sha1 = MessageDigest.getInstance("SHA-1") + .digest( str.getBytes("UTF-8") ) + .map("%02x".format(_)).mkString + + val prefix = s"cromwell_${imageName}_".slice(0,88) // will be joined to a 40 character SHA1 for total length of 128 + + sanitize(prefix + sha1) + } } -case class StandardAwsBatchJobDefinitionBuilder private(containerProperties: ContainerProperties, name: String) extends AwsBatchJobDefinition +case class StandardAwsBatchJobDefinitionBuilder private(containerProperties: ContainerProperties, retryStrategy: RetryStrategy, name: String) extends AwsBatchJobDefinition object AwsBatchJobDefinitionContext diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchRuntimeAttributes.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchRuntimeAttributes.scala index c6fc2a5f51f..8296eefd42a 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchRuntimeAttributes.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchRuntimeAttributes.scala @@ -60,6 +60,8 @@ import scala.util.matching.Regex * @param noAddress is there no address * @param scriptS3BucketName the s3 bucket where the execution command or script will be written and, from there, fetched into the container and executed * @param fileSystem the filesystem type, default is "s3" + * @param awsBatchRetryAttempts number of attempts that AWS Batch will retry the task if it fails + * @param ulimits ulimit values to be passed to the container */ case class AwsBatchRuntimeAttributes(cpu: Int Refined Positive, zones: Vector[String], @@ -71,7 +73,9 @@ case class AwsBatchRuntimeAttributes(cpu: Int Refined Positive, continueOnReturnCode: ContinueOnReturnCode, noAddress: Boolean, scriptS3BucketName: String, - fileSystem:String= "s3") + awsBatchRetryAttempts: Int, + ulimits: Vector[Map[String, String]], + fileSystem: String= "s3") object AwsBatchRuntimeAttributes { @@ -79,6 +83,8 @@ object AwsBatchRuntimeAttributes { val scriptS3BucketKey = "scriptBucketName" + val awsBatchRetryAttemptsKey = "awsBatchRetryAttempts" + val ZonesKey = "zones" private val ZonesDefaultValue = WomString("us-east-1a") @@ -92,6 +98,9 @@ object AwsBatchRuntimeAttributes { private val MemoryDefaultValue = "2 GB" + val UlimitsKey = "ulimits" + private val UlimitsDefaultValue = WomArray(WomArrayType(WomMapType(WomStringType,WomStringType)), Vector(WomMap(Map.empty[WomValue, WomValue]))) + private def cpuValidation(runtimeConfig: Option[Config]): RuntimeAttributesValidation[Int Refined Positive] = CpuValidation.instance .withDefault(CpuValidation.configDefaultWomValue(runtimeConfig) getOrElse CpuValidation.defaultMin) @@ -134,6 +143,14 @@ object AwsBatchRuntimeAttributes { QueueArnValidation.withDefault(QueueArnValidation.configDefaultWomValue(runtimeConfig) getOrElse (throw new RuntimeException("queueArn is required"))) + private def awsBatchRetryAttemptsValidation(runtimeConfig: Option[Config]): RuntimeAttributesValidation[Int] = { + AwsBatchRetryAttemptsValidation(awsBatchRetryAttemptsKey).withDefault(AwsBatchRetryAttemptsValidation(awsBatchRetryAttemptsKey) + 
.configDefaultWomValue(runtimeConfig).getOrElse(WomInteger(0))) + } + + private def ulimitsValidation(runtimeConfig: Option[Config]): RuntimeAttributesValidation[Vector[Map[String, String]]] = + UlimitsValidation.withDefault(UlimitsValidation.configDefaultWomValue(runtimeConfig) getOrElse UlimitsDefaultValue) + def runtimeAttributesBuilder(configuration: AwsBatchConfiguration): StandardValidatedRuntimeAttributesBuilder = { val runtimeConfig = configuration.runtimeConfig def validationsS3backend = StandardValidatedRuntimeAttributesBuilder.default(runtimeConfig).withValidation( @@ -146,7 +163,9 @@ object AwsBatchRuntimeAttributes { noAddressValidation(runtimeConfig), dockerValidation, queueArnValidation(runtimeConfig), - scriptS3BucketNameValidation(runtimeConfig) + scriptS3BucketNameValidation(runtimeConfig), + awsBatchRetryAttemptsValidation(runtimeConfig), + ulimitsValidation(runtimeConfig), ) def validationsLocalBackend = StandardValidatedRuntimeAttributesBuilder.default(runtimeConfig).withValidation( cpuValidation(runtimeConfig), @@ -181,6 +200,8 @@ object AwsBatchRuntimeAttributes { case AWSBatchStorageSystems.s3 => RuntimeAttributesValidation.extract(scriptS3BucketNameValidation(runtimeAttrsConfig) , validatedRuntimeAttributes) case _ => "" } + val awsBatchRetryAttempts: Int = RuntimeAttributesValidation.extract(awsBatchRetryAttemptsValidation(runtimeAttrsConfig), validatedRuntimeAttributes) + val ulimits: Vector[Map[String, String]] = RuntimeAttributesValidation.extract(ulimitsValidation(runtimeAttrsConfig), validatedRuntimeAttributes) new AwsBatchRuntimeAttributes( @@ -194,6 +215,8 @@ object AwsBatchRuntimeAttributes { continueOnReturnCode, noAddress, scriptS3BucketName, + awsBatchRetryAttempts, + ulimits, fileSystem ) } @@ -372,3 +395,94 @@ object DisksValidation extends RuntimeAttributesValidation[Seq[AwsBatchVolume]] override protected def missingValueMessage: String = s"Expecting $key runtime attribute to be a comma separated String or Array[String]" } + +object AwsBatchRetryAttemptsValidation { + def apply(key: String): AwsBatchRetryAttemptsValidation = new AwsBatchRetryAttemptsValidation(key) +} + +class AwsBatchRetryAttemptsValidation(key: String) extends IntRuntimeAttributesValidation(key) { + override protected def validateValue: PartialFunction[WomValue, ErrorOr[Int]] = { + case womValue if WomIntegerType.coerceRawValue(womValue).isSuccess => + WomIntegerType.coerceRawValue(womValue).get match { + case WomInteger(value) => + if (value.toInt < 0) + s"Expecting $key runtime attribute value greater than or equal to 0".invalidNel + else if (value.toInt > 10) + s"Expecting $key runtime attribute value lower than or equal to 10".invalidNel + else + value.toInt.validNel + } + } + + override protected def missingValueMessage: String = s"Expecting $key runtime attribute to be an Integer" +} + + +object UlimitsValidation + extends RuntimeAttributesValidation[Vector[Map[String, String]]] { + override def key: String = AwsBatchRuntimeAttributes.UlimitsKey + + override def coercion: Traversable[WomType] = + Set(WomStringType, WomArrayType(WomMapType(WomStringType, WomStringType))) + + var accepted_keys = Set("name", "softLimit", "hardLimit") + + override protected def validateValue + : PartialFunction[WomValue, ErrorOr[Vector[Map[String, String]]]] = { + case WomArray(womType, value) + if womType.memberType == WomMapType(WomStringType, WomStringType) => + check_maps(value.toVector) + case WomMap(_, _) => "!!! 
ERROR1".invalidNel + + } + + private def check_maps( + maps: Vector[WomValue] + ): ErrorOr[Vector[Map[String, String]]] = { + val entryNels: Vector[ErrorOr[Map[String, String]]] = maps.map { + case WomMap(_, value) => check_keys(value) + case _ => "!!! ERROR2".invalidNel + } + val sequenced: ErrorOr[Vector[Map[String, String]]] = sequenceNels( + entryNels + ) + sequenced + } + + private def check_keys( + dict: Map[WomValue, WomValue] + ): ErrorOr[Map[String, String]] = { + val map_keys = dict.keySet.map(_.valueString).toSet + val unrecognizedKeys = + accepted_keys.diff(map_keys) union map_keys.diff(accepted_keys) + + if (!dict.nonEmpty){ + Map.empty[String, String].validNel + }else if (unrecognizedKeys.nonEmpty) { + s"Invalid keys in $key runtime attribute. Refer to 'ulimits' section on https://docs.aws.amazon.com/batch/latest/userguide/job_definition_parameters.html#containerProperties".invalidNel + } else { + dict + .collect { case (WomString(k), WomString(v)) => + (k, v) + // case _ => "!!! ERROR3".invalidNel + } + .toMap + .validNel + } + } + + private def sequenceNels( + nels: Vector[ErrorOr[Map[String, String]]] + ): ErrorOr[Vector[Map[String, String]]] = { + val emptyNel: ErrorOr[Vector[Map[String, String]]] = + Vector.empty[Map[String, String]].validNel + val seqNel: ErrorOr[Vector[Map[String, String]]] = + nels.foldLeft(emptyNel) { (acc, v) => + (acc, v) mapN { (a, v) => a :+ v } + } + seqNel + } + + override protected def missingValueMessage: String = + s"Expecting $key runtime attribute to be an Array[Map[String, String]]" +} \ No newline at end of file diff --git a/supportedBackends/aws/src/test/scala/cromwell/backend/impl/aws/AwsBatchJobSpec.scala b/supportedBackends/aws/src/test/scala/cromwell/backend/impl/aws/AwsBatchJobSpec.scala index 5037fc21051..4a7ea041b10 100644 --- a/supportedBackends/aws/src/test/scala/cromwell/backend/impl/aws/AwsBatchJobSpec.scala +++ b/supportedBackends/aws/src/test/scala/cromwell/backend/impl/aws/AwsBatchJobSpec.scala @@ -113,6 +113,8 @@ class AwsBatchJobSpec extends TestKitSuite with AnyFlatSpecLike with Matchers wi continueOnReturnCode = ContinueOnReturnCodeFlag(false), noAddress = false, scriptS3BucketName = "script-bucket", + awsBatchRetryAttempts = 1, + ulimits = Vector(Map.empty[String, String]), fileSystem = "s3") private def generateBasicJob: AwsBatchJob = { diff --git a/supportedBackends/aws/src/test/scala/cromwell/backend/impl/aws/AwsBatchRuntimeAttributesSpec.scala b/supportedBackends/aws/src/test/scala/cromwell/backend/impl/aws/AwsBatchRuntimeAttributesSpec.scala index 09e1ee94351..f8c009f95ed 100644 --- a/supportedBackends/aws/src/test/scala/cromwell/backend/impl/aws/AwsBatchRuntimeAttributesSpec.scala +++ b/supportedBackends/aws/src/test/scala/cromwell/backend/impl/aws/AwsBatchRuntimeAttributesSpec.scala @@ -65,7 +65,9 @@ class AwsBatchRuntimeAttributesSpec extends AnyWordSpecLike with CromwellTimeout false, ContinueOnReturnCodeSet(Set(0)), false, - "my-stuff") + "my-stuff", + 1, + Vector(Map.empty[String, String])) val expectedDefaultsLocalFS = new AwsBatchRuntimeAttributes(refineMV[Positive](1), Vector("us-east-1a", "us-east-1b"), @@ -76,6 +78,8 @@ class AwsBatchRuntimeAttributesSpec extends AnyWordSpecLike with CromwellTimeout ContinueOnReturnCodeSet(Set(0)), false, "", + 1, + Vector(Map.empty[String, String]), "local") "AwsBatchRuntimeAttributes" should { @@ -339,6 +343,33 @@ class AwsBatchRuntimeAttributesSpec extends AnyWordSpecLike with CromwellTimeout val expectedRuntimeAttributes = expectedDefaults.copy(cpu = 
refineMV[Positive](4)) assertAwsBatchRuntimeAttributesSuccessfulCreation(runtimeAttributes, expectedRuntimeAttributes, workflowOptions) } + + "validate a valid awsBatchRetryAttempts entry" in { + val runtimeAttributes = Map("docker" -> WomString("ubuntu:latest"), "awsBatchRetryAttempts" -> WomInteger(9), "scriptBucketName" -> WomString("my-stuff")) + val expectedRuntimeAttributes = expectedDefaults.copy(awsBatchRetryAttempts = 9) + assertAwsBatchRuntimeAttributesSuccessfulCreation(runtimeAttributes, expectedRuntimeAttributes) + } + + "fail to validate with -1 as awsBatchRetryAttempts" in { + val runtimeAttributes = Map("docker" -> WomString("ubuntu:latest"), "awsBatchRetryAttempts" -> WomInteger(-1), "scriptBucketName" -> WomString("my-stuff")) + assertAwsBatchRuntimeAttributesFailedCreation(runtimeAttributes, "Expecting awsBatchRetryAttempts runtime attribute value greater than or equal to 0") + } + + "fail to validate with 12 as awsBatchRetryAttempts" in { + val runtimeAttributes = Map("docker" -> WomString("ubuntu:latest"), "awsBatchRetryAttempts" -> WomInteger(12), "scriptBucketName" -> WomString("my-stuff")) + assertAwsBatchRuntimeAttributesFailedCreation(runtimeAttributes, "Expecting awsBatchRetryAttempts runtime attribute value lower than or equal to 10") + } + + "fail to validate with a string as awsBatchRetryAttempts" in { + val runtimeAttributes = Map("docker" -> WomString("ubuntu:latest"), "awsBatchRetryAttempts" -> WomString("test"), "scriptBucketName" -> WomString("my-stuff")) + assertAwsBatchRuntimeAttributesFailedCreation(runtimeAttributes, "Expecting awsBatchRetryAttempts runtime attribute to be an Integer") + } + + "validate zero as awsBatchRetryAttempts entry" in { + val runtimeAttributes = Map("docker" -> WomString("ubuntu:latest"), "awsBatchRetryAttempts" -> WomInteger(0), "scriptBucketName" -> WomString("my-stuff")) + val expectedRuntimeAttributes = expectedDefaults.copy(awsBatchRetryAttempts = 0) + assertAwsBatchRuntimeAttributesSuccessfulCreation(runtimeAttributes, expectedRuntimeAttributes) + } } private def assertAwsBatchRuntimeAttributesSuccessfulCreation(runtimeAttributes: Map[String, WomValue], diff --git a/supportedBackends/aws/src/test/scala/cromwell/backend/impl/aws/AwsBatchTestConfig.scala b/supportedBackends/aws/src/test/scala/cromwell/backend/impl/aws/AwsBatchTestConfig.scala index 38545c7e472..682714b225c 100644 --- a/supportedBackends/aws/src/test/scala/cromwell/backend/impl/aws/AwsBatchTestConfig.scala +++ b/supportedBackends/aws/src/test/scala/cromwell/backend/impl/aws/AwsBatchTestConfig.scala @@ -61,6 +61,7 @@ object AwsBatchTestConfig { | zones:["us-east-1a", "us-east-1b"] | queueArn: "arn:aws:batch:us-east-1:111222333444:job-queue/job-queue" | scriptBucketName: "my-bucket" + | awsBatchRetryAttempts: 1 |} | |""".stripMargin @@ -140,6 +141,7 @@ object AwsBatchTestConfigForLocalFS { | zones:["us-east-1a", "us-east-1b"] | queueArn: "arn:aws:batch:us-east-1:111222333444:job-queue/job-queue" | scriptBucketName: "" + | awsBatchRetryAttempts: 1 |} | |""".stripMargin diff --git a/wdl/transforms/new-base/src/main/scala/wdl/transforms/base/linking/expression/values/EngineFunctionEvaluators.scala b/wdl/transforms/new-base/src/main/scala/wdl/transforms/base/linking/expression/values/EngineFunctionEvaluators.scala index 0a2bf3b998a..21f6f7516a5 100644 --- a/wdl/transforms/new-base/src/main/scala/wdl/transforms/base/linking/expression/values/EngineFunctionEvaluators.scala +++ 
b/wdl/transforms/new-base/src/main/scala/wdl/transforms/base/linking/expression/values/EngineFunctionEvaluators.scala @@ -51,7 +51,7 @@ object EngineFunctionEvaluators { EvaluatedValue(WomSingleFile(ioFunctionSet.pathFunctions.stderr), Seq.empty).validNel } - private val ReadWaitTimeout = 60.seconds + private val ReadWaitTimeout = 300.seconds private def readFile(fileToRead: WomSingleFile, ioFunctionSet: IoFunctionSet, sizeLimit: Int) = { Try(Await.result(ioFunctionSet.readFile(fileToRead.value, Option(sizeLimit), failOnOverflow = true), ReadWaitTimeout)) } From e977d082e54605f4c938474c29d41913710282a2 Mon Sep 17 00:00:00 2001 From: Mark Schreiber Date: Mon, 23 Aug 2021 17:52:33 -0400 Subject: [PATCH 025/326] Improved cross region object access and better handle permission restrictions --- .../main/java/org/lerch/s3fs/S3FileStore.java | 3 +-- .../org/lerch/s3fs/util/S3ClientStore.java | 2 +- .../s3/batch/S3BatchCommandBuilder.scala | 27 ++++++++++++++++++- .../backend/impl/aws/AwsBatchJob.scala | 6 +++-- 4 files changed, 32 insertions(+), 6 deletions(-) diff --git a/filesystems/s3/src/main/java/org/lerch/s3fs/S3FileStore.java b/filesystems/s3/src/main/java/org/lerch/s3fs/S3FileStore.java index 93f9faf3cba..8e15999180a 100644 --- a/filesystems/s3/src/main/java/org/lerch/s3fs/S3FileStore.java +++ b/filesystems/s3/src/main/java/org/lerch/s3fs/S3FileStore.java @@ -25,8 +25,7 @@ public class S3FileStore extends FileStore implements Comparable { private S3FileSystem fileSystem; private String name; private S3Client defaultClient; - private S3Client bucketSpecificClient; - private Logger logger = LoggerFactory.getLogger("S3FileStore"); + private final Logger logger = LoggerFactory.getLogger("S3FileStore"); public S3FileStore(S3FileSystem s3FileSystem, String name) { this.fileSystem = s3FileSystem; diff --git a/filesystems/s3/src/main/java/org/lerch/s3fs/util/S3ClientStore.java b/filesystems/s3/src/main/java/org/lerch/s3fs/util/S3ClientStore.java index a3720d3bab8..78a139d3f18 100644 --- a/filesystems/s3/src/main/java/org/lerch/s3fs/util/S3ClientStore.java +++ b/filesystems/s3/src/main/java/org/lerch/s3fs/util/S3ClientStore.java @@ -33,7 +33,7 @@ public static S3ClientStore getInstance(){ } public S3Client getClientForBucketName( String bucketName ) { - logger.info("obtaining client for bucket '{}'", bucketName); + logger.debug("obtaining client for bucket '{}'", bucketName); if (bucketName == null || bucketName.trim().equals("")) { return DEFAULT_CLIENT; } diff --git a/filesystems/s3/src/main/scala/cromwell/filesystems/s3/batch/S3BatchCommandBuilder.scala b/filesystems/s3/src/main/scala/cromwell/filesystems/s3/batch/S3BatchCommandBuilder.scala index 8058d897975..645600d1613 100644 --- a/filesystems/s3/src/main/scala/cromwell/filesystems/s3/batch/S3BatchCommandBuilder.scala +++ b/filesystems/s3/src/main/scala/cromwell/filesystems/s3/batch/S3BatchCommandBuilder.scala @@ -30,9 +30,11 @@ */ package cromwell.filesystems.s3.batch -import cromwell.core.io.{IoCommandBuilder, PartialIoCommandBuilder} +import cromwell.core.io.{IoCommandBuilder, IoContentAsStringCommand, IoIsDirectoryCommand, IoReadLinesCommand, IoWriteCommand, PartialIoCommandBuilder} +import cromwell.core.path.BetterFileMethods.OpenOptions import cromwell.core.path.Path import cromwell.filesystems.s3.S3Path +import org.slf4j.{Logger, LoggerFactory} import scala.util.Try @@ -40,6 +42,29 @@ import scala.util.Try * Generates commands for IO operations on S3 */ private case object PartialS3BatchCommandBuilder extends PartialIoCommandBuilder { 
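// Each of the overrides added below only logs a debug message and then falls back to
// super, i.e. no S3-specific batch command is provided for the content-as-string, write,
// is-directory or read-lines operations; only commands such as sizeCommand further down
// get a real S3Path implementation.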
+ val Log: Logger = LoggerFactory.getLogger(PartialS3BatchCommandBuilder.getClass) + + + override def contentAsStringCommand: PartialFunction[(Path, Option[Int], Boolean), Try[IoContentAsStringCommand]] = { + Log.debug("call to contentAsStringCommand but PartialFunction not implemented, falling back to super") + super.contentAsStringCommand + } + + override def writeCommand: PartialFunction[(Path, String, OpenOptions, Boolean), Try[IoWriteCommand]] = { + Log.debug("call to writeCommand but PartialFunction not implemented, falling back to super") + super.writeCommand + } + + override def isDirectoryCommand: PartialFunction[Path, Try[IoIsDirectoryCommand]] = { + Log.debug("call to isDirectoryCommand but PartialFunction not implemented, falling back to super") + super.isDirectoryCommand + } + + override def readLinesCommand: PartialFunction[Path, Try[IoReadLinesCommand]] = { + Log.debug("call to readLinesCommand but PartialFunction not implemented, falling back to super") + super.readLinesCommand + } + override def sizeCommand: PartialFunction[Path, Try[S3BatchSizeCommand]] = { case path: S3Path => Try(S3BatchSizeCommand(path)) } diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala index 1e2efaa2d5c..5068c4a9784 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala @@ -234,8 +234,10 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL |echo '*** DELOCALIZING OUTPUTS ***' |$outputCopyCommand |echo '*** COMPLETED DELOCALIZATION ***' - |echo '*** EXITING WITH RC CODE ***' - |exit $$(head -n 1 $workDir/${jobPaths.returnCodeFilename}) + |echo '*** EXITING WITH RETURN CODE ***' + |rc=$$(head -n 1 $workDir/${jobPaths.returnCodeFilename}) + |echo $$rc + |exit $$rc |} |""".stripMargin } From 155ed46caee71167d3abd061f42632082b078094 Mon Sep 17 00:00:00 2001 From: Mark Schreiber Date: Tue, 5 Oct 2021 12:28:44 -0400 Subject: [PATCH 026/326] Improved testing --- .../main/scala/cloud/nio/spi/UnixPath.scala | 2 +- .../org/lerch/s3fs/util/S3ClientStore.java | 32 +++- .../lerch/s3fs/util/S3ClientStoreTest.java | 167 ++++++++++++++++++ .../backend/impl/aws/AwsBatchJobSpec.scala | 126 +++++++++++-- 4 files changed, 299 insertions(+), 28 deletions(-) create mode 100644 filesystems/s3/src/test/scala/org/lerch/s3fs/util/S3ClientStoreTest.java diff --git a/cloud-nio/cloud-nio-spi/src/main/scala/cloud/nio/spi/UnixPath.scala b/cloud-nio/cloud-nio-spi/src/main/scala/cloud/nio/spi/UnixPath.scala index 306afa44cc7..a5d0770bc56 100644 --- a/cloud-nio/cloud-nio-spi/src/main/scala/cloud/nio/spi/UnixPath.scala +++ b/cloud-nio/cloud-nio-spi/src/main/scala/cloud/nio/spi/UnixPath.scala @@ -69,7 +69,7 @@ final private[spi] case class UnixPath(path: String) extends CharSequence { def isAbsolute: Boolean = UnixPath.isAbsolute(path) - def isEmpty: Boolean = path.isEmpty + override def isEmpty: Boolean = path.isEmpty def hasTrailingSeparator: Boolean = UnixPath.hasTrailingSeparator(path) diff --git a/filesystems/s3/src/main/java/org/lerch/s3fs/util/S3ClientStore.java b/filesystems/s3/src/main/java/org/lerch/s3fs/util/S3ClientStore.java index 78a139d3f18..818a0004a3e 100644 --- a/filesystems/s3/src/main/java/org/lerch/s3fs/util/S3ClientStore.java +++ b/filesystems/s3/src/main/java/org/lerch/s3fs/util/S3ClientStore.java @@ -28,9 +28,7 @@ public class 
S3ClientStore { private S3ClientStore(){} - public static S3ClientStore getInstance(){ - return instance; - } + public static S3ClientStore getInstance() { return instance; } public S3Client getClientForBucketName( String bucketName ) { logger.debug("obtaining client for bucket '{}'", bucketName); @@ -41,20 +39,35 @@ public S3Client getClientForBucketName( String bucketName ) { return bucketToClientMap.computeIfAbsent(bucketName, this::generateClient); } - private S3Client generateClient (String name) { - logger.info("generating client for bucket: '{}'", name); + /** + * Generate a client for the named bucket using a default client to determine the location of the named client + * @param bucketName the named of the bucket to make the client for + * @return an S3 client appropriate for the region of the named bucket + */ + protected S3Client generateClient(String bucketName){ + return this.generateClient(bucketName, DEFAULT_CLIENT); + } + + /** + * Generate a client for the named bucket using a default client to determine the location of the named client + * @param bucketName the named of the bucket to make the client for + * @param locationClient the client used to determine the location of the named bucket, recommend using DEFAULT_CLIENT + * @return an S3 client appropriate for the region of the named bucket + */ + protected S3Client generateClient (String bucketName, S3Client locationClient) { + logger.info("generating client for bucket: '{}'", bucketName); S3Client bucketSpecificClient; try { logger.info("determining bucket location with getBucketLocation"); - String bucketLocation = DEFAULT_CLIENT.getBucketLocation(builder -> builder.bucket(name)).locationConstraintAsString(); + String bucketLocation = locationClient.getBucketLocation(builder -> builder.bucket(bucketName)).locationConstraintAsString(); bucketSpecificClient = this.clientForRegion(bucketLocation); } catch (S3Exception e) { if(e.statusCode() == 403) { - logger.info("Cannot determine location of '{}' bucket directly. Attempting to obtain bucket location with headBucket operation", name); + logger.info("Cannot determine location of '{}' bucket directly. Attempting to obtain bucket location with headBucket operation", bucketName); try { - final HeadBucketResponse headBucketResponse = DEFAULT_CLIENT.headBucket(builder -> builder.bucket(name)); + final HeadBucketResponse headBucketResponse = locationClient.headBucket(builder -> builder.bucket(bucketName)); bucketSpecificClient = this.clientForRegion(headBucketResponse.sdkHttpResponse().firstMatchingHeader("x-amz-bucket-region").orElseThrow()); } catch (S3Exception e2) { if (e2.statusCode() == 301) { @@ -69,7 +82,7 @@ private S3Client generateClient (String name) { } if (bucketSpecificClient == null) { - logger.warn("Unable to determine the region of bucket: '{}'. Generating a client for the current region.", name); + logger.warn("Unable to determine the region of bucket: '{}'. 
Generating a client for the current region.", bucketName); bucketSpecificClient = S3Client.create(); } @@ -83,4 +96,5 @@ private S3Client clientForRegion(String regionString){ logger.info("bucket region is: '{}'", region.id()); return S3Client.builder().region(region).build(); } + } diff --git a/filesystems/s3/src/test/scala/org/lerch/s3fs/util/S3ClientStoreTest.java b/filesystems/s3/src/test/scala/org/lerch/s3fs/util/S3ClientStoreTest.java new file mode 100644 index 00000000000..6f419d0c335 --- /dev/null +++ b/filesystems/s3/src/test/scala/org/lerch/s3fs/util/S3ClientStoreTest.java @@ -0,0 +1,167 @@ +package org.lerch.s3fs.util; + +import junit.framework.TestCase; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.InOrder; +import org.mockito.Mock; +import static org.mockito.Mockito.*; + +import org.mockito.Spy; +import org.mockito.junit.MockitoJUnitRunner; +import software.amazon.awssdk.awscore.exception.AwsErrorDetails; +import software.amazon.awssdk.http.SdkHttpResponse; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.GetBucketLocationResponse; +import software.amazon.awssdk.services.s3.model.HeadBucketResponse; +import software.amazon.awssdk.services.s3.model.S3Exception; + +import java.util.NoSuchElementException; +import java.util.function.Consumer; + +@RunWith(MockitoJUnitRunner.class) +public class S3ClientStoreTest extends TestCase { + + S3ClientStore instance; + + @Mock + S3Client mockClient; + + @Spy + final S3ClientStore spyInstance = S3ClientStore.getInstance(); + + @Before + public void setUp() throws Exception { + super.setUp(); + instance = S3ClientStore.getInstance(); + } + + @Test + public void testGetInstanceReturnsSingleton() { + assertSame(S3ClientStore.getInstance(), instance); + } + + @Test + public void testGetClientForNullBucketName() { + assertEquals(S3ClientStore.DEFAULT_CLIENT, instance.getClientForBucketName(null)); + } + + @Test + public void testGetClientForEmptyBucketName() { + assertEquals(S3ClientStore.DEFAULT_CLIENT, instance.getClientForBucketName("")); + assertEquals(S3ClientStore.DEFAULT_CLIENT, instance.getClientForBucketName(" ")); + } + + @Test + public void testGenerateClientWithNoErrors() { + when(mockClient.getBucketLocation(any(Consumer.class))) + .thenReturn(GetBucketLocationResponse.builder().locationConstraint("us-west-2").build()); + final S3Client s3Client = instance.generateClient("test-bucket", mockClient); + assertNotNull(s3Client); + + ; + } + + @Test + public void testGenerateClientWith403Response() { + // when you get a forbidden response from getBucketLocation + when(mockClient.getBucketLocation(any(Consumer.class))).thenThrow( + S3Exception.builder().statusCode(403).build() + ); + // you should fall back to a head bucket attempt + when(mockClient.headBucket(any(Consumer.class))) + .thenReturn((HeadBucketResponse) HeadBucketResponse.builder() + .sdkHttpResponse(SdkHttpResponse.builder() + .putHeader("x-amz-bucket-region", "us-west-2") + .build()) + .build()); + + // which should get you a client + final S3Client s3Client = instance.generateClient("test-bucket", mockClient); + assertNotNull(s3Client); + + final InOrder inOrder = inOrder(mockClient); + inOrder.verify(mockClient).getBucketLocation(any(Consumer.class)); + inOrder.verify(mockClient).headBucket(any(Consumer.class)); + inOrder.verifyNoMoreInteractions(); + } + + @Test + public void testGenerateClientWith403Then301Responses(){ + // when you get a forbidden response 
from getBucketLocation + when(mockClient.getBucketLocation(any(Consumer.class))).thenThrow( + S3Exception.builder().statusCode(403).build() + ); + // and you get a 301 response on headBucket + when(mockClient.headBucket(any(Consumer.class))).thenThrow( + S3Exception.builder() + .statusCode(301) + .awsErrorDetails(AwsErrorDetails.builder() + .sdkHttpResponse(SdkHttpResponse.builder() + .putHeader("x-amz-bucket-region", "us-west-2") + .build()) + .build()) + .build() + ); + + // then you should be able to get a client as long as the error response header contains the region + final S3Client s3Client = instance.generateClient("test-bucket", mockClient); + assertNotNull(s3Client); + + final InOrder inOrder = inOrder(mockClient); + inOrder.verify(mockClient).getBucketLocation(any(Consumer.class)); + inOrder.verify(mockClient).headBucket(any(Consumer.class)); + inOrder.verifyNoMoreInteractions(); + } + + @Test + public void testGenerateClientWith403Then301ResponsesNoHeader(){ + // when you get a forbidden response from getBucketLocation + when(mockClient.getBucketLocation(any(Consumer.class))).thenThrow( + S3Exception.builder().statusCode(403).build() + ); + // and you get a 301 response on headBucket but no header for region + when(mockClient.headBucket(any(Consumer.class))).thenThrow( + S3Exception.builder() + .statusCode(301) + .awsErrorDetails(AwsErrorDetails.builder() + .sdkHttpResponse(SdkHttpResponse.builder() + .build()) + .build()) + .build() + ); + + // then you should get a NoSuchElement exception when you try to get the header + try { + instance.generateClient("test-bucket", mockClient); + } catch (Exception e) { + assertEquals(NoSuchElementException.class, e.getClass()); + } + + final InOrder inOrder = inOrder(mockClient); + inOrder.verify(mockClient).getBucketLocation(any(Consumer.class)); + inOrder.verify(mockClient).headBucket(any(Consumer.class)); + inOrder.verifyNoMoreInteractions(); + } + + @Test + public void testCaching() { + S3Client client = S3Client.create(); + doReturn(client).when(spyInstance).generateClient("test-bucket"); + + final S3Client client1 = spyInstance.getClientForBucketName("test-bucket"); + verify(spyInstance).generateClient("test-bucket"); + assertSame(client1, client); + + S3Client differentClient = S3Client.create(); + assertNotSame(client, differentClient); + + lenient().doReturn(differentClient).when(spyInstance).generateClient("test-bucket"); + final S3Client client2 = spyInstance.getClientForBucketName("test-bucket"); + // same instance because second is cached. 
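// getClientForBucketName is backed by
//   bucketToClientMap.computeIfAbsent(bucketName, this::generateClient)
// (see S3ClientStore above), so once generateClient has produced a client for
// "test-bucket" the same instance is returned and generateClient is not invoked again,
// which is what the assertions below check.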
+ assertSame(client1, client2); + assertSame(client2, client); + assertNotSame(client2, differentClient); + } +} diff --git a/supportedBackends/aws/src/test/scala/cromwell/backend/impl/aws/AwsBatchJobSpec.scala b/supportedBackends/aws/src/test/scala/cromwell/backend/impl/aws/AwsBatchJobSpec.scala index 4a7ea041b10..80d17688a65 100644 --- a/supportedBackends/aws/src/test/scala/cromwell/backend/impl/aws/AwsBatchJobSpec.scala +++ b/supportedBackends/aws/src/test/scala/cromwell/backend/impl/aws/AwsBatchJobSpec.scala @@ -34,7 +34,9 @@ package cromwell.backend.impl.aws import common.collections.EnhancedCollections._ import cromwell.backend.{BackendJobDescriptorKey, BackendWorkflowDescriptor} import cromwell.backend.BackendSpec._ +import cromwell.backend.impl.aws.io.AwsBatchWorkingDisk import cromwell.backend.validation.ContinueOnReturnCodeFlag +import cromwell.core.path.{DefaultPathBuilder} import cromwell.core.TestKitSuite import cromwell.util.SampleWdl import eu.timepit.refined.api.Refined @@ -84,7 +86,7 @@ class AwsBatchJobSpec extends TestKitSuite with AnyFlatSpecLike with Matchers wi | | |) - |mv /cromwell_root/hello-rc.txt.tmp /cromwell_root/hello-rc.txt""" + |mv /cromwell_root/hello-rc.txt.tmp /cromwell_root/hello-rc.txt""".stripMargin val workFlowDescriptor: BackendWorkflowDescriptor = buildWdlWorkflowDescriptor( SampleWdl.HelloWorld.workflowSource(), @@ -100,6 +102,8 @@ class AwsBatchJobSpec extends TestKitSuite with AnyFlatSpecLike with Matchers wi val call: CommandCallNode = workFlowDescriptor.callable.taskCallNodes.head val jobKey: BackendJobDescriptorKey = BackendJobDescriptorKey(call, None, 1) val jobPaths: AwsBatchJobPaths = AwsBatchJobPaths(workflowPaths, jobKey) + val s3Inputs: Set[AwsBatchInput] = Set(AwsBatchFileInput("foo", "s3://bucket/foo", DefaultPathBuilder.get("foo"), AwsBatchWorkingDisk())) + val s3Outputs: Set[AwsBatchFileOutput] = Set(AwsBatchFileOutput("baa", "s3://bucket/somewhere/baa", DefaultPathBuilder.get("baa"), AwsBatchWorkingDisk())) val cpu: Int Refined Positive = 2 val runtimeAttributes: AwsBatchRuntimeAttributes = new AwsBatchRuntimeAttributes( @@ -131,29 +135,22 @@ class AwsBatchJobSpec extends TestKitSuite with AnyFlatSpecLike with Matchers wi jobPaths, Seq.empty[AwsBatchParameter], None) job } + private def generateJobWithS3InOut: AwsBatchJob = { + val job = AwsBatchJob(null, runtimeAttributes, "commandLine", script, + "/cromwell_root/hello-rc.txt", "/cromwell_root/hello-stdout.log", "/cromwell_root/hello-stderr.log", + s3Inputs, s3Outputs, + jobPaths, Seq.empty[AwsBatchParameter], None) + job + } // TESTS BEGIN HERE behavior of "AwsBatchJob" - - it should "have correctly named AWS constants" in { - - val job: AwsBatchJob = generateBasicJob - - job.AWS_RETRY_MODE should be ("AWS_RETRY_MODE") - job.AWS_RETRY_MODE_DEFAULT_VALUE should be ("adaptive") - job.AWS_MAX_ATTEMPTS should be ("AWS_MAX_ATTEMPTS") - job.AWS_MAX_ATTEMPTS_DEFAULT_VALUE should be ("14") - } - it should "generate appropriate KV pairs for the container environment for S3" in { val job = generateBasicJob val generateEnvironmentKVPairs = PrivateMethod[List[KeyValuePair]]('generateEnvironmentKVPairs) // testing a private method see https://www.scalatest.org/user_guide/using_PrivateMethodTester val kvPairs = job invokePrivate generateEnvironmentKVPairs("script-bucket", "prefix-", "key") - - kvPairs should contain (buildKVPair(job.AWS_MAX_ATTEMPTS, job.AWS_MAX_ATTEMPTS_DEFAULT_VALUE)) - kvPairs should contain (buildKVPair(job.AWS_RETRY_MODE, "adaptive")) kvPairs should contain 
(buildKVPair("BATCH_FILE_TYPE", "script")) kvPairs should contain (buildKVPair("BATCH_FILE_S3_URL", "s3://script-bucket/prefix-key")) } @@ -164,10 +161,103 @@ class AwsBatchJobSpec extends TestKitSuite with AnyFlatSpecLike with Matchers wi // testing a private method see https://www.scalatest.org/user_guide/using_PrivateMethodTester val kvPairs = job invokePrivate generateEnvironmentKVPairs("script-bucket", "prefix-", "key") - - kvPairs should contain (buildKVPair(job.AWS_MAX_ATTEMPTS, job.AWS_MAX_ATTEMPTS_DEFAULT_VALUE)) - kvPairs should contain (buildKVPair(job.AWS_RETRY_MODE, "adaptive")) kvPairs should contain (buildKVPair("BATCH_FILE_TYPE", "script")) kvPairs should contain (buildKVPair("BATCH_FILE_S3_URL", "")) } + + it should "contain expected command script in reconfigured script" in { + val job = generateBasicJob + job.reconfiguredScript should include (script.replace("/cromwell_root", "/tmp/scratch")) + } + + it should "add metadata environment variables to reconfigured script" in { + val job = generateJobWithS3InOut + job.reconfiguredScript should include ("export AWS_METADATA_SERVICE_TIMEOUT=10\n") + job.reconfiguredScript should include ("export AWS_METADATA_SERVICE_NUM_ATTEMPTS=10\n") + } + + it should "add s3 localize with retry function to reconfigured script" in { + val job = generateBasicJob + val retryFunctionText = s""" + |function _s3_localize_with_retry() { + | local s3_path=$$1 + | # destination must be the path to a file and not just the directory you want the file in + | local destination=$$2 + | + | for i in {1..5}; + | do + | if [[ $$s3_path =~ s3://([^/]+)/(.+) ]]; then + | bucket="$${BASH_REMATCH[1]}" + | key="$${BASH_REMATCH[2]}" + | content_length=$$(/usr/local/aws-cli/v2/current/bin/aws s3api head-object --bucket "$$bucket" --key "$$key" --query 'ContentLength') + | else + | echo "$$s3_path is not an S3 path with a bucket and key. aborting" + | exit 1 + | fi + | /usr/local/aws-cli/v2/current/bin/aws s3 cp --no-progress "$$s3_path" "$$destination" && + | [[ $$(LC_ALL=C ls -dn -- "$$destination" | awk '{print $$5; exit}') -eq "$$content_length" ]] && break || + | echo "attempt $$i to copy $$s3_path failed"; + | + | if [ "$$i" -eq 5 ]; then + | echo "failed to copy $$s3_path after $$i attempts. 
aborting" + | exit 2 + | fi + | sleep $$((7 * "$$i")) + | done + |} + |""".stripMargin + + job.reconfiguredScript should include (retryFunctionText) + } + + it should "generate postscript with output copy command in reconfigured script" in { + val job = generateJobWithS3InOut + val postscript = + s""" + |{ + |set -e + |echo '*** DELOCALIZING OUTPUTS ***' + | + |/usr/local/aws-cli/v2/current/bin/aws s3 cp --no-progress /tmp/scratch/baa s3://bucket/somewhere/baa + | + | + |if [ -f /tmp/scratch/hello-rc.txt ]; then /usr/local/aws-cli/v2/current/bin/aws s3 cp --no-progress /tmp/scratch/hello-rc.txt ${job.jobPaths.returnCode} ; fi + | + |if [ -f /tmp/scratch/hello-stderr.log ]; then /usr/local/aws-cli/v2/current/bin/aws s3 cp --no-progress /tmp/scratch/hello-stderr.log ${job.jobPaths.standardPaths.error}; fi + |if [ -f /tmp/scratch/hello-stdout.log ]; then /usr/local/aws-cli/v2/current/bin/aws s3 cp --no-progress /tmp/scratch/hello-stdout.log ${job.jobPaths.standardPaths.output}; fi + | + |echo '*** COMPLETED DELOCALIZATION ***' + |echo '*** EXITING WITH RETURN CODE ***' + |rc=$$(head -n 1 /tmp/scratch/hello-rc.txt) + |echo $$rc + |exit $$rc + |} + |""".stripMargin + job.reconfiguredScript should include (postscript) + } + + it should "generate preamble with input copy command in reconfigured script" in { + val job = generateJobWithS3InOut + val preamble = + s""" + |{ + |set -e + |echo '*** LOCALIZING INPUTS ***' + |if [ ! -d /tmp/scratch ]; then mkdir /tmp/scratch && chmod 777 /tmp/scratch; fi + |cd /tmp/scratch + |_s3_localize_with_retry s3://bucket/foo /tmp/scratch/foo + |echo '*** COMPLETED LOCALIZATION ***' + |set +e + |} + |""".stripMargin + + job.reconfiguredScript should include (preamble) + } + + it should "contain AWS Service clients" in { + val job = generateBasicJob + job.batchClient should not be null + job.s3Client should not be null + job.cloudWatchLogsClient should not be null + } } From 5ac92609b30e8ee9c002c1b4722c1c66cc31d5d2 Mon Sep 17 00:00:00 2001 From: Mark Schreiber Date: Tue, 19 Oct 2021 10:59:13 -0400 Subject: [PATCH 027/326] Improved testing --- .../backend/impl/aws/AwsBatchJobSpec.scala | 24 ++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/supportedBackends/aws/src/test/scala/cromwell/backend/impl/aws/AwsBatchJobSpec.scala b/supportedBackends/aws/src/test/scala/cromwell/backend/impl/aws/AwsBatchJobSpec.scala index 80d17688a65..12e933ab959 100644 --- a/supportedBackends/aws/src/test/scala/cromwell/backend/impl/aws/AwsBatchJobSpec.scala +++ b/supportedBackends/aws/src/test/scala/cromwell/backend/impl/aws/AwsBatchJobSpec.scala @@ -36,7 +36,7 @@ import cromwell.backend.{BackendJobDescriptorKey, BackendWorkflowDescriptor} import cromwell.backend.BackendSpec._ import cromwell.backend.impl.aws.io.AwsBatchWorkingDisk import cromwell.backend.validation.ContinueOnReturnCodeFlag -import cromwell.core.path.{DefaultPathBuilder} +import cromwell.core.path.DefaultPathBuilder import cromwell.core.TestKitSuite import cromwell.util.SampleWdl import eu.timepit.refined.api.Refined @@ -47,7 +47,7 @@ import org.scalatest.flatspec.AnyFlatSpecLike import org.scalatest.matchers.should.Matchers import org.specs2.mock.Mockito import software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider -import software.amazon.awssdk.services.batch.model.KeyValuePair +import software.amazon.awssdk.services.batch.model.{ContainerDetail, JobDetail, KeyValuePair} import spray.json.{JsObject, JsString} import wdl4s.parser.MemoryUnit import wom.format.MemorySize @@ 
-58,7 +58,7 @@ class AwsBatchJobSpec extends TestKitSuite with AnyFlatSpecLike with Matchers wi System.setProperty("aws.region", "us-east-1") - val script = """ + val script: String = """ |tmpDir=mkdir -p "/cromwell-aws/cromwell-execution/wf_hello/2422ea26-2578-48b0-86e9-50cbdda7d70a/call-hello/tmp.39397e83" && echo "/cromwell-aws/cromwell-execution/wf_hello/2422ea26-2578-48b0-86e9-50cbdda7d70a/call-hello/tmp.39397e83" |chmod 777 "$tmpDir" |export _JAVA_OPTIONS=-Djava.io.tmpdir="$tmpDir" @@ -121,6 +121,9 @@ class AwsBatchJobSpec extends TestKitSuite with AnyFlatSpecLike with Matchers wi ulimits = Vector(Map.empty[String, String]), fileSystem = "s3") + val containerDetail: ContainerDetail = ContainerDetail.builder().exitCode(0).build() + val jobDetail: JobDetail = JobDetail.builder().container(containerDetail).build + private def generateBasicJob: AwsBatchJob = { val job = AwsBatchJob(null, runtimeAttributes, "commandLine", script, "/cromwell_root/hello-rc.txt", "/cromwell_root/hello-stdout.log", "/cromwell_root/hello-stderr.log", @@ -260,4 +263,19 @@ class AwsBatchJobSpec extends TestKitSuite with AnyFlatSpecLike with Matchers wi job.s3Client should not be null job.cloudWatchLogsClient should not be null } + + it should "have correct script prefix" in { + val job = generateBasicJob + job.scriptKeyPrefix should equal("scripts/") + } + + it should "return correct RC code given Batch Job Detail" in { + val containerDetail: ContainerDetail = ContainerDetail.builder().exitCode(0).build + val jobDetail: JobDetail = JobDetail.builder().container(containerDetail).build + val job = generateBasicJob + job.rc(jobDetail) should be (0) + } + + + } From 94a720dcf146f573e35b700ecdfdb5ba1d558a2f Mon Sep 17 00:00:00 2001 From: Mark Schreiber Date: Tue, 19 Oct 2021 13:31:01 -0400 Subject: [PATCH 028/326] remove unnecessary method --- .../backend/impl/aws/AwsBatchJob.scala | 37 ------------------- 1 file changed, 37 deletions(-) diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala index 5068c4a9784..ee6b95b06c2 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala @@ -464,8 +464,6 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL */ def status(jobId: String): Try[RunStatus] = for { statusString <- Try(detail(jobId).status) - batchJobContainerContext <- Try(batchJobContainerContext(jobId)) - _ <- Try(Log.debug(s"Task ${jobDescriptor.key.call.fullyQualifiedName + "-" + jobDescriptor.key.index + "-" + jobDescriptor.key.attempt} in container context $batchJobContainerContext")) runStatus <- RunStatus.fromJobStatus(statusString, jobId) } yield runStatus @@ -478,41 +476,6 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL jobDetail } - /** - * Return information about the container, ECS Cluster and EC2 instance that is (or was) hosting this job - * @param jobId the id of the job for which you want the context - * @return the context - */ - def batchJobContainerContext(jobId: String): BatchJobContainerContext ={ - if (jobId == null) return BatchJobContainerContext("","",Seq.empty, Seq.empty) - - val containerInstanceArn = detail(jobId).container().containerInstanceArn() - if(containerInstanceArn == null || containerInstanceArn.isEmpty) return BatchJobContainerContext(jobId,"",Seq.empty, Seq.empty) - - val 
describeJobQueuesResponse = batchClient.describeJobQueues( DescribeJobQueuesRequest.builder().jobQueues( runtimeAttributes.queueArn ).build()) - val computeEnvironments = describeJobQueuesResponse.jobQueues().asScala.head.computeEnvironmentOrder().asScala.map(_.computeEnvironment()) - val describeComputeEnvironmentsResponse = batchClient.describeComputeEnvironments( DescribeComputeEnvironmentsRequest.builder().computeEnvironments(computeEnvironments.asJava).build()) - val ecsClusterArns = describeComputeEnvironmentsResponse.computeEnvironments().asScala.map(_.ecsClusterArn()) - - val ecsClient = configureClient(EcsClient.builder(), optAwsAuthMode, configRegion) - - val instanceIds: Seq[String] = ecsClusterArns.map(containerArn => ecsClient.describeContainerInstances(DescribeContainerInstancesRequest.builder().containerInstances(containerInstanceArn).cluster(containerArn).build())) - .map(r => r.containerInstances().asScala).flatMap(_.map(_.ec2InstanceId())) - - BatchJobContainerContext(jobId, containerInstanceArn, ecsClusterArns, instanceIds) - } - - case class BatchJobContainerContext(jobId: String, containerInstanceArn: String, ecsClusterArns: Seq[String], ec2InstanceIds: Seq[String]) { - override def toString: String = { - new ToStringBuilder(this, ToStringStyle.JSON_STYLE) - .append("jobId", this.jobId) - .append("containerInstanceArn", containerInstanceArn) - .append("ecsClusterArns", ecsClusterArns) - .append("ec2InstanceIds", ec2InstanceIds) - .build() - } - } - def rc(detail: JobDetail): Integer = { detail.container.exitCode } From 19461a398deca98d854c4c71fb87a58b3fc02263 Mon Sep 17 00:00:00 2001 From: Mark Schreiber Date: Fri, 22 Oct 2021 17:32:53 -0400 Subject: [PATCH 029/326] enable hashing of ECR and ECR public fix deprecation in job definition creation --- .../main/scala/cloud/nio/spi/UnixPath.scala | 2 +- core/src/main/resources/reference.conf | 2 + .../cromwell/docker/DockerInfoActor.scala | 5 ++- .../registryv2/DockerRegistryV2Abstract.scala | 16 +++---- .../registryv2/flows/aws/AmazonEcr.scala | 42 +++++++++++++++++++ .../flows/aws/AmazonEcrAbstract.scala | 41 ++++++++++++++++++ .../flows/aws/AmazonEcrPublic.scala | 36 ++++++++++++++++ .../registryv2/flows/aws/EcrUtils.scala | 9 ++++ .../flows/aws/AmazonEcrPublicSpec.scala | 25 +++++++++++ project/Dependencies.scala | 10 +++-- project/Merging.scala | 23 ++++++++-- .../backend/impl/aws/AwsBatchJob.scala | 10 ++--- .../impl/aws/AwsBatchJobDefinition.scala | 10 ++--- 13 files changed, 204 insertions(+), 27 deletions(-) create mode 100644 dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/aws/AmazonEcr.scala create mode 100644 dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/aws/AmazonEcrAbstract.scala create mode 100644 dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/aws/AmazonEcrPublic.scala create mode 100644 dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/aws/EcrUtils.scala create mode 100644 dockerHashing/src/test/scala/cromwell/docker/registryv2/flows/aws/AmazonEcrPublicSpec.scala diff --git a/cloud-nio/cloud-nio-spi/src/main/scala/cloud/nio/spi/UnixPath.scala b/cloud-nio/cloud-nio-spi/src/main/scala/cloud/nio/spi/UnixPath.scala index a5d0770bc56..306afa44cc7 100644 --- a/cloud-nio/cloud-nio-spi/src/main/scala/cloud/nio/spi/UnixPath.scala +++ b/cloud-nio/cloud-nio-spi/src/main/scala/cloud/nio/spi/UnixPath.scala @@ -69,7 +69,7 @@ final private[spi] case class UnixPath(path: String) extends CharSequence { def isAbsolute: Boolean = UnixPath.isAbsolute(path) - override def 
isEmpty: Boolean = path.isEmpty + def isEmpty: Boolean = path.isEmpty def hasTrailingSeparator: Boolean = UnixPath.hasTrailingSeparator(path) diff --git a/core/src/main/resources/reference.conf b/core/src/main/resources/reference.conf index 709753e3fb8..e436d85c89f 100644 --- a/core/src/main/resources/reference.conf +++ b/core/src/main/resources/reference.conf @@ -388,6 +388,8 @@ docker { dockerhub.num-threads = 10 quay.num-threads = 10 alibabacloudcr.num-threads = 10 + ecr.num-threads = 10 + ecr-public.num-threads = 10 } } diff --git a/dockerHashing/src/main/scala/cromwell/docker/DockerInfoActor.scala b/dockerHashing/src/main/scala/cromwell/docker/DockerInfoActor.scala index faa3cd4c36a..3a56294bccf 100644 --- a/dockerHashing/src/main/scala/cromwell/docker/DockerInfoActor.scala +++ b/dockerHashing/src/main/scala/cromwell/docker/DockerInfoActor.scala @@ -15,6 +15,7 @@ import cromwell.core.{Dispatcher, DockerConfiguration} import cromwell.docker.DockerInfoActor._ import cromwell.docker.registryv2.flows.alibabacloudcrregistry._ import cromwell.docker.registryv2.DockerRegistryV2Abstract +import cromwell.docker.registryv2.flows.aws.{AmazonEcr, AmazonEcrPublic} import cromwell.docker.registryv2.flows.dockerhub.DockerHubRegistry import cromwell.docker.registryv2.flows.gcr.GcrRegistry import cromwell.docker.registryv2.flows.quay.QuayRegistry @@ -240,7 +241,9 @@ object DockerInfoActor { ("dockerhub", { c: DockerRegistryConfig => new DockerHubRegistry(c) }), ("gcr", gcrConstructor), ("quay", { c: DockerRegistryConfig => new QuayRegistry(c) }), - ("alibabacloudcr", {c: DockerRegistryConfig => new AlibabaCloudCRRegistry(c)}) + ("alibabacloudcr", {c: DockerRegistryConfig => new AlibabaCloudCRRegistry(c)}), + ("ecr", {c: DockerRegistryConfig => new AmazonEcr(c)}), + ("ecr-public", {c: DockerRegistryConfig => new AmazonEcrPublic(c)}) ).traverse[ErrorOr, DockerRegistry]({ case (configPath, constructor) => DockerRegistryConfig.fromConfig(config.as[Config](configPath)).map(constructor) }).unsafe("Docker registry configuration") diff --git a/dockerHashing/src/main/scala/cromwell/docker/registryv2/DockerRegistryV2Abstract.scala b/dockerHashing/src/main/scala/cromwell/docker/registryv2/DockerRegistryV2Abstract.scala index e03b3bee3c5..b4ae630751b 100644 --- a/dockerHashing/src/main/scala/cromwell/docker/registryv2/DockerRegistryV2Abstract.scala +++ b/dockerHashing/src/main/scala/cromwell/docker/registryv2/DockerRegistryV2Abstract.scala @@ -60,7 +60,7 @@ object DockerRegistryV2Abstract { ) }) } - + // Placeholder exceptions that can be carried through IO before being converted to a DockerInfoFailedResponse private class Unauthorized() extends Exception private class NotFound() extends Exception @@ -76,6 +76,8 @@ abstract class DockerRegistryV2Abstract(override val config: DockerRegistryConfi implicit val cs = IO.contextShift(ec) implicit val timer = IO.timer(ec) + protected val authorizationScheme: AuthScheme = AuthScheme.Bearer + /** * This is the main function. Given a docker context and an http client, retrieve information about the docker image. 
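* Concrete registries supply the pieces this flow needs (host name, token retrieval and,
* where required, the auth scheme); as a sketch, the Amazon ECR registry added in this
* same patch overrides, for example:
*   override protected val authorizationScheme: AuthScheme = AuthScheme.Basic
*   override def accepts(dockerImageIdentifier: DockerImageIdentifier): Boolean = dockerImageIdentifier.hostAsString.contains("amazonaws.com")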
*/ @@ -204,7 +206,7 @@ abstract class DockerRegistryV2Abstract(override val config: DockerRegistryConfi * Request to get the manifest, using the auth token if provided */ private def manifestRequest(token: Option[String], imageId: DockerImageIdentifier): IO[Request[IO]] = { - val authorizationHeader = token.map(t => Authorization(Credentials.Token(AuthScheme.Bearer, t))) + val authorizationHeader = token.map(t => Authorization(Credentials.Token(authorizationScheme, t))) val request = Method.GET( buildManifestUri(imageId), List( @@ -235,13 +237,13 @@ abstract class DockerRegistryV2Abstract(override val config: DockerRegistryConfi * The response can be of 2 sorts: * - A manifest (https://docs.docker.com/registry/spec/manifest-v2-2/#image-manifest-field-descriptions) * - A manifest list which contains a list of pointers to other manifests (https://docs.docker.com/registry/spec/manifest-v2-2/#manifest-list) - * + * * When a manifest list is returned, we need to pick one of the manifest pointers and make another request for that manifest. - * + * * Because the different manifests in the list are (supposed to be) variations of the same image over different platforms, * we simply pick the first one here since we only care about the approximate size, and we don't expect it to change drastically * between platforms. - * If that assumption turns out to be incorrect, a smarter decision may need to be made to choose the manifest to lookup. + * If that assumption turns out to be incorrect, a smarter decision may need to be made to choose the manifest to lookup. */ private def parseManifest(dockerImageIdentifier: DockerImageIdentifier, token: Option[String])(response: Response[IO])(implicit client: Client[IO]): IO[Option[DockerManifest]] = response match { case Status.Successful(r) if r.headers.exists(_.value.equalsIgnoreCase(ManifestV2MediaType)) => @@ -268,14 +270,14 @@ abstract class DockerRegistryV2Abstract(override val config: DockerRegistryConfi } } - private def getDigestFromResponse(response: Response[IO]): IO[DockerHashResult] = response match { + protected def getDigestFromResponse(response: Response[IO]): IO[DockerHashResult] = response match { case Status.Successful(r) => extractDigestFromHeaders(r.headers) case Status.Unauthorized(_) => IO.raiseError(new Unauthorized) case Status.NotFound(_) => IO.raiseError(new NotFound) case failed => failed.as[String].flatMap(body => IO.raiseError(new Exception(s"Failed to get manifest: $body")) ) } - + private def extractDigestFromHeaders(headers: Headers) = { headers.find(a => a.toRaw.name.equals(DigestHeaderName)) match { case Some(digest) => IO.fromEither(DockerHashResult.fromString(digest.value).toEither) diff --git a/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/aws/AmazonEcr.scala b/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/aws/AmazonEcr.scala new file mode 100644 index 00000000000..a073b9f4eb2 --- /dev/null +++ b/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/aws/AmazonEcr.scala @@ -0,0 +1,42 @@ +package cromwell.docker.registryv2.flows.aws + +import cats.effect.IO +import cromwell.docker.{DockerImageIdentifier, DockerInfoActor, DockerRegistryConfig} +import org.http4s.AuthScheme +import org.http4s.client.Client +import software.amazon.awssdk.services.ecr.EcrClient + +import scala.compat.java8.OptionConverters._ +import scala.concurrent.Future + +class AmazonEcr(override val config: DockerRegistryConfig, ecrClient: EcrClient = EcrClient.create()) extends AmazonEcrAbstract(config) { + + override 
protected val authorizationScheme: AuthScheme = AuthScheme.Basic + + /** + * e.g 123456789012.dkr.ecr.us-east-1.amazonaws.com + */ + override protected def registryHostName(dockerImageIdentifier: DockerImageIdentifier): String = { + var hostname = dockerImageIdentifier.hostAsString + if (hostname.lastIndexOf("/").equals(hostname.length -1)) { + hostname = hostname.substring(0, hostname.length -1) + } + hostname + } + /** + * Returns true if this flow is able to process this docker image, + * false otherwise + */ + override def accepts(dockerImageIdentifier: DockerImageIdentifier): Boolean = dockerImageIdentifier.hostAsString.contains("amazonaws.com") + + override protected def getToken(dockerInfoContext: DockerInfoActor.DockerInfoContext)(implicit client: Client[IO]): IO[Option[String]] = { + val eventualMaybeToken = Future(ecrClient.getAuthorizationToken + .authorizationData() + .stream() + .findFirst() + .asScala + .map(_.authorizationToken())) + + IO.fromFuture(IO(eventualMaybeToken)) + } +} diff --git a/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/aws/AmazonEcrAbstract.scala b/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/aws/AmazonEcrAbstract.scala new file mode 100644 index 00000000000..bc9ea61fd74 --- /dev/null +++ b/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/aws/AmazonEcrAbstract.scala @@ -0,0 +1,41 @@ +package cromwell.docker.registryv2.flows.aws + +import cats.effect.IO +import cromwell.docker.{DockerHashResult, DockerImageIdentifier, DockerInfoActor, DockerRegistryConfig} +import cromwell.docker.registryv2.DockerRegistryV2Abstract +import cromwell.docker.registryv2.flows.aws.EcrUtils.{EcrForbidden, EcrNotFound, EcrUnauthorized} +import org.apache.commons.codec.digest.DigestUtils +import org.http4s.{Header, Response, Status} + +abstract class AmazonEcrAbstract(override val config: DockerRegistryConfig) extends DockerRegistryV2Abstract(config) { + + /** + * Not used as getToken is overridden + */ + override protected def authorizationServerHostName(dockerImageIdentifier: DockerImageIdentifier): String = "" + + /** + * Not used as getToken is overridden + */ + override protected def buildTokenRequestHeaders(dockerInfoContext: DockerInfoActor.DockerInfoContext): List[Header] = List.empty + + /** + * Amazon ECR repositories don't have a digest header in responses so we must made it from the manifest body + */ + override protected def getDigestFromResponse(response: Response[IO]): IO[DockerHashResult] = response match { + case Status.Successful(r) => digestManifest(r.bodyText) + case Status.Unauthorized(_) => IO.raiseError(new EcrUnauthorized) + case Status.NotFound(_) => IO.raiseError(new EcrNotFound) + case Status.Forbidden(_) => IO.raiseError(new EcrForbidden) + case failed => failed.as[String].flatMap(body => IO.raiseError(new Exception(s"Failed to get manifest: $body"))) + } + + private def digestManifest(bodyText: fs2.Stream[IO, String]): IO[DockerHashResult] = { + bodyText + .compile + .string + .map(data => "sha256:"+DigestUtils.sha256Hex(data)) + .map(DockerHashResult.fromString) + .flatMap(IO.fromTry) + } +} diff --git a/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/aws/AmazonEcrPublic.scala b/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/aws/AmazonEcrPublic.scala new file mode 100644 index 00000000000..797d4b70703 --- /dev/null +++ b/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/aws/AmazonEcrPublic.scala @@ -0,0 +1,36 @@ +package cromwell.docker.registryv2.flows.aws + +import 
cats.effect.IO +import cromwell.docker.{DockerImageIdentifier, DockerInfoActor, DockerRegistryConfig} +import org.http4s.client.Client +import software.amazon.awssdk.services.ecrpublic.EcrPublicClient +import software.amazon.awssdk.services.ecrpublic.model.GetAuthorizationTokenRequest + +import scala.concurrent.Future + + +class AmazonEcrPublic(override val config: DockerRegistryConfig, ecrClient: EcrPublicClient = EcrPublicClient.create()) extends AmazonEcrAbstract(config) { + /** + * public.ecr.aws + */ + override protected def registryHostName(dockerImageIdentifier: DockerImageIdentifier): String = "public.ecr.aws" + + /** + * Returns true if this flow is able to process this docker image, + * false otherwise + */ + override def accepts(dockerImageIdentifier: DockerImageIdentifier): Boolean = dockerImageIdentifier.hostAsString.contains("public.ecr.aws") + + + override protected def getToken(dockerInfoContext: DockerInfoActor.DockerInfoContext)(implicit client: Client[IO]): IO[Option[String]] = { + + val eventualMaybeToken: Future[Option[String]] = Future( + Option(ecrClient + .getAuthorizationToken(GetAuthorizationTokenRequest.builder().build()) + .authorizationData.authorizationToken() + ) + ) + + IO.fromFuture(IO(eventualMaybeToken)) + } +} diff --git a/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/aws/EcrUtils.scala b/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/aws/EcrUtils.scala new file mode 100644 index 00000000000..9da904c8adc --- /dev/null +++ b/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/aws/EcrUtils.scala @@ -0,0 +1,9 @@ +package cromwell.docker.registryv2.flows.aws + +object EcrUtils { + + case class EcrUnauthorized() extends Exception + case class EcrNotFound() extends Exception + case class EcrForbidden() extends Exception + +} diff --git a/dockerHashing/src/test/scala/cromwell/docker/registryv2/flows/aws/AmazonEcrPublicSpec.scala b/dockerHashing/src/test/scala/cromwell/docker/registryv2/flows/aws/AmazonEcrPublicSpec.scala new file mode 100644 index 00000000000..7d76aff61a5 --- /dev/null +++ b/dockerHashing/src/test/scala/cromwell/docker/registryv2/flows/aws/AmazonEcrPublicSpec.scala @@ -0,0 +1,25 @@ +package cromwell.docker.registryv2.flows.aws + +import cromwell.core.TestKitSuite +import cromwell.docker.{DockerImageIdentifier, DockerRegistryConfig} +import org.scalatest.BeforeAndAfter +import org.scalatest.flatspec.AnyFlatSpecLike +import org.scalatest.matchers.should.Matchers +import org.scalatestplus.mockito.MockitoSugar + +class AmazonEcrPublicSpec extends TestKitSuite with AnyFlatSpecLike with Matchers with MockitoSugar with BeforeAndAfter{ + val goodUri = "public.ecr.aws/amazonlinux/amazonlinux:latest" + val badUri = "ubuntu:latest" + val registry = new AmazonEcrPublic(DockerRegistryConfig.default) + + + it should "Accept good URI" in { + val dockerImageIdentifier = DockerImageIdentifier.fromString(goodUri).get + registry.accepts(dockerImageIdentifier) shouldEqual(true) + } + + it should "NOT accept bad URI" in { + val dockerImageIdentifier = DockerImageIdentifier.fromString(badUri).get + registry.accepts(dockerImageIdentifier) shouldEqual(false) + } +} diff --git a/project/Dependencies.scala b/project/Dependencies.scala index ed80d57b886..31cafa994d3 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -11,7 +11,7 @@ object Dependencies { private val ammoniteOpsV = "2.3.8" private val apacheCommonNetV = "3.7.2" private val apacheHttpClientV = "4.5.13" - private val awsSdkV = "2.15.41" + 
private val awsSdkV = "2.17.66" private val betterFilesV = "3.9.1" private val catsEffectV = "2.3.0" private val catsV = "2.3.0" @@ -56,7 +56,7 @@ object Dependencies { private val heterodonV = "1.0.0-beta3" private val hsqldbV = "2.5.1" private val http4sVersion = "0.21.7" // scala-steward:off (CROM-6678) - private val jacksonV = "2.12.2" + private val jacksonV = "2.12.5" private val jacksonJqV = "1.0.0-preview.20201123" private val janinoV = "3.1.2" private val javaxActivationV = "1.2.0" @@ -311,7 +311,9 @@ object Dependencies { "cloudwatchlogs", "s3", "sts", - "ecs" + "ecs", + "ecr", + "ecrpublic", ).map(artifactName => "software.amazon.awssdk" % artifactName % awsSdkV) private val googleCloudDependencies = List( @@ -502,7 +504,7 @@ object Dependencies { val databaseMigrationDependencies = liquibaseDependencies ++ dbmsDependencies - val dockerHashingDependencies = http4sDependencies ++ circeDependencies ++ aliyunCrDependencies + val dockerHashingDependencies = http4sDependencies ++ circeDependencies ++ aliyunCrDependencies ++ awsCloudDependencies val cromwellApiClientDependencies = List( "org.scalaz" %% "scalaz-core" % scalazV, diff --git a/project/Merging.scala b/project/Merging.scala index 964d342a234..53135adce26 100644 --- a/project/Merging.scala +++ b/project/Merging.scala @@ -4,8 +4,8 @@ import sbtassembly.{MergeStrategy, PathList} object Merging { val customMergeStrategy: Def.Initialize[String => MergeStrategy] = Def.setting { - case PathList(ps@_*) if ps.last == "project.properties" => - // Merge/Filter project.properties files from Google jars that otherwise collide at merge time. + case PathList(ps@_*) if Set("project.properties", "execution.interceptors").contains(ps.last) => + // Merge/Filter files from AWS/Google jars that otherwise collide at merge time. MergeStrategy.filterDistinctLines case PathList(ps@_*) if ps.last == "logback.xml" => MergeStrategy.first @@ -16,12 +16,27 @@ object Merging { path map { _.toLowerCase } match { - case "spring.tooling" :: xs => + case "spring.tooling" :: _ => MergeStrategy.discard case "io.netty.versions.properties" :: Nil => MergeStrategy.first - case "maven" :: "com.google.guava" :: xs => + case "maven" :: "com.google.guava" :: _ => MergeStrategy.first + case "native-image" :: _ if Set("native-image.properties", "reflection-config.json").contains(path.last) => + /* + Discard GraalVM configuration files. + grpc-netty-shaded 1.39.0 tried to put the netty classes into a different package, but left the shaded version + of the config file with the same name as the unshaded netty library. Thus when merging the shaded and + unshaded netty jars we end up with assembly conflicts. + + However, we're not using GraalVM for execution so just discard the configuration files. 
+ + See also: + - https://www.graalvm.org/reference-manual/native-image/BuildConfiguration/#configuration-file-format + - https://github.com/grpc/grpc-java/issues/7540 + - https://github.com/grpc/grpc-java/releases/tag/v1.39.0 + */ + MergeStrategy.discard case _ => val oldStrategy = (assembly / assemblyMergeStrategy).value oldStrategy(x) diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala index ee6b95b06c2..3102f678aa6 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala @@ -47,8 +47,6 @@ import software.amazon.awssdk.services.batch.BatchClient import software.amazon.awssdk.services.batch.model._ import software.amazon.awssdk.services.cloudwatchlogs.CloudWatchLogsClient import software.amazon.awssdk.services.cloudwatchlogs.model.{GetLogEventsRequest, OutputLogEvent} -import software.amazon.awssdk.services.ecs.EcsClient -import software.amazon.awssdk.services.ecs.model.DescribeContainerInstancesRequest import software.amazon.awssdk.services.s3.S3Client import software.amazon.awssdk.services.s3.model.{GetObjectRequest, HeadObjectRequest, NoSuchKeyException, PutObjectRequest} import wdl4s.parser.MemoryUnit @@ -289,11 +287,13 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL .containerOverrides( ContainerOverrides.builder .environment( - generateEnvironmentKVPairs(runtimeAttributes.scriptS3BucketName, scriptKeyPrefix, scriptKey): _* ) - .memory(runtimeAttributes.memory.to(MemoryUnit.MB).amount.toInt) - .vcpus(runtimeAttributes.cpu.##).build + .resourceRequirements( + ResourceRequirement.builder().`type`(ResourceType.VCPU).value(runtimeAttributes.cpu.##.toString).build(), + ResourceRequirement.builder().`type`(ResourceType.MEMORY).value(runtimeAttributes.memory.to(MemoryUnit.MB).amount.toInt.toString).build() + ) + .build() ) .jobQueue(runtimeAttributes.queueArn) .jobDefinition(definitionArn) diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobDefinition.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobDefinition.scala index 876a23c32c0..b8ac5cb229a 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobDefinition.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobDefinition.scala @@ -31,16 +31,14 @@ package cromwell.backend.impl.aws -import scala.language.postfixOps import scala.collection.mutable.ListBuffer import cromwell.backend.BackendJobDescriptor import cromwell.backend.io.JobPaths -import software.amazon.awssdk.services.batch.model.{ContainerProperties, Host, KeyValuePair, MountPoint, RetryStrategy, Volume, Ulimit} +import software.amazon.awssdk.services.batch.model.{ContainerProperties, Host, KeyValuePair, MountPoint, ResourceRequirement, ResourceType, RetryStrategy, Ulimit, Volume} import cromwell.backend.impl.aws.io.AwsBatchVolume import scala.collection.JavaConverters._ import java.security.MessageDigest - import org.apache.commons.lang3.builder.{ToStringBuilder, ToStringStyle} import org.slf4j.{Logger, LoggerFactory} import wdl4s.parser.MemoryUnit @@ -162,8 +160,10 @@ trait AwsBatchJobDefinitionBuilder { (ContainerProperties.builder() .image(context.runtimeAttributes.dockerImage) .command(packedCommand.asJava) - 
.memory(context.runtimeAttributes.memory.to(MemoryUnit.MB).amount.toInt) - .vcpus(context.runtimeAttributes.cpu##) + .resourceRequirements( + ResourceRequirement.builder().`type`(ResourceType.VCPU).value(context.runtimeAttributes.cpu.##.toString).build(), + ResourceRequirement.builder().`type`(ResourceType.MEMORY).value(context.runtimeAttributes.memory.to(MemoryUnit.MB).amount.toInt.toString).build() + ) .volumes(volumes.asJava) .mountPoints(mountPoints.asJava) .environment(environment.asJava) From 29e403937d5b4614038bb4da0bc8f313307a9f0e Mon Sep 17 00:00:00 2001 From: Mark Schreiber Date: Tue, 26 Oct 2021 15:34:20 -0400 Subject: [PATCH 030/326] improve test coverage for AmazonEcr and AmazonEcrPublic --- .../flows/aws/AmazonEcrPublicSpec.scala | 62 +++++++++++++--- .../registryv2/flows/aws/AmazonEcrSpec.scala | 72 +++++++++++++++++++ 2 files changed, 125 insertions(+), 9 deletions(-) create mode 100644 dockerHashing/src/test/scala/cromwell/docker/registryv2/flows/aws/AmazonEcrSpec.scala diff --git a/dockerHashing/src/test/scala/cromwell/docker/registryv2/flows/aws/AmazonEcrPublicSpec.scala b/dockerHashing/src/test/scala/cromwell/docker/registryv2/flows/aws/AmazonEcrPublicSpec.scala index 7d76aff61a5..18a0a23232e 100644 --- a/dockerHashing/src/test/scala/cromwell/docker/registryv2/flows/aws/AmazonEcrPublicSpec.scala +++ b/dockerHashing/src/test/scala/cromwell/docker/registryv2/flows/aws/AmazonEcrPublicSpec.scala @@ -1,25 +1,69 @@ package cromwell.docker.registryv2.flows.aws +import cats.effect.{IO, Resource} import cromwell.core.TestKitSuite -import cromwell.docker.{DockerImageIdentifier, DockerRegistryConfig} -import org.scalatest.BeforeAndAfter +import cromwell.docker.registryv2.DockerRegistryV2Abstract +import cromwell.docker.{DockerImageIdentifier, DockerInfoActor, DockerInfoRequest, DockerRegistryConfig} +import org.http4s.{Header, Headers, MediaType, Request, Response} +import org.http4s.client.Client +import org.http4s.headers.`Content-Type` +import org.mockito.ArgumentMatchers.any +import org.mockito.Mockito._ +import org.scalatest.{BeforeAndAfter, PrivateMethodTester} import org.scalatest.flatspec.AnyFlatSpecLike import org.scalatest.matchers.should.Matchers import org.scalatestplus.mockito.MockitoSugar +import software.amazon.awssdk.services.ecrpublic.model.{AuthorizationData, GetAuthorizationTokenRequest, GetAuthorizationTokenResponse} +import software.amazon.awssdk.services.ecrpublic.EcrPublicClient + +class AmazonEcrPublicSpec extends TestKitSuite with AnyFlatSpecLike with Matchers with MockitoSugar with BeforeAndAfter with PrivateMethodTester { + behavior of "AmazonEcrPublic" -class AmazonEcrPublicSpec extends TestKitSuite with AnyFlatSpecLike with Matchers with MockitoSugar with BeforeAndAfter{ val goodUri = "public.ecr.aws/amazonlinux/amazonlinux:latest" - val badUri = "ubuntu:latest" - val registry = new AmazonEcrPublic(DockerRegistryConfig.default) + val otherUri = "ubuntu:latest" + + + val mediaType: MediaType = MediaType.parse(DockerRegistryV2Abstract.ManifestV2MediaType).right.get + val contentType: Header = `Content-Type`(mediaType) + val mockEcrClient: EcrPublicClient = mock[EcrPublicClient] + implicit val mockIOClient: Client[IO] = Client({ _: Request[IO] => + // This response will have an empty body, so we need to be explicit about the typing: + Resource.pure[IO, Response[IO]](Response(headers = Headers.of(contentType))) : Resource[IO, Response[IO]] + }) + val registry = new AmazonEcrPublic(DockerRegistryConfig.default, mockEcrClient) it should "Accept good URI" in 
{ val dockerImageIdentifier = DockerImageIdentifier.fromString(goodUri).get - registry.accepts(dockerImageIdentifier) shouldEqual(true) + registry.accepts(dockerImageIdentifier) shouldEqual true + } + + it should "NOT accept other URI" in { + val dockerImageIdentifier = DockerImageIdentifier.fromString(otherUri).get + registry.accepts(dockerImageIdentifier) shouldEqual false } - it should "NOT accept bad URI" in { - val dockerImageIdentifier = DockerImageIdentifier.fromString(badUri).get - registry.accepts(dockerImageIdentifier) shouldEqual(false) + it should "have public.ecr.aws as registryHostName" in { + val registryHostNameMethod = PrivateMethod[String]('registryHostName) + registry invokePrivate registryHostNameMethod(DockerImageIdentifier.fromString(goodUri).get) shouldEqual "public.ecr.aws" + } + + it should "return expected auth token" in { + val token = "auth-token" + val imageId = DockerImageIdentifier.fromString(goodUri).get + val dockerInfoRequest = DockerInfoRequest(imageId) + val context = DockerInfoActor.DockerInfoContext(request = dockerInfoRequest, replyTo = emptyActor) + + when(mockEcrClient.getAuthorizationToken(any[GetAuthorizationTokenRequest]())) + .thenReturn(GetAuthorizationTokenResponse + .builder() + .authorizationData(AuthorizationData + .builder() + .authorizationToken(token) + .build()) + .build) + + val getTokenMethod = PrivateMethod[IO[Option[String]]]('getToken) + registry invokePrivate getTokenMethod(context, mockIOClient) ensuring(io => io.unsafeRunSync().get == token) } } diff --git a/dockerHashing/src/test/scala/cromwell/docker/registryv2/flows/aws/AmazonEcrSpec.scala b/dockerHashing/src/test/scala/cromwell/docker/registryv2/flows/aws/AmazonEcrSpec.scala new file mode 100644 index 00000000000..5ddf98c7ffa --- /dev/null +++ b/dockerHashing/src/test/scala/cromwell/docker/registryv2/flows/aws/AmazonEcrSpec.scala @@ -0,0 +1,72 @@ +package cromwell.docker.registryv2.flows.aws + +import cats.effect.{IO, Resource} +import cromwell.core.TestKitSuite +import cromwell.docker.registryv2.DockerRegistryV2Abstract +import cromwell.docker.{DockerImageIdentifier, DockerInfoActor, DockerInfoRequest, DockerRegistryConfig} +import org.http4s.{AuthScheme, Header, Headers, MediaType, Request, Response} +import org.http4s.client.Client +import org.http4s.headers.`Content-Type` +import org.mockito.Mockito._ +import org.scalatest.{BeforeAndAfter, PrivateMethodTester} +import org.scalatest.flatspec.AnyFlatSpecLike +import org.scalatest.matchers.should.Matchers +import org.scalatestplus.mockito.MockitoSugar +import software.amazon.awssdk.services.ecr.EcrClient +import software.amazon.awssdk.services.ecr.model.{AuthorizationData, GetAuthorizationTokenResponse} + +class AmazonEcrSpec extends TestKitSuite with AnyFlatSpecLike with Matchers with MockitoSugar with BeforeAndAfter with PrivateMethodTester{ + behavior of "AmazonEcr" + + val goodUri = "123456789012.dkr.ecr.us-east-1.amazonaws.com/amazonlinux/amazonlinux:latest" + val otherUri = "ubuntu:latest" + + val mediaType: MediaType = MediaType.parse(DockerRegistryV2Abstract.ManifestV2MediaType).right.get + val contentType: Header = `Content-Type`(mediaType) + val mockEcrClient: EcrClient = mock[EcrClient] + implicit val mockIOClient: Client[IO] = Client({ _: Request[IO] => + // This response will have an empty body, so we need to be explicit about the typing: + Resource.pure[IO, Response[IO]](Response(headers = Headers.of(contentType))) : Resource[IO, Response[IO]] + }) + + val registry = new AmazonEcr(DockerRegistryConfig.default, 
mockEcrClient) + + it should "accept good URI" in { + val dockerImageIdentifier = DockerImageIdentifier.fromString(goodUri).get + registry.accepts(dockerImageIdentifier) shouldEqual true + } + + it should "NOT accept other URI" in { + val dockerImageIdentifier = DockerImageIdentifier.fromString(otherUri).get + registry.accepts(dockerImageIdentifier) shouldEqual false + } + + it should "use Basic Auth Scheme" in { + val authSchemeMethod = PrivateMethod[AuthScheme]('authorizationScheme) + registry invokePrivate authSchemeMethod() shouldEqual AuthScheme.Basic + } + + it should "return 123456789012.dkr.ecr.us-east-1.amazonaws.com as registryHostName" in { + val registryHostNameMethod = PrivateMethod[String]('registryHostName) + registry invokePrivate registryHostNameMethod(DockerImageIdentifier.fromString(goodUri).get) shouldEqual "123456789012.dkr.ecr.us-east-1.amazonaws.com" + } + + it should "return expected auth token" in { + val token = "auth-token" + val imageId = DockerImageIdentifier.fromString(goodUri).get + val dockerInfoRequest = DockerInfoRequest(imageId) + val context = DockerInfoActor.DockerInfoContext(request = dockerInfoRequest, replyTo = emptyActor) + + when(mockEcrClient.getAuthorizationToken) + .thenReturn(GetAuthorizationTokenResponse + .builder() + .authorizationData(AuthorizationData + .builder() + .authorizationToken(token) + .build()) + .build) + + val getTokenMethod = PrivateMethod[IO[Option[String]]]('getToken) + registry invokePrivate getTokenMethod(context, mockIOClient) ensuring(io => io.unsafeRunSync().get == token) + } +} From ca1dba36abd29f75b77546243985ef175e4ec4a0 Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Thu, 30 Dec 2021 12:54:53 +0000 Subject: [PATCH 031/326] working version of local filesystem with fsx too --- ...wsBatchAsyncBackendJobExecutionActor.scala | 26 ++++++++++++--- .../backend/impl/aws/AwsBatchAttributes.scala | 19 +++++++++-- .../impl/aws/AwsBatchConfiguration.scala | 4 +-- .../backend/impl/aws/AwsBatchJob.scala | 9 ++++-- .../aws/AwsBatchJobCachingActorHelper.scala | 26 +++++++++++++-- .../impl/aws/AwsBatchJobDefinition.scala | 32 +++++++++++++++---- 6 files changed, 95 insertions(+), 21 deletions(-) diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala index 7a36946e78d..49602106660 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala @@ -124,8 +124,7 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar override lazy val dockerImageUsed: Option[String] = Option(jobDockerImage) private lazy val execScript = - s"""|#!$jobShell - |${jobPaths.script.pathWithoutScheme} + s"""|${jobPaths.script.pathWithoutScheme} |""".stripMargin @@ -178,7 +177,8 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar generateAwsBatchOutputs(jobDescriptor), jobPaths, Seq.empty[AwsBatchParameter], configuration.awsConfig.region, - Option(configuration.awsAuth)) + Option(configuration.awsAuth), + configuration.fsxFileSystem) } /* Tries to abort the job in flight * @@ -281,14 +281,30 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar def getAbsolutePath(path: Path) = { configuration.fileSystem match { case 
AWSBatchStorageSystems.s3 => AwsBatchWorkingDisk.MountPoint.resolve(path) - case _ => DefaultPathBuilder.get(configuration.root).resolve(path) + // case _ => DefaultPathBuilder.get(configuration.root).resolve(path) + case _ => AwsBatchWorkingDisk.MountPoint.resolve(path) } - } + } + + val p = DefaultPathBuilder.get(path) + println("!!!!!!!!!!!!!!!!!!!!!!!!! relativePathAndVolume") + println(p.toString()) + println(p.isAbsolute) + println(getAbsolutePath(p).toString()) + println(configuration.root) + println(DefaultPathBuilder.get(configuration.root).toString()) + val absolutePath = DefaultPathBuilder.get(path) match { case p if !p.isAbsolute => getAbsolutePath(p) case p => p } + println(absolutePath.toString()) + disks.map(x=>{ + println(x.mountPoint.toString()) + println("--------------") + }) + disks.find(d => absolutePath.startsWith(d.mountPoint)) match { case Some(disk) => (disk.mountPoint.relativize(absolutePath), disk) case None => diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAttributes.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAttributes.scala index 26f69c4e79a..55da57c90e2 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAttributes.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAttributes.scala @@ -59,7 +59,8 @@ case class AwsBatchAttributes(fileSystem: String, executionBucket: String, duplicationStrategy: AwsBatchCacheHitDuplicationStrategy, submitAttempts: Int Refined Positive, - createDefinitionAttempts: Int Refined Positive) + createDefinitionAttempts: Int Refined Positive, + fsxFileSystem: Option[List[String]]) object AwsBatchAttributes { lazy val Logger = LoggerFactory.getLogger(this.getClass) @@ -90,6 +91,14 @@ object AwsBatchAttributes { deprecatedKeys foreach { key => logger.warn(s"Found deprecated configuration key $key, replaced with ${deprecated.get(key)}") } } + def parseFSx(backendConfig: Config): Option[List[String]] = { + val fsxConfig = backendConfig.getStringList("filesystems.fsx") + fsxConfig.isEmpty match { + case true => None + case false => Some(fsxConfig.asScala.toList) + } + } + warnDeprecated(configKeys, deprecatedAwsBatchKeys, context, Logger) val executionBucket: ErrorOr[String] = validate { backendConfig.as[String]("root") } @@ -123,6 +132,11 @@ object AwsBatchAttributes { case other => throw new IllegalArgumentException(s"Unrecognized caching duplication strategy: $other. Supported strategies are copy and reference. 
See reference.conf for more details.") } } + + val fsx: ErrorOr[Option[List[String]]] = validate {backendConfig.hasPath("filesystems.fsx") match { + case true => parseFSx(backendConfig) + case false => None + }} ( fileSysStr, @@ -130,7 +144,8 @@ object AwsBatchAttributes { executionBucket, duplicationStrategy, backendConfig.as[ErrorOr[Int Refined Positive]]("numSubmitAttempts"), - backendConfig.as[ErrorOr[Int Refined Positive]]("numCreateDefinitionAttempts") + backendConfig.as[ErrorOr[Int Refined Positive]]("numCreateDefinitionAttempts"), + fsx ).tupled.map((AwsBatchAttributes.apply _).tupled) match { case Valid(r) => r case Invalid(f) => diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchConfiguration.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchConfiguration.scala index 2bc76d4bb0b..b9278485305 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchConfiguration.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchConfiguration.scala @@ -53,9 +53,9 @@ class AwsBatchConfiguration(val configurationDescriptor: BackendConfigurationDes } val pathBuilderFactory = configurationDescriptor.backendConfig.hasPath("filesystems.s3") match { case true => S3PathBuilderFactory(configurationDescriptor.globalConfig, configurationDescriptor.backendConfig) - case false => - PathBuilderFactory + case false => PathBuilderFactory } + val fsxFileSystem = batchAttributes.fsxFileSystem } object AWSBatchStorageSystems { diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala index 90f1918b9da..eb4680662a2 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala @@ -31,6 +31,7 @@ package cromwell.backend.impl.aws import java.security.MessageDigest +import java.nio.file.attribute.PosixFilePermission import cats.data.ReaderT._ import cats.data.{Kleisli, ReaderT} @@ -82,7 +83,8 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL jobPaths: JobPaths, // Based on config, calculated in Job Paths, key to all things outside container parameters: Seq[AwsBatchParameter], configRegion: Option[Region], - optAwsAuthMode: Option[AwsAuthMode] = None + optAwsAuthMode: Option[AwsAuthMode] = None, + fsxFileSystem: Option[List[String]] ) { // values for container environment @@ -241,6 +243,8 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL val regex = "s3://([^/]*)/(.*)".r val regex(bucketName, key) = jobPaths.callExecutionRoot.toString writeReconfiguredScriptForAudit(reconfiguredScript, bucketName, key+"/reconfigured-script.sh") + }else{ + jobPaths.script.addPermission(PosixFilePermission.OTHERS_EXECUTE) } @@ -367,7 +371,8 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL jobDescriptor = jobDescriptor, jobPaths = jobPaths, inputs = inputs, - outputs = outputs) + outputs = outputs, + fsxFileSystem = fsxFileSystem) val jobDefinitionBuilder = StandardAwsBatchJobDefinitionBuilder val jobDefinition = jobDefinitionBuilder.build(jobDefinitionContext) diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobCachingActorHelper.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobCachingActorHelper.scala index 903f88a1cc4..a9c416563d3 
100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobCachingActorHelper.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobCachingActorHelper.scala @@ -55,12 +55,32 @@ trait AwsBatchJobCachingActorHelper extends StandardCachingActorHelper { lazy val runtimeAttributes: AwsBatchRuntimeAttributes = AwsBatchRuntimeAttributes(validatedRuntimeAttributes, configuration.runtimeConfig, configuration.fileSystem) + println("!!!!!!!!!!!!!!!!!!! DISKS") + println(runtimeAttributes.disks) + runtimeAttributes.disks.map(x =>{ + println(x.fsType) + println(x.mountPoint) + println(x.name) + }) + lazy val workingDisk: AwsBatchVolume = runtimeAttributes.disks.find(x => configuration.fileSystem match { - case AWSBatchStorageSystems.s3 => x.name == AwsBatchWorkingDisk.Name - case _ => configuration.root.startsWith(x.mountPoint.pathAsString) + case AWSBatchStorageSystems.s3 => { + println("!!!!!!!!!!!!!!!!!! S3") + println(x.name) + println(AwsBatchWorkingDisk.Name) + x.name == AwsBatchWorkingDisk.Name + } + case _ => { + println("!!!!!!!!!!!!!!!!!! LOCAL") + println(configuration.root) + println(x.mountPoint.pathAsString) + println(AwsBatchWorkingDisk.toString()) + println(AwsBatchWorkingDisk.Name) + // configuration.root.startsWith(x.mountPoint.pathAsString) + x.name == AwsBatchWorkingDisk.Name + } }).get - lazy val callRootPath: Path = callPaths.callExecutionRoot lazy val returnCodeFilename: String = callPaths.returnCodeFilename diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobDefinition.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobDefinition.scala index ae356b89772..10b617580d4 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobDefinition.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobDefinition.scala @@ -93,7 +93,15 @@ trait AwsBatchJobDefinitionBuilder { val environment = List.empty[KeyValuePair] - def buildVolumes(disks: Seq[AwsBatchVolume]): List[Volume] = { + def buildVolumes(disks: Seq[AwsBatchVolume], fsx: Option[List[String]]): List[Volume] = { + + val fsx_volumes = fsx.isDefined match { + case true => fsx.get.map(mnt => Volume.builder().name(mnt).host(Host.builder().sourcePath(s"/$mnt").build()).build()) + case false => List() + } + + println("FSX!!!!!!!!!!!!!!!!") + println(fsx_volumes) //all the configured disks plus the fetch and run volume and the aws-cli volume disks.map(d => d.toVolume()).toList ++ List( @@ -106,10 +114,18 @@ trait AwsBatchJobDefinitionBuilder { .name("awsCliHome") .host(Host.builder().sourcePath("/usr/local/aws-cli").build()) .build() - ) + ) ++ fsx_volumes } - def buildMountPoints(disks: Seq[AwsBatchVolume]): List[MountPoint] = { + def buildMountPoints(disks: Seq[AwsBatchVolume], fsx: Option[List[String]]): List[MountPoint] = { + + val fsx_disks = fsx.isDefined match { + case true => fsx.get.map(mnt => MountPoint.builder().readOnly(false).sourceVolume(mnt).containerPath(s"/$mnt").build()) + case false => List() + } + + println("FSX!!!!!!!!!!!!!!!!") + println(fsx_disks) //all the configured disks plus the fetch and run mount point and the AWS cli mount point disks.map(_.toMountPoint).toList ++ List( @@ -125,7 +141,7 @@ trait AwsBatchJobDefinitionBuilder { //where the aws-cli will be on the container .containerPath("/usr/local/aws-cli") .build() - ) + ) ++ fsx_disks } def buildName(imageName: String, packedCommand: String, volumes: List[Volume], mountPoints: 
List[MountPoint], env: Seq[KeyValuePair]): String = { @@ -146,8 +162,8 @@ trait AwsBatchJobDefinitionBuilder { case _ => context.commandText } val packedCommand = packCommand("/bin/bash", "-c", cmdName) - val volumes = buildVolumes( context.runtimeAttributes.disks ) - val mountPoints = buildMountPoints( context.runtimeAttributes.disks) + val volumes = buildVolumes( context.runtimeAttributes.disks, context.fsxFileSystem) + val mountPoints = buildMountPoints( context.runtimeAttributes.disks, context.fsxFileSystem) val jobDefinitionName = buildName( context.runtimeAttributes.dockerImage, packedCommand.mkString(","), @@ -216,7 +232,8 @@ case class AwsBatchJobDefinitionContext( jobDescriptor: BackendJobDescriptor, jobPaths: JobPaths, inputs: Set[AwsBatchInput], - outputs: Set[AwsBatchFileOutput]){ + outputs: Set[AwsBatchFileOutput], + fsxFileSystem: Option[List[String]]){ override def toString: String = { new ToStringBuilder(this, ToStringStyle.JSON_STYLE) @@ -229,6 +246,7 @@ case class AwsBatchJobDefinitionContext( .append("jobPaths", jobPaths) .append("inputs", inputs) .append("outputs", outputs) + .append("fsxFileSystem", fsxFileSystem) .build } } From c639ed1fefc115d03c8cadbe0e931dbae4281864 Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Wed, 5 Jan 2022 10:21:02 +0000 Subject: [PATCH 032/326] add ulimits and aws batch retries --- .../main/scala/cromwell/backend/backend.scala | 2 + .../StandardAsyncExecutionActor.scala | 2 + ...wsBatchAsyncBackendJobExecutionActor.scala | 14 -- .../backend/impl/aws/AwsBatchAttributes.scala | 9 +- .../backend/impl/aws/AwsBatchJob.scala | 10 +- .../aws/AwsBatchJobCachingActorHelper.scala | 20 +-- .../impl/aws/AwsBatchJobDefinition.scala | 99 ++++++++------ .../impl/aws/AwsBatchRuntimeAttributes.scala | 123 +++++++++++++++++- 8 files changed, 196 insertions(+), 83 deletions(-) diff --git a/backend/src/main/scala/cromwell/backend/backend.scala b/backend/src/main/scala/cromwell/backend/backend.scala index ea413c10367..18718f0e76f 100644 --- a/backend/src/main/scala/cromwell/backend/backend.scala +++ b/backend/src/main/scala/cromwell/backend/backend.scala @@ -139,6 +139,8 @@ object CommonBackendConfigurationAttributes { "default-runtime-attributes.noAddress", "default-runtime-attributes.docker", "default-runtime-attributes.queueArn", + "default-runtime-attributes.awsBatchRetryAttempts", + "default-runtime-attributes.ulimits", "default-runtime-attributes.failOnStderr", "slow-job-warning-time", "dockerhub", diff --git a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala index 96adb8c07b5..67e05464d54 100644 --- a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala +++ b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala @@ -449,8 +449,10 @@ trait StandardAsyncExecutionActor |tee $stderrRedirection < "$$$err" >&2 & |( |cd ${cwd.pathAsString} + |set -x |ENVIRONMENT_VARIABLES |INSTANTIATED_COMMAND + |set +x |) $stdinRedirection > "$$$out" 2> "$$$err" |echo $$? 
> $rcTmpPath |$emptyDirectoryFillCommand diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala index 49602106660..4b04f4cf1f5 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala @@ -286,25 +286,11 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar } } - val p = DefaultPathBuilder.get(path) - println("!!!!!!!!!!!!!!!!!!!!!!!!! relativePathAndVolume") - println(p.toString()) - println(p.isAbsolute) - println(getAbsolutePath(p).toString()) - println(configuration.root) - println(DefaultPathBuilder.get(configuration.root).toString()) - val absolutePath = DefaultPathBuilder.get(path) match { case p if !p.isAbsolute => getAbsolutePath(p) case p => p } - println(absolutePath.toString()) - disks.map(x=>{ - println(x.mountPoint.toString()) - println("--------------") - }) - disks.find(d => absolutePath.startsWith(d.mountPoint)) match { case Some(disk) => (disk.mountPoint.relativize(absolutePath), disk) case None => diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAttributes.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAttributes.scala index 55da57c90e2..9236b8342db 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAttributes.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAttributes.scala @@ -72,7 +72,14 @@ object AwsBatchAttributes { "filesystems.local.auth", "filesystems.s3.auth", "filesystems.s3.caching.duplication-strategy", - "filesystems.local.caching.duplication-strategy" + "filesystems.local.caching.duplication-strategy", + "auth", + "numCreateDefinitionAttempts", + "filesystems.s3.duplication-strategy", + "numSubmitAttempts", + "default-runtime-attributes.scriptBucketName", + "awsBatchRetryAttempts", + "ulimits" ) private val deprecatedAwsBatchKeys: Map[String, String] = Map( diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala index eb4680662a2..352a07f6cd1 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala @@ -402,16 +402,20 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL // See: // // http://aws-java-sdk-javadoc.s3-website-us-west-2.amazonaws.com/latest/software/amazon/awssdk/services/batch/model/RegisterJobDefinitionRequest.Builder.html - val definitionRequest = RegisterJobDefinitionRequest.builder + var definitionRequest = RegisterJobDefinitionRequest.builder .containerProperties(jobDefinition.containerProperties) .jobDefinitionName(jobDefinitionName) // See https://stackoverflow.com/questions/24349517/scala-method-named-type .`type`(JobDefinitionType.CONTAINER) - .build + //.build + + if (jobDefinitionContext.runtimeAttributes.awsBatchRetryAttempts != 0){ + definitionRequest = definitionRequest.retryStrategy(jobDefinition.retryStrategy) + } Log.debug(s"Submitting definition request: $definitionRequest") - val response: RegisterJobDefinitionResponse = 
batchClient.registerJobDefinition(definitionRequest) + val response: RegisterJobDefinitionResponse = batchClient.registerJobDefinition(definitionRequest.build) Log.info(s"Definition created: $response") response.jobDefinitionArn() } diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobCachingActorHelper.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobCachingActorHelper.scala index a9c416563d3..a3c693855be 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobCachingActorHelper.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobCachingActorHelper.scala @@ -55,27 +55,9 @@ trait AwsBatchJobCachingActorHelper extends StandardCachingActorHelper { lazy val runtimeAttributes: AwsBatchRuntimeAttributes = AwsBatchRuntimeAttributes(validatedRuntimeAttributes, configuration.runtimeConfig, configuration.fileSystem) - println("!!!!!!!!!!!!!!!!!!! DISKS") - println(runtimeAttributes.disks) - runtimeAttributes.disks.map(x =>{ - println(x.fsType) - println(x.mountPoint) - println(x.name) - }) - lazy val workingDisk: AwsBatchVolume = runtimeAttributes.disks.find(x => configuration.fileSystem match { - case AWSBatchStorageSystems.s3 => { - println("!!!!!!!!!!!!!!!!!! S3") - println(x.name) - println(AwsBatchWorkingDisk.Name) - x.name == AwsBatchWorkingDisk.Name - } + case AWSBatchStorageSystems.s3 => x.name == AwsBatchWorkingDisk.Name case _ => { - println("!!!!!!!!!!!!!!!!!! LOCAL") - println(configuration.root) - println(x.mountPoint.pathAsString) - println(AwsBatchWorkingDisk.toString()) - println(AwsBatchWorkingDisk.Name) // configuration.root.startsWith(x.mountPoint.pathAsString) x.name == AwsBatchWorkingDisk.Name } diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobDefinition.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobDefinition.scala index 10b617580d4..db06a9a4c8d 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobDefinition.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobDefinition.scala @@ -34,7 +34,7 @@ package cromwell.backend.impl.aws import scala.collection.mutable.ListBuffer import cromwell.backend.BackendJobDescriptor import cromwell.backend.io.JobPaths -import software.amazon.awssdk.services.batch.model.{ContainerProperties, Host, KeyValuePair, MountPoint, ResourceRequirement, ResourceType, Volume} +import software.amazon.awssdk.services.batch.model.{ContainerProperties, Host, KeyValuePair, MountPoint, ResourceRequirement, ResourceType, Volume, RetryStrategy, Ulimit} import cromwell.backend.impl.aws.io.AwsBatchVolume import scala.collection.JavaConverters._ @@ -60,12 +60,14 @@ import wdl4s.parser.MemoryUnit */ sealed trait AwsBatchJobDefinition { def containerProperties: ContainerProperties + def retryStrategy: RetryStrategy def name: String override def toString: String = { new ToStringBuilder(this, ToStringStyle.JSON_STYLE) .append("name", name) .append("containerProperties", containerProperties) + .append("retryStrategy", retryStrategy) .build } } @@ -76,21 +78,11 @@ trait AwsBatchJobDefinitionBuilder { /** Gets a builder, seeded with appropriate portions of the container properties * - * @param dockerImage docker image with which to run - * @return ContainerProperties builder ready for modification + * @param context AwsBatchJobDefinitionContext with all the runtime attributes + * @return ContainerProperties 
builder ready for modification and name * */ - def builder(dockerImage: String): ContainerProperties.Builder = - ContainerProperties.builder().image(dockerImage) - - - def buildResources(builder: ContainerProperties.Builder, - context: AwsBatchJobDefinitionContext): (ContainerProperties.Builder, String) = { - // The initial buffer should only contain one item - the hostpath of the - // local disk mount point, which will be needed by the docker container - // that copies data around - - val environment = List.empty[KeyValuePair] + def containerPropertiesBuilder(context: AwsBatchJobDefinitionContext): (ContainerProperties.Builder, String) = { def buildVolumes(disks: Seq[AwsBatchVolume], fsx: Option[List[String]]): List[Volume] = { @@ -100,9 +92,6 @@ trait AwsBatchJobDefinitionBuilder { case false => List() } - println("FSX!!!!!!!!!!!!!!!!") - println(fsx_volumes) - //all the configured disks plus the fetch and run volume and the aws-cli volume disks.map(d => d.toVolume()).toList ++ List( Volume.builder() @@ -124,9 +113,6 @@ trait AwsBatchJobDefinitionBuilder { case false => List() } - println("FSX!!!!!!!!!!!!!!!!") - println(fsx_disks) - //all the configured disks plus the fetch and run mount point and the AWS cli mount point disks.map(_.toMountPoint).toList ++ List( MountPoint.builder() @@ -144,36 +130,42 @@ trait AwsBatchJobDefinitionBuilder { ) ++ fsx_disks } - def buildName(imageName: String, packedCommand: String, volumes: List[Volume], mountPoints: List[MountPoint], env: Seq[KeyValuePair]): String = { - val str = s"$imageName:$packedCommand:${volumes.map(_.toString).mkString(",")}:${mountPoints.map(_.toString).mkString(",")}:${env.map(_.toString).mkString(",")}" + def buildUlimits(ulimits: Seq[Map[String, String]]): List[Ulimit] = { - val sha1 = MessageDigest.getInstance("SHA-1") - .digest( str.getBytes("UTF-8") ) - .map("%02x".format(_)).mkString - - val prefix = s"cromwell_$imageName".slice(0,88) // will be joined to a 40 character SHA1 for total length of 128 - - sanitize(prefix + sha1) + ulimits.filter(_.nonEmpty).map(u => + Ulimit.builder() + .name(u("name")) + .softLimit(u("softLimit").toInt) + .hardLimit(u("hardLimit").toInt) + .build() + ).toList } + def buildName(imageName: String, packedCommand: String, volumes: List[Volume], mountPoints: List[MountPoint], env: Seq[KeyValuePair], ulimits: List[Ulimit]): String = { + s"$imageName:$packedCommand:${volumes.map(_.toString).mkString(",")}:${mountPoints.map(_.toString).mkString(",")}:${env.map(_.toString).mkString(",")}:${ulimits.map(_.toString).mkString(",")}" + } + val environment = List.empty[KeyValuePair] val cmdName = context.runtimeAttributes.fileSystem match { - case AWSBatchStorageSystems.s3 => "/var/scratch/fetch_and_run.sh" - case _ => context.commandText + case AWSBatchStorageSystems.s3 => "/var/scratch/fetch_and_run.sh" + case _ => context.commandText } val packedCommand = packCommand("/bin/bash", "-c", cmdName) val volumes = buildVolumes( context.runtimeAttributes.disks, context.fsxFileSystem) val mountPoints = buildMountPoints( context.runtimeAttributes.disks, context.fsxFileSystem) - val jobDefinitionName = buildName( + val ulimits = buildUlimits( context.runtimeAttributes.ulimits) + val containerPropsName = buildName( context.runtimeAttributes.dockerImage, packedCommand.mkString(","), volumes, mountPoints, - environment + environment, + ulimits ) - (builder - .command(packedCommand.asJava) + (ContainerProperties.builder() + .image(context.runtimeAttributes.dockerImage) + .command(packedCommand.asJava) 
.resourceRequirements( ResourceRequirement.builder() .`type`(ResourceType.MEMORY) @@ -186,9 +178,18 @@ trait AwsBatchJobDefinitionBuilder { ) .volumes( volumes.asJava) .mountPoints( mountPoints.asJava) - .environment(environment.asJava), + .environment(environment.asJava) + .ulimits(ulimits.asJava), - jobDefinitionName) + containerPropsName) + } + + def retryStrategyBuilder(context: AwsBatchJobDefinitionContext): (RetryStrategy.Builder, String) = { + // We can add here the 'evaluateOnExit' statement + + (RetryStrategy.builder() + .attempts(context.runtimeAttributes.awsBatchRetryAttempts), + context.runtimeAttributes.awsBatchRetryAttempts.toString) } private def packCommand(shell: String, options: String, mainCommand: String): Seq[String] = { @@ -211,15 +212,29 @@ trait AwsBatchJobDefinitionBuilder { object StandardAwsBatchJobDefinitionBuilder extends AwsBatchJobDefinitionBuilder { def build(context: AwsBatchJobDefinitionContext): AwsBatchJobDefinition = { - //instantiate a builder with the name of the docker image - val builderInst = builder(context.runtimeAttributes.dockerImage) - val (b, name) = buildResources(builderInst, context) - new StandardAwsBatchJobDefinitionBuilder(b.build, name) + val (containerPropsInst, containerPropsName) = containerPropertiesBuilder(context) + val (retryStrategyInst, retryStrategyName) = retryStrategyBuilder(context) + + val name = buildName(context.runtimeAttributes.dockerImage, containerPropsName, retryStrategyName) + + new StandardAwsBatchJobDefinitionBuilder(containerPropsInst.build, retryStrategyInst.build, name) + } + + def buildName(imageName: String, containerPropsName: String, retryStrategyName: String): String = { + val str = s"$imageName:$containerPropsName:$retryStrategyName" + + val sha1 = MessageDigest.getInstance("SHA-1") + .digest( str.getBytes("UTF-8") ) + .map("%02x".format(_)).mkString + + val prefix = s"cromwell_${imageName}_".slice(0,88) // will be joined to a 40 character SHA1 for total length of 128 + + sanitize(prefix + sha1) } } -case class StandardAwsBatchJobDefinitionBuilder private(containerProperties: ContainerProperties, name: String) extends AwsBatchJobDefinition +case class StandardAwsBatchJobDefinitionBuilder private(containerProperties: ContainerProperties, retryStrategy: RetryStrategy, name: String) extends AwsBatchJobDefinition object AwsBatchJobDefinitionContext diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchRuntimeAttributes.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchRuntimeAttributes.scala index c6fc2a5f51f..d563b7435cc 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchRuntimeAttributes.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchRuntimeAttributes.scala @@ -60,6 +60,8 @@ import scala.util.matching.Regex * @param noAddress is there no address * @param scriptS3BucketName the s3 bucket where the execution command or script will be written and, from there, fetched into the container and executed * @param fileSystem the filesystem type, default is "s3" + * @param awsBatchRetryAttempts number of attempts that AWS Batch will retry the task if it fails + * @param ulimits ulimit values to be passed to the container */ case class AwsBatchRuntimeAttributes(cpu: Int Refined Positive, zones: Vector[String], @@ -71,6 +73,8 @@ case class AwsBatchRuntimeAttributes(cpu: Int Refined Positive, continueOnReturnCode: ContinueOnReturnCode, noAddress: Boolean, scriptS3BucketName: String, + 
awsBatchRetryAttempts: Int, + ulimits: Vector[Map[String, String]], fileSystem:String= "s3") object AwsBatchRuntimeAttributes { @@ -79,6 +83,8 @@ object AwsBatchRuntimeAttributes { val scriptS3BucketKey = "scriptBucketName" + val awsBatchRetryAttemptsKey = "awsBatchRetryAttempts" + val ZonesKey = "zones" private val ZonesDefaultValue = WomString("us-east-1a") @@ -92,6 +98,9 @@ object AwsBatchRuntimeAttributes { private val MemoryDefaultValue = "2 GB" + val UlimitsKey = "ulimits" + private val UlimitsDefaultValue = WomArray(WomArrayType(WomMapType(WomStringType,WomStringType)), Vector(WomMap(Map.empty[WomValue, WomValue]))) + private def cpuValidation(runtimeConfig: Option[Config]): RuntimeAttributesValidation[Int Refined Positive] = CpuValidation.instance .withDefault(CpuValidation.configDefaultWomValue(runtimeConfig) getOrElse CpuValidation.defaultMin) @@ -134,6 +143,14 @@ object AwsBatchRuntimeAttributes { QueueArnValidation.withDefault(QueueArnValidation.configDefaultWomValue(runtimeConfig) getOrElse (throw new RuntimeException("queueArn is required"))) + private def awsBatchRetryAttemptsValidation(runtimeConfig: Option[Config]): RuntimeAttributesValidation[Int] = { + AwsBatchRetryAttemptsValidation(awsBatchRetryAttemptsKey).withDefault(AwsBatchRetryAttemptsValidation(awsBatchRetryAttemptsKey) + .configDefaultWomValue(runtimeConfig).getOrElse(WomInteger(0))) + } + + private def ulimitsValidation(runtimeConfig: Option[Config]): RuntimeAttributesValidation[Vector[Map[String, String]]] = + UlimitsValidation.withDefault(UlimitsValidation.configDefaultWomValue(runtimeConfig) getOrElse UlimitsDefaultValue) + def runtimeAttributesBuilder(configuration: AwsBatchConfiguration): StandardValidatedRuntimeAttributesBuilder = { val runtimeConfig = configuration.runtimeConfig def validationsS3backend = StandardValidatedRuntimeAttributesBuilder.default(runtimeConfig).withValidation( @@ -146,7 +163,9 @@ object AwsBatchRuntimeAttributes { noAddressValidation(runtimeConfig), dockerValidation, queueArnValidation(runtimeConfig), - scriptS3BucketNameValidation(runtimeConfig) + scriptS3BucketNameValidation(runtimeConfig), + awsBatchRetryAttemptsValidation(runtimeConfig), + ulimitsValidation(runtimeConfig) ) def validationsLocalBackend = StandardValidatedRuntimeAttributesBuilder.default(runtimeConfig).withValidation( cpuValidation(runtimeConfig), @@ -157,7 +176,9 @@ object AwsBatchRuntimeAttributes { memoryMinValidation(runtimeConfig), noAddressValidation(runtimeConfig), dockerValidation, - queueArnValidation(runtimeConfig) + queueArnValidation(runtimeConfig), + awsBatchRetryAttemptsValidation(runtimeConfig), + ulimitsValidation(runtimeConfig) ) configuration.fileSystem match { @@ -180,8 +201,9 @@ object AwsBatchRuntimeAttributes { val scriptS3BucketName = fileSystem match { case AWSBatchStorageSystems.s3 => RuntimeAttributesValidation.extract(scriptS3BucketNameValidation(runtimeAttrsConfig) , validatedRuntimeAttributes) case _ => "" - } - + } + val awsBatchRetryAttempts: Int = RuntimeAttributesValidation.extract(awsBatchRetryAttemptsValidation(runtimeAttrsConfig), validatedRuntimeAttributes) + val ulimits: Vector[Map[String, String]] = RuntimeAttributesValidation.extract(ulimitsValidation(runtimeAttrsConfig), validatedRuntimeAttributes) new AwsBatchRuntimeAttributes( cpu, @@ -194,6 +216,8 @@ object AwsBatchRuntimeAttributes { continueOnReturnCode, noAddress, scriptS3BucketName, + awsBatchRetryAttempts, + ulimits, fileSystem ) } @@ -372,3 +396,94 @@ object DisksValidation extends 
RuntimeAttributesValidation[Seq[AwsBatchVolume]] override protected def missingValueMessage: String = s"Expecting $key runtime attribute to be a comma separated String or Array[String]" } + +object AwsBatchRetryAttemptsValidation { + def apply(key: String): AwsBatchRetryAttemptsValidation = new AwsBatchRetryAttemptsValidation(key) +} + +class AwsBatchRetryAttemptsValidation(key: String) extends IntRuntimeAttributesValidation(key) { + override protected def validateValue: PartialFunction[WomValue, ErrorOr[Int]] = { + case womValue if WomIntegerType.coerceRawValue(womValue).isSuccess => + WomIntegerType.coerceRawValue(womValue).get match { + case WomInteger(value) => + if (value.toInt < 0) + s"Expecting $key runtime attribute value greater than or equal to 0".invalidNel + else if (value.toInt > 10) + s"Expecting $key runtime attribute value lower than or equal to 10".invalidNel + else + value.toInt.validNel + } + } + + override protected def missingValueMessage: String = s"Expecting $key runtime attribute to be an Integer" +} + + +object UlimitsValidation + extends RuntimeAttributesValidation[Vector[Map[String, String]]] { + override def key: String = AwsBatchRuntimeAttributes.UlimitsKey + + override def coercion: Traversable[WomType] = + Set(WomStringType, WomArrayType(WomMapType(WomStringType, WomStringType))) + + var accepted_keys = Set("name", "softLimit", "hardLimit") + + override protected def validateValue + : PartialFunction[WomValue, ErrorOr[Vector[Map[String, String]]]] = { + case WomArray(womType, value) + if womType.memberType == WomMapType(WomStringType, WomStringType) => + check_maps(value.toVector) + case WomMap(_, _) => "!!! ERROR1".invalidNel + + } + + private def check_maps( + maps: Vector[WomValue] + ): ErrorOr[Vector[Map[String, String]]] = { + val entryNels: Vector[ErrorOr[Map[String, String]]] = maps.map { + case WomMap(_, value) => check_keys(value) + case _ => "!!! ERROR2".invalidNel + } + val sequenced: ErrorOr[Vector[Map[String, String]]] = sequenceNels( + entryNels + ) + sequenced + } + + private def check_keys( + dict: Map[WomValue, WomValue] + ): ErrorOr[Map[String, String]] = { + val map_keys = dict.keySet.map(_.valueString).toSet + val unrecognizedKeys = + accepted_keys.diff(map_keys) union map_keys.diff(accepted_keys) + + if (!dict.nonEmpty){ + Map.empty[String, String].validNel + }else if (unrecognizedKeys.nonEmpty) { + s"Invalid keys in $key runtime attribute. Refer to 'ulimits' section on https://docs.aws.amazon.com/batch/latest/userguide/job_definition_parameters.html#containerProperties".invalidNel + } else { + dict + .collect { case (WomString(k), WomString(v)) => + (k, v) + // case _ => "!!! 
ERROR3".invalidNel + } + .toMap + .validNel + } + } + + private def sequenceNels( + nels: Vector[ErrorOr[Map[String, String]]] + ): ErrorOr[Vector[Map[String, String]]] = { + val emptyNel: ErrorOr[Vector[Map[String, String]]] = + Vector.empty[Map[String, String]].validNel + val seqNel: ErrorOr[Vector[Map[String, String]]] = + nels.foldLeft(emptyNel) { (acc, v) => + (acc, v) mapN { (a, v) => a :+ v } + } + seqNel + } + + override protected def missingValueMessage: String = + s"Expecting $key runtime attribute to be an Array[Map[String, String]]" +} From 66b29b4fb23d0d2aa4ba476c198f0553b0e96131 Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Mon, 10 Jan 2022 16:36:01 +0000 Subject: [PATCH 033/326] add exit to end of script --- .../backend/standard/StandardAsyncExecutionActor.scala | 1 + .../impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala index 67e05464d54..8693e1a0c4a 100644 --- a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala +++ b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala @@ -463,6 +463,7 @@ trait StandardAsyncExecutionActor |${directoryScripts(directoryOutputs)} |) |mv $rcTmpPath $rcPath + |exit $$(head -n 1 $rcPath) |""".stripMargin .replace("SCRIPT_PREAMBLE", scriptPreamble) .replace("ENVIRONMENT_VARIABLES", environmentVariables) diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala index 4b04f4cf1f5..d869a423f74 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala @@ -124,7 +124,8 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar override lazy val dockerImageUsed: Option[String] = Option(jobDockerImage) private lazy val execScript = - s"""|${jobPaths.script.pathWithoutScheme} + s"""|ls -lah ${jobPaths.script.pathWithoutScheme} + |${jobPaths.script.pathWithoutScheme} |""".stripMargin From 02150b2bf20bbb12f0e548cfdd0822df5793c297 Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Mon, 31 Jan 2022 16:40:07 +0000 Subject: [PATCH 034/326] add script closure --- .../standard/StandardAsyncExecutionActor.scala | 12 ++++++++---- ...AwsBatchAsyncBackendJobExecutionActor.scala | 18 +++++++++++++++++- .../backend/impl/aws/AwsBatchJob.scala | 12 +++++++----- 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala index 8693e1a0c4a..d4c74730a48 100644 --- a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala +++ b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala @@ -329,6 +329,9 @@ trait StandardAsyncExecutionActor /** Any custom code that should be run within commandScriptContents before the instantiated command. */ def scriptPreamble: String = "" + /** Any custom code that should be run within commandScriptContents right before exiting. 
*/ + def scriptClosure: String = "" + def cwd: Path = commandDirectory def rcPath: Path = cwd./(jobPaths.returnCodeFilename) @@ -447,14 +450,14 @@ trait StandardAsyncExecutionActor |touch $stdoutRedirection $stderrRedirection |tee $stdoutRedirection < "$$$out" & |tee $stderrRedirection < "$$$err" >&2 & + |set -x |( |cd ${cwd.pathAsString} - |set -x |ENVIRONMENT_VARIABLES |INSTANTIATED_COMMAND - |set +x |) $stdinRedirection > "$$$out" 2> "$$$err" |echo $$? > $rcTmpPath + |set +x |$emptyDirectoryFillCommand |( |cd ${cwd.pathAsString} @@ -463,13 +466,14 @@ trait StandardAsyncExecutionActor |${directoryScripts(directoryOutputs)} |) |mv $rcTmpPath $rcPath - |exit $$(head -n 1 $rcPath) + |SCRIPT_CLOSURE |""".stripMargin .replace("SCRIPT_PREAMBLE", scriptPreamble) .replace("ENVIRONMENT_VARIABLES", environmentVariables) .replace("INSTANTIATED_COMMAND", commandString) .replace("SCRIPT_EPILOGUE", scriptEpilogue) - .replace("DOCKER_OUTPUT_DIR_LINK", dockerOutputDir)) + .replace("DOCKER_OUTPUT_DIR_LINK", dockerOutputDir) + .replace("SCRIPT_CLOSURE", scriptClosure)) } def runtimeEnvironmentPathMapper(env: RuntimeEnvironment): RuntimeEnvironment = { diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala index d869a423f74..5b6639344fa 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala @@ -124,7 +124,8 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar override lazy val dockerImageUsed: Option[String] = Option(jobDockerImage) private lazy val execScript = - s"""|ls -lah ${jobPaths.script.pathWithoutScheme} + s"""|df -h + |ls -lah ${jobPaths.script.pathWithoutScheme} |${jobPaths.script.pathWithoutScheme} |""".stripMargin @@ -399,6 +400,21 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar case _ => jobPaths.callExecutionRoot } + override def scriptPreamble: String = { + configuration.fileSystem match { + case AWSBatchStorageSystems.s3 => "" + case _ => s"""|# clean directory in case of multiple retries + |ls | grep -v script | xargs rm -rf""".stripMargin + } + } + + override def scriptClosure: String = { + configuration.fileSystem match { + case AWSBatchStorageSystems.s3 => "" + case _ => s"exit $$(head -n 1 $rcPath)" + } + } + override def globParentDirectory(womGlobFile: WomGlobFile): Path = configuration.fileSystem match { case AWSBatchStorageSystems.s3 => diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala index 352a07f6cd1..50f1ca0fec7 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala @@ -186,21 +186,19 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL s""" |touch ${output.name} |$s3Cmd cp --no-progress ${output.name} ${output.s3key} - |if [ -e $globDirectory ]; then $s3Cmd cp --no-progress $globDirectory $s3GlobOutDirectory --recursive --exclude "cromwell_glob_control_file"; fi - |""".stripMargin + |if [ -e $globDirectory ]; then $s3Cmd cp --no-progress $globDirectory $s3GlobOutDirectory --recursive 
--exclude "cromwell_glob_control_file"; fi""".stripMargin case output: AwsBatchFileOutput if output.s3key.startsWith("s3://") && output.mount.mountPoint.pathAsString == AwsBatchWorkingDisk.MountPoint.pathAsString => //output is on working disk mount s""" - |$s3Cmd cp --no-progress $workDir/${output.local.pathAsString} ${output.s3key} - |""".stripMargin + |$s3Cmd cp --no-progress $workDir/${output.local.pathAsString} ${output.s3key}""".stripMargin case output: AwsBatchFileOutput => //output on a different mount s"$s3Cmd cp --no-progress ${output.mount.mountPoint.pathAsString}/${output.local.pathAsString} ${output.s3key}" case _ => "" }.mkString("\n") + "\n" + s""" - |if [ -f $workDir/${jobPaths.returnCodeFilename} ]; then $s3Cmd cp --no-progress $workDir/${jobPaths.returnCodeFilename} ${jobPaths.callRoot.pathAsString}/${jobPaths.returnCodeFilename} ; fi\n + |if [ -f $workDir/${jobPaths.returnCodeFilename} ]; then $s3Cmd cp --no-progress $workDir/${jobPaths.returnCodeFilename} ${jobPaths.callRoot.pathAsString}/${jobPaths.returnCodeFilename} ; fi |if [ -f $stdErr ]; then $s3Cmd cp --no-progress $stdErr ${jobPaths.standardPaths.error.pathAsString}; fi |if [ -f $stdOut ]; then $s3Cmd cp --no-progress $stdOut ${jobPaths.standardPaths.output.pathAsString}; fi |""".stripMargin @@ -214,6 +212,10 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL |echo '*** DELOCALIZING OUTPUTS ***' |$outputCopyCommand |echo '*** COMPLETED DELOCALIZATION ***' + |echo '*** EXITING WITH RETURN CODE ***' + |rc=$$(head -n 1 $workDir/${jobPaths.returnCodeFilename}) + |echo $$rc + |exit $$rc |} |""".stripMargin } From 2f434e1462bd053530f2f3c5a0dcfbcd4aaa5aea Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Tue, 1 Mar 2022 12:04:47 +0000 Subject: [PATCH 035/326] clean preamble; do not gzip batch command --- .../AwsBatchAsyncBackendJobExecutionActor.scala | 6 ++++-- .../impl/aws/AwsBatchJobDefinition.scala | 17 +++++++++-------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala index 2ffca40e71d..9e65e819988 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala @@ -123,9 +123,12 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar override lazy val dockerImageUsed: Option[String] = Option(jobDockerImage) + // |cd ${jobPaths.script.parent.pathWithoutScheme}; ls | grep -v script | xargs rm -rf; cd - + private lazy val execScript = s"""|df -h |ls -lah ${jobPaths.script.pathWithoutScheme} + |find ${jobPaths.script.parent.pathWithoutScheme} -group root | grep -v script | xargs rm -vrf |${jobPaths.script.pathWithoutScheme} |""".stripMargin @@ -411,8 +414,7 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar override def scriptPreamble: String = { configuration.fileSystem match { case AWSBatchStorageSystems.s3 => "" - case _ => s"""|# clean directory in case of multiple retries - |ls | grep -v script | xargs rm -rf""".stripMargin + case _ => s"" } } diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobDefinition.scala 
b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobDefinition.scala index 2adb27df5a8..3b8470c6c29 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobDefinition.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobDefinition.scala @@ -186,14 +186,15 @@ trait AwsBatchJobDefinitionBuilder { private def packCommand(shell: String, options: String, mainCommand: String): Seq[String] = { val rc = new ListBuffer[String]() - val lim = 1024 - val packedCommand = mainCommand.length() match { - case len if len <= lim => mainCommand - case len if len > lim => { - rc += "gzipdata" // This is hard coded in our agent and must be the first item - gzip(mainCommand) - } - } + // val lim = 1024 + // val packedCommand = mainCommand.length() match { + // case len if len <= lim => mainCommand + // case len if len > lim => { + // rc += "gzipdata" // This is hard coded in our agent and must be the first item + // gzip(mainCommand) + // } + // } + val packedCommand = mainCommand rc += shell rc += options rc += packedCommand From 1d20031e5ac9a58c0b856a7651a875dde620fe93 Mon Sep 17 00:00:00 2001 From: Mark Schreiber Date: Mon, 14 Mar 2022 11:36:56 -0400 Subject: [PATCH 036/326] improved logging --- .../registryv2/flows/aws/AmazonEcr.scala | 3 ++ .../flows/aws/AmazonEcrAbstract.scala | 18 +++++-- .../flows/aws/AmazonEcrPublic.scala | 6 ++- .../registryv2/flows/aws/EcrUtils.scala | 2 +- .../backend/impl/aws/AwsBatchJob.scala | 53 ++++++++++--------- 5 files changed, 51 insertions(+), 31 deletions(-) diff --git a/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/aws/AmazonEcr.scala b/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/aws/AmazonEcr.scala index a073b9f4eb2..f1174009669 100644 --- a/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/aws/AmazonEcr.scala +++ b/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/aws/AmazonEcr.scala @@ -4,12 +4,14 @@ import cats.effect.IO import cromwell.docker.{DockerImageIdentifier, DockerInfoActor, DockerRegistryConfig} import org.http4s.AuthScheme import org.http4s.client.Client +import org.slf4j.{Logger, LoggerFactory} import software.amazon.awssdk.services.ecr.EcrClient import scala.compat.java8.OptionConverters._ import scala.concurrent.Future class AmazonEcr(override val config: DockerRegistryConfig, ecrClient: EcrClient = EcrClient.create()) extends AmazonEcrAbstract(config) { + private val logger: Logger = LoggerFactory.getLogger(this.getClass) override protected val authorizationScheme: AuthScheme = AuthScheme.Basic @@ -30,6 +32,7 @@ class AmazonEcr(override val config: DockerRegistryConfig, ecrClient: EcrClient override def accepts(dockerImageIdentifier: DockerImageIdentifier): Boolean = dockerImageIdentifier.hostAsString.contains("amazonaws.com") override protected def getToken(dockerInfoContext: DockerInfoActor.DockerInfoContext)(implicit client: Client[IO]): IO[Option[String]] = { + logger.info("obtaining access token for '{}'", dockerInfoContext.dockerImageID.fullName) val eventualMaybeToken = Future(ecrClient.getAuthorizationToken .authorizationData() .stream() diff --git a/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/aws/AmazonEcrAbstract.scala b/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/aws/AmazonEcrAbstract.scala index bc9ea61fd74..f30f11fb971 100644 --- a/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/aws/AmazonEcrAbstract.scala +++ 
b/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/aws/AmazonEcrAbstract.scala @@ -6,31 +6,41 @@ import cromwell.docker.registryv2.DockerRegistryV2Abstract import cromwell.docker.registryv2.flows.aws.EcrUtils.{EcrForbidden, EcrNotFound, EcrUnauthorized} import org.apache.commons.codec.digest.DigestUtils import org.http4s.{Header, Response, Status} +import org.slf4j.{Logger, LoggerFactory} abstract class AmazonEcrAbstract(override val config: DockerRegistryConfig) extends DockerRegistryV2Abstract(config) { + private val logger: Logger = LoggerFactory.getLogger(this.getClass) + /** * Not used as getToken is overridden */ - override protected def authorizationServerHostName(dockerImageIdentifier: DockerImageIdentifier): String = "" + override protected def authorizationServerHostName(dockerImageIdentifier: DockerImageIdentifier): String = { + logger.warn("A call has been made to authorizationServerHostName for an ECR container but ECR auth should be done via SDK. This call will be ignored") + "" + } /** * Not used as getToken is overridden */ - override protected def buildTokenRequestHeaders(dockerInfoContext: DockerInfoActor.DockerInfoContext): List[Header] = List.empty + override protected def buildTokenRequestHeaders(dockerInfoContext: DockerInfoActor.DockerInfoContext): List[Header] = { + logger.warn("A call has been made to buildTokenRequestHeaders for an ECR container but ECR auth should be done via SDK. This call will be ignored") + List.empty + } /** - * Amazon ECR repositories don't have a digest header in responses so we must made it from the manifest body + * Amazon ECR repositories don't have a digest header in responses so we must make it from the manifest body */ override protected def getDigestFromResponse(response: Response[IO]): IO[DockerHashResult] = response match { case Status.Successful(r) => digestManifest(r.bodyText) - case Status.Unauthorized(_) => IO.raiseError(new EcrUnauthorized) + case Status.Unauthorized(_) => IO.raiseError(new EcrUnauthorized("Not authorized to obtain a digest from this registry. 
Your token may be invalid")) case Status.NotFound(_) => IO.raiseError(new EcrNotFound) case Status.Forbidden(_) => IO.raiseError(new EcrForbidden) case failed => failed.as[String].flatMap(body => IO.raiseError(new Exception(s"Failed to get manifest: $body"))) } private def digestManifest(bodyText: fs2.Stream[IO, String]): IO[DockerHashResult] = { + logger.info("computing sha256 digest for container manifest") bodyText .compile .string diff --git a/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/aws/AmazonEcrPublic.scala b/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/aws/AmazonEcrPublic.scala index 797d4b70703..edff304a810 100644 --- a/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/aws/AmazonEcrPublic.scala +++ b/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/aws/AmazonEcrPublic.scala @@ -3,6 +3,7 @@ package cromwell.docker.registryv2.flows.aws import cats.effect.IO import cromwell.docker.{DockerImageIdentifier, DockerInfoActor, DockerRegistryConfig} import org.http4s.client.Client +import org.slf4j.{Logger, LoggerFactory} import software.amazon.awssdk.services.ecrpublic.EcrPublicClient import software.amazon.awssdk.services.ecrpublic.model.GetAuthorizationTokenRequest @@ -10,6 +11,9 @@ import scala.concurrent.Future class AmazonEcrPublic(override val config: DockerRegistryConfig, ecrClient: EcrPublicClient = EcrPublicClient.create()) extends AmazonEcrAbstract(config) { + private val logger: Logger = LoggerFactory.getLogger(this.getClass) + + /** * public.ecr.aws */ @@ -23,7 +27,7 @@ class AmazonEcrPublic(override val config: DockerRegistryConfig, ecrClient: EcrP override protected def getToken(dockerInfoContext: DockerInfoActor.DockerInfoContext)(implicit client: Client[IO]): IO[Option[String]] = { - + logger.info("obtaining access token for '{}'", dockerInfoContext.dockerImageID.fullName) val eventualMaybeToken: Future[Option[String]] = Future( Option(ecrClient .getAuthorizationToken(GetAuthorizationTokenRequest.builder().build()) diff --git a/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/aws/EcrUtils.scala b/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/aws/EcrUtils.scala index 9da904c8adc..405c22076e4 100644 --- a/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/aws/EcrUtils.scala +++ b/dockerHashing/src/main/scala/cromwell/docker/registryv2/flows/aws/EcrUtils.scala @@ -2,7 +2,7 @@ package cromwell.docker.registryv2.flows.aws object EcrUtils { - case class EcrUnauthorized() extends Exception + case class EcrUnauthorized(msg: String) extends Exception case class EcrNotFound() extends Exception case class EcrForbidden() extends Exception diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala index 3102f678aa6..0db547242c1 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala @@ -390,12 +390,11 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL //check if there is already a suitable definition based on the calculated job definition name - val jobDefinitionName = jobDefinition.name - Log.debug(s"Checking for existence of job definition called: $jobDefinitionName") + Log.debug(s"Checking for existence of job definition called: ${jobDefinition.name}") val describeJobDefinitionRequest = 
DescribeJobDefinitionsRequest.builder() - .jobDefinitionName( jobDefinitionName ) + .jobDefinitionName( jobDefinition.name ) .status("ACTIVE") .build() @@ -403,30 +402,19 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL if ( !describeJobDefinitionResponse.jobDefinitions.isEmpty ) { //sort the definitions so that the latest revision is at the head - val definitions = describeJobDefinitionResponse.jobDefinitions().asScala.toList.sortWith(_.revision > _.revision) - - //return the arn of the job - definitions.head.jobDefinitionArn() - } else { - Log.debug(s"No job definition found. Creating job definition: $jobDefinitionName") - - // See: - // - // http://aws-java-sdk-javadoc.s3-website-us-west-2.amazonaws.com/latest/software/amazon/awssdk/services/batch/model/RegisterJobDefinitionRequest.Builder.html - var definitionRequest = RegisterJobDefinitionRequest.builder - .containerProperties(jobDefinition.containerProperties) - .jobDefinitionName(jobDefinitionName) - // See https://stackoverflow.com/questions/24349517/scala-method-named-type - .`type`(JobDefinitionType.CONTAINER) - - if (jobDefinitionContext.runtimeAttributes.awsBatchRetryAttempts != 0){ - definitionRequest = definitionRequest.retryStrategy(jobDefinition.retryStrategy) + val existingDefinition = describeJobDefinitionResponse.jobDefinitions().asScala.toList.sortWith(_.revision > _.revision).head + + //TODO test this + if (existingDefinition.containerProperties().memory() != null || existingDefinition.containerProperties().vcpus() != null) { + Log.warn("the job definition '{}' has deprecated configuration for memory and vCPU and will be replaced", existingDefinition.jobDefinitionName()) + registerJobDefinition(jobDefinition, jobDefinitionContext).jobDefinitionArn() + } else { + existingDefinition.jobDefinitionArn() } + } else { + Log.debug(s"No job definition found. 
Creating job definition: ${jobDefinition.name}") - Log.debug(s"Submitting definition request: $definitionRequest") - - val response: RegisterJobDefinitionResponse = batchClient.registerJobDefinition(definitionRequest.build) - Log.info(s"Definition created: $response") + val response: RegisterJobDefinitionResponse = registerJobDefinition(jobDefinition, jobDefinitionContext) response.jobDefinitionArn() } }) @@ -455,6 +443,21 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL } } + def registerJobDefinition(jobDefinition: AwsBatchJobDefinition, jobDefinitionContext: AwsBatchJobDefinitionContext): RegisterJobDefinitionResponse = { + // See: + // + // http://aws-java-sdk-javadoc.s3-website-us-west-2.amazonaws.com/latest/software/amazon/awssdk/services/batch/model/RegisterJobDefinitionRequest.Builder.html + var definitionRequest = RegisterJobDefinitionRequest.builder + .containerProperties(jobDefinition.containerProperties) + .jobDefinitionName(jobDefinition.name) + // See https://stackoverflow.com/questions/24349517/scala-method-named-type + .`type`(JobDefinitionType.CONTAINER) + + if (jobDefinitionContext.runtimeAttributes.awsBatchRetryAttempts != 0){ + definitionRequest = definitionRequest.retryStrategy(jobDefinition.retryStrategy) + } + batchClient.registerJobDefinition(definitionRequest.build) + } /** Gets the status of a job by its Id, converted to a RunStatus * From 60cd8dba265a88b1dcb31aa0dd62ae948ffb8818 Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Tue, 15 Mar 2022 10:47:49 +0000 Subject: [PATCH 037/326] dump --- ...wsBatchAsyncBackendJobExecutionActor.scala | 2 +- .../backend/impl/aws/AwsBatchAttributes.scala | 26 ++++++++++--------- .../impl/aws/AwsBatchConfiguration.scala | 2 +- .../backend/impl/aws/AwsBatchJob.scala | 4 +-- .../impl/aws/AwsBatchJobDefinition.scala | 8 +++--- 5 files changed, 22 insertions(+), 20 deletions(-) diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala index 9e65e819988..9fb5ba7ca9f 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala @@ -183,7 +183,7 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar jobPaths, Seq.empty[AwsBatchParameter], configuration.awsConfig.region, Option(configuration.awsAuth), - configuration.fsxFileSystem) + configuration.fsxMntPoint) } /* Tries to abort the job in flight * diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAttributes.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAttributes.scala index 9236b8342db..f1fd47c8ddd 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAttributes.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAttributes.scala @@ -60,7 +60,7 @@ case class AwsBatchAttributes(fileSystem: String, duplicationStrategy: AwsBatchCacheHitDuplicationStrategy, submitAttempts: Int Refined Positive, createDefinitionAttempts: Int Refined Positive, - fsxFileSystem: Option[List[String]]) + fsxMntPoint: Option[List[String]]) object AwsBatchAttributes { lazy val Logger = LoggerFactory.getLogger(this.getClass) @@ -70,12 +70,14 @@ object AwsBatchAttributes { "root", 
"filesystems", "filesystems.local.auth", + "filesystems.local.fsx", + "filesystems.local.localization", + "filesystems.local.caching.hashing-strategy", + "filesystems.local.caching.duplication-strategy", "filesystems.s3.auth", "filesystems.s3.caching.duplication-strategy", - "filesystems.local.caching.duplication-strategy", "auth", "numCreateDefinitionAttempts", - "filesystems.s3.duplication-strategy", "numSubmitAttempts", "default-runtime-attributes.scriptBucketName", "awsBatchRetryAttempts", @@ -98,11 +100,10 @@ object AwsBatchAttributes { deprecatedKeys foreach { key => logger.warn(s"Found deprecated configuration key $key, replaced with ${deprecated.get(key)}") } } - def parseFSx(backendConfig: Config): Option[List[String]] = { - val fsxConfig = backendConfig.getStringList("filesystems.fsx") - fsxConfig.isEmpty match { + def parseFSx(config: List[String]): Option[List[String]] = { + config.isEmpty match { case true => None - case false => Some(fsxConfig.asScala.toList) + case false => Some(config) } } @@ -140,10 +141,11 @@ object AwsBatchAttributes { } } - val fsx: ErrorOr[Option[List[String]]] = validate {backendConfig.hasPath("filesystems.fsx") match { - case true => parseFSx(backendConfig) - case false => None - }} + val fsxMntPoint: ErrorOr[Option[List[String]]] = validate {backendConfig.hasPath("filesystems.local.fsx") match { + case true => parseFSx(backendConfig.getStringList("filesystems.local.fsx").asScala.toList) + case false => None + } + } ( fileSysStr, @@ -152,7 +154,7 @@ object AwsBatchAttributes { duplicationStrategy, backendConfig.as[ErrorOr[Int Refined Positive]]("numSubmitAttempts"), backendConfig.as[ErrorOr[Int Refined Positive]]("numCreateDefinitionAttempts"), - fsx + fsxMntPoint ).tupled.map((AwsBatchAttributes.apply _).tupled) match { case Valid(r) => r case Invalid(f) => diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchConfiguration.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchConfiguration.scala index b9278485305..0dea4e0e167 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchConfiguration.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchConfiguration.scala @@ -55,7 +55,7 @@ class AwsBatchConfiguration(val configurationDescriptor: BackendConfigurationDes case true => S3PathBuilderFactory(configurationDescriptor.globalConfig, configurationDescriptor.backendConfig) case false => PathBuilderFactory } - val fsxFileSystem = batchAttributes.fsxFileSystem + val fsxMntPoint = batchAttributes.fsxMntPoint } object AWSBatchStorageSystems { diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala index 0c501774180..d7bbfc62271 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala @@ -84,7 +84,7 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL parameters: Seq[AwsBatchParameter], configRegion: Option[Region], optAwsAuthMode: Option[AwsAuthMode] = None, - fsxFileSystem: Option[List[String]] + fsxMntPoint: Option[List[String]] ) { @@ -390,7 +390,7 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL jobPaths = jobPaths, inputs = inputs, outputs = outputs, - fsxFileSystem = fsxFileSystem) + fsxMntPoint = fsxMntPoint) val jobDefinitionBuilder = 
StandardAwsBatchJobDefinitionBuilder val jobDefinition = jobDefinitionBuilder.build(jobDefinitionContext) diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobDefinition.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobDefinition.scala index 3b8470c6c29..3e674255b95 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobDefinition.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobDefinition.scala @@ -150,8 +150,8 @@ trait AwsBatchJobDefinitionBuilder { case _ => context.commandText } val packedCommand = packCommand("/bin/bash", "-c", cmdName) - val volumes = buildVolumes( context.runtimeAttributes.disks, context.fsxFileSystem) - val mountPoints = buildMountPoints( context.runtimeAttributes.disks, context.fsxFileSystem) + val volumes = buildVolumes( context.runtimeAttributes.disks, context.fsxMntPoint) + val mountPoints = buildMountPoints( context.runtimeAttributes.disks, context.fsxMntPoint) val ulimits = buildUlimits( context.runtimeAttributes.ulimits) val containerPropsName = buildName( context.runtimeAttributes.dockerImage, @@ -240,7 +240,7 @@ case class AwsBatchJobDefinitionContext( jobPaths: JobPaths, inputs: Set[AwsBatchInput], outputs: Set[AwsBatchFileOutput], - fsxFileSystem: Option[List[String]]){ + fsxMntPoint: Option[List[String]]){ override def toString: String = { new ToStringBuilder(this, ToStringStyle.JSON_STYLE) @@ -253,7 +253,7 @@ case class AwsBatchJobDefinitionContext( .append("jobPaths", jobPaths) .append("inputs", inputs) .append("outputs", outputs) - .append("fsxFileSystem", fsxFileSystem) + .append("fsxMntPoint", fsxMntPoint) .build } } From bb37a716ed83f9ad6209122eb324469af2b6d72c Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Tue, 15 Mar 2022 12:25:10 +0000 Subject: [PATCH 038/326] add aws batch preamble; increase the character limit for job definition command --- .../AwsBatchAsyncBackendJobExecutionActor.scala | 6 ++---- .../impl/aws/AwsBatchJobDefinition.scala | 17 ++++++++--------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala index 9fb5ba7ca9f..8d44fd84d36 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala @@ -126,9 +126,7 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar // |cd ${jobPaths.script.parent.pathWithoutScheme}; ls | grep -v script | xargs rm -rf; cd - private lazy val execScript = - s"""|df -h - |ls -lah ${jobPaths.script.pathWithoutScheme} - |find ${jobPaths.script.parent.pathWithoutScheme} -group root | grep -v script | xargs rm -vrf + s"""|#!$jobShell |${jobPaths.script.pathWithoutScheme} |""".stripMargin @@ -414,7 +412,7 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar override def scriptPreamble: String = { configuration.fileSystem match { case AWSBatchStorageSystems.s3 => "" - case _ => s"" + case _ => s"find ${jobPaths.script.parent.pathWithoutScheme} -group root | grep -v script | xargs rm -vrf" } } diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobDefinition.scala 
b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobDefinition.scala index 3e674255b95..0bdc9e00cc1 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobDefinition.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJobDefinition.scala @@ -186,15 +186,14 @@ trait AwsBatchJobDefinitionBuilder { private def packCommand(shell: String, options: String, mainCommand: String): Seq[String] = { val rc = new ListBuffer[String]() - // val lim = 1024 - // val packedCommand = mainCommand.length() match { - // case len if len <= lim => mainCommand - // case len if len > lim => { - // rc += "gzipdata" // This is hard coded in our agent and must be the first item - // gzip(mainCommand) - // } - // } - val packedCommand = mainCommand + val lim = 20480 + val packedCommand = mainCommand.length() match { + case len if len <= lim => mainCommand + case len if len > lim => { + rc += "gzipdata" // This is hard coded in our agent and must be the first item + gzip(mainCommand) + } + } rc += shell rc += options rc += packedCommand From 0b40de32cb0ead900116ab86da39c213d5b78c11 Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Tue, 15 Mar 2022 13:10:19 +0000 Subject: [PATCH 039/326] add dockerhub authentication --- project/Dependencies.scala | 1 + .../impl/aws/AwsBatchConfiguration.scala | 3 +- .../aws/AwsBatchInitializationActor.scala | 65 +++++++++++++++++++ .../scala/cromwell/backend/impl/aws/README.md | 14 ++++ 4 files changed, 82 insertions(+), 1 deletion(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 9b7103a48df..51de922881a 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -323,6 +323,7 @@ object Dependencies { "ecs", "ecr", "ecrpublic", + "secretsmanager", ).map(artifactName => "software.amazon.awssdk" % artifactName % awsSdkV) private val googleCloudDependencies = List( diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchConfiguration.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchConfiguration.scala index 0dea4e0e167..78bbb2d1a51 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchConfiguration.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchConfiguration.scala @@ -45,7 +45,6 @@ class AwsBatchConfiguration(val configurationDescriptor: BackendConfigurationDes val runtimeConfig = configurationDescriptor.backendRuntimeAttributesConfig val batchAttributes = AwsBatchAttributes.fromConfigs(awsConfig, configurationDescriptor.backendConfig) val awsAuth = batchAttributes.auth - val dockerCredentials = BackendDockerConfiguration.build(configurationDescriptor.backendConfig).dockerCredentials val fileSystem = configurationDescriptor.backendConfig.hasPath("filesystems.s3") match { case true => "s3" @@ -55,6 +54,8 @@ class AwsBatchConfiguration(val configurationDescriptor: BackendConfigurationDes case true => S3PathBuilderFactory(configurationDescriptor.globalConfig, configurationDescriptor.backendConfig) case false => PathBuilderFactory } + val dockerCredentials = BackendDockerConfiguration.build(configurationDescriptor.backendConfig).dockerCredentials + val dockerToken: Option[String] = dockerCredentials map { _.token } val fsxMntPoint = batchAttributes.fsxMntPoint } diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchInitializationActor.scala 
b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchInitializationActor.scala index 93f06cd3477..347e697e55b 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchInitializationActor.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchInitializationActor.scala @@ -35,6 +35,8 @@ import java.io.IOException import akka.actor.ActorRef import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider +import software.amazon.awssdk.services.secretsmanager.SecretsManagerClient +import software.amazon.awssdk.services.secretsmanager.model.{CreateSecretRequest, SecretsManagerException, SecretListEntry, UpdateSecretRequest} import cromwell.filesystems.s3.batch.S3BatchCommandBuilder import cromwell.backend.standard.{StandardInitializationActor, StandardInitializationActorParams, StandardValidatedRuntimeAttributesBuilder} import cromwell.backend.{BackendConfigurationDescriptor, BackendWorkflowDescriptor, BackendInitializationData} @@ -42,8 +44,12 @@ import cromwell.core.io.DefaultIoCommandBuilder import cromwell.core.io.AsyncIoActorClient import cromwell.core.path.Path import wom.graph.CommandCallNode +import org.apache.commons.codec.binary.Base64 +import spray.json.{JsObject, JsString} +import org.slf4j.{Logger, LoggerFactory} import scala.concurrent.Future +import scala.collection.JavaConverters._ case class AwsBatchInitializationActorParams ( @@ -65,6 +71,8 @@ object AwsBatchInitializationActor { class AwsBatchInitializationActor(params: AwsBatchInitializationActorParams) extends StandardInitializationActor(params) with AsyncIoActorClient { + val Log: Logger = LoggerFactory.getLogger(AwsBatchInitializationActor.getClass) + override lazy val ioActor = params.ioActor private val configuration = params.configuration override implicit val system = context.system @@ -87,6 +95,63 @@ class AwsBatchInitializationActor(params: AwsBatchInitializationActorParams) private lazy val provider: Future[AwsCredentialsProvider] = Future { configuration.awsAuth.provider() } + lazy val secretsClient: SecretsManagerClient = { + val builder = SecretsManagerClient.builder() + configureClient(builder, Option(configuration.awsAuth), configuration.awsConfig.region) + } + + private def storePrivateDockerToken(token: String) = { + try { + + val secretName: String = "cromwell/credentials/dockerhub" + + // Check if secret already exists + // If exists, update it otherwise create it + val secretsList: List[SecretListEntry] = secretsClient.listSecrets().secretList().asScala.toList + + if(secretsList.exists(_.name == secretName)){ + val secretRequest: UpdateSecretRequest = UpdateSecretRequest.builder() + .secretId(secretName) + .secretString(token) + .build(); + + secretsClient.updateSecret(secretRequest); + + Log.info(s"Secret '$secretName' was updated.") + } else { + val secretRequest: CreateSecretRequest = CreateSecretRequest.builder() + .name(secretName) + .secretString(token) + .build() + + secretsClient.createSecret(secretRequest) + + Log.info(s"Secret '$secretName' was created.") + } + } + catch { + case e: SecretsManagerException => Log.warn(e.awsErrorDetails().errorMessage()) + } + } + + val privateDockerUnencryptedToken: Option[String] = configuration.dockerToken flatMap { dockerToken => + new String(Base64.decodeBase64(dockerToken)).split(':') match { + case Array(username, password) => + // unencrypted tokens are base64-encoded username:password + Option(JsObject( + Map( + "username" -> JsString(username), + "password" -> 
JsString(password) + )).compactPrint) + case _ => throw new RuntimeException(s"provided dockerhub token '$dockerToken' is not a base64-encoded username:password") + } + } + + privateDockerUnencryptedToken match { + case Some(token) => storePrivateDockerToken(token) + case None => Log.debug("No docker token was passed") + } + override lazy val workflowPaths: Future[AwsBatchWorkflowPaths] = for { prov <- provider } yield new AwsBatchWorkflowPaths(workflowDescriptor, prov, configuration) diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md index f0c2ee2eb03..62206005546 100644 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md @@ -379,6 +379,20 @@ the AWS Services involved in the processing of the work. NOTE: ECS Agent permissions currently must use the permissions as outlined in the AmazonEC2ContainerServiceForEC2Role managed policy. +Features +--------------------- +### Docker Hub Authentication + +Docker Hub authentication for AWS Backend enable users to access and use private Docker containers. + +1. Create an access token in Docker Hub; +2. Encode the following string as base64: `:` +3. Place the following snippet into `cromwell.conf` file under `config`: +``` +dockerhub { token = "" } +``` + + Future considerations --------------------- From cb134359e3871ace7fb19da04634d58d81ac3d17 Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Tue, 15 Mar 2022 13:12:24 +0000 Subject: [PATCH 040/326] move features section to top --- .../scala/cromwell/backend/impl/aws/README.md | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md index 62206005546..b766002af1c 100644 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md @@ -29,6 +29,20 @@ defined. This infrastructure and all the associated configuration still exists; however, it is moved out of the Cromwell configuration. +Features +--------------------- +### Docker Hub Authentication + +Docker Hub authentication for AWS Backend enable users to access and use private Docker containers. + +1. Create an access token in Docker Hub; +2. Encode the following string as base64: `:` +3. Place the following snippet into `cromwell.conf` file under `config`: +``` +dockerhub { token = "" } +``` + + AWS Batch --------- @@ -379,19 +393,6 @@ the AWS Services involved in the processing of the work. NOTE: ECS Agent permissions currently must use the permissions as outlined in the AmazonEC2ContainerServiceForEC2Role managed policy. -Features ---------------------- -### Docker Hub Authentication - -Docker Hub authentication for AWS Backend enable users to access and use private Docker containers. - -1. Create an access token in Docker Hub; -2. Encode the following string as base64: `:` -3. 
Place the following snippet into `cromwell.conf` file under `config`: -``` -dockerhub { token = "" } -``` - Future considerations --------------------- From b42341e28615107f083fa5e003947432d14b927c Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Tue, 15 Mar 2022 14:23:57 +0000 Subject: [PATCH 041/326] update diagram1 --- .../scala/cromwell/backend/impl/aws/README.md | 53 +++++-------------- 1 file changed, 14 insertions(+), 39 deletions(-) diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md index b766002af1c..07023224cff 100644 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md @@ -42,7 +42,6 @@ Docker Hub authentication for AWS Backend enable users to access and use private dockerhub { token = "" } ``` - AWS Batch --------- @@ -83,47 +82,23 @@ will auto-expand, generated shell scripts from S3 that contain the instructions of the workflow task -```text - +-------------+ - | | - | AWS Batch | - | | - +------+------+ - | - | - | - | - | - +----------------v------------------+ - | | - | Elastic Container Service (ECS) | - | | - +----------------+------------------+ - | - | - | - | - | -+------------------------v-------------------------+ -| | -| AutoScaling Group | -| | -| +---------------------------------+ | -| | | | -| | EC2 Instance | | -| | | | -| | +--------------------+ | | -| | | | | | -| | | Docker Container | | | -| | | | | | -| | +--------------------+ ... | | -| | | | -| +---------------------------------+ ... | -| | -+--------------------------------------------------+ +```mermaid + flowchart LR + subgraph auto ["AutoScaling Group"] + direction RL + subgraph ec2_1 ["EC2 Instance"] + docker_1["Docker Container"] + end + subgraph ec2_2 ["EC2 Instance"] + docker_2["Docker Container"] + end + end + batch["AWS Batch"]-->ecs["Elastic Container Service (ECS)"]; + ecs-->auto; ``` + Cromwell AWS Batch Backend -------------------------- From d4dc38d505e42d430019f14cacadbc7639c1af7e Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Tue, 15 Mar 2022 14:31:09 +0000 Subject: [PATCH 042/326] update diagram 2 and 3 --- .../scala/cromwell/backend/impl/aws/README.md | 78 +++++-------------- 1 file changed, 18 insertions(+), 60 deletions(-) diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md index 07023224cff..52d43f01d88 100644 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md @@ -106,32 +106,14 @@ There are several scala classes as part of the AWS Batch Backend, but the primary classes involved in running the backend are shown below. The arrows represent the flow of job submission. 
-```text - +----------------------------------------+ - | | - | AwsBatchBackendLifecycleActorFactory | - | | - +------------------+---------------------+ - | - | - | - | - | - +------------------v----------------------+ - | | - | AwsBatchAsyncBackendJobExecutionActor | - | | - +------------------+----------------------+ - | - | - | - | - | - +-------v-------+ +-------------------------+ - | | | | - | AwsBatchJob +-----------------> AwsBatchJobDefinition | - | | | | - +---------------+ +-------------------------+ +```mermaid + flowchart TD; + factory[AwsBatchBackendLifecycleActorFactory] + execution[AwsBatchAsyncBackendJobExecutionActor] + job[AwsBatchJob] + definition[AwsBatchJobDefinition] + + factory-->execution-->job-->definition; ``` 1. The `AwsBatchBackendLifecycleActorFactory` class is configured by the user @@ -150,41 +132,17 @@ arrows represent the flow of job submission. AWS Batch Job Instantiation --------------------------- -```text - +--------------------+ - | | - | Cromwell Backend | - | | - +---------+----------+ - | - | - SubmitJob - | - | - +------v------+ - | | - | AWS Batch | - | | - +------^------+ - | - | - Polls - | - | - +------+------+ - | | - | ECS Agent | - | | - +------+------+ - | - Creates, Launches and Monitors - | - +--------v---------+ - | | - | Task Container | - | | - +------------------+ +```mermaid + flowchart TD; + cromwell[Cromwell Backend] + batch[AWS Batch] + ecs[ECS Agent] + task[Task Container] + + cromwell-- SubmitJob -->batch + batch-- Polls -->ecs + ecs-- Creates, Launches and Monitors -->task ``` When a Cromwell task begins, the Cromwell backend will call the SubmitJob From 355648388fcac88d61a68e087d384edc7a9fb5d7 Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Tue, 15 Mar 2022 15:14:47 +0000 Subject: [PATCH 043/326] last diagram --- .../scala/cromwell/backend/impl/aws/README.md | 116 +++++++++--------- 1 file changed, 60 insertions(+), 56 deletions(-) diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md index 52d43f01d88..a898a61e4e4 100644 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md @@ -134,11 +134,11 @@ AWS Batch Job Instantiation --------------------------- ```mermaid - flowchart TD; - cromwell[Cromwell Backend] - batch[AWS Batch] - ecs[ECS Agent] - task[Task Container] + flowchart TD + cromwell["Cromwell Backend"] + batch["AWS Batch"] + ecs["ECS Agent"] + task["Task Container"] cromwell-- SubmitJob -->batch batch-- Polls -->ecs @@ -252,58 +252,62 @@ The flow described below represents the permissions needed by each stage, from Cromwell server through the task running. This includes the permissions needed for the AWS Services involved in the processing of the work. 
-```text -+----------------------------+ -| | s3:GetObject on bucket for workflow and script bucket -| | s3:ListObjects on script bucket -| | s3:PutObject on script bucket -| Cromwell | batch:RegisterTaskDefinition -| | batch:SubmitJob -| | batch:DescribeJobs -| | batch:DescribeJobDefinitions -+-------------+--------------+ - | - | - | -+-------------v--------------+ -| | AWSBatchServiceRole managed policy - described at: -| AWS Batch | -| | https://docs.aws.amazon.com/batch/latest/userguide/service_IAM_role.html -+-------------+--------------+ - | - | - | -+-------------v--------------+ -| | AWSServiceRoleForECS Service-linked role, documented at: -| | -| Elastic Container Service | https://docs.aws.amazon.com/AmazonECS/latest/developerguide/using-service-linked-roles.html -| | -| (See discussion #1 below) | AmazonEC2ContainerServiceAutoscaleRole managed policy - described at: -| | -| | https://docs.aws.amazon.com/AmazonECS/latest/developerguide/autoscale_IAM_role.html -+-------------+--------------+ - | - | - | -+-------------v--------------+ -| | -| | AmazonEC2ContainerServiceforEC2Role managed policy, described at: -| ECS Agent (running on EC2) | https://docs.aws.amazon.com/AmazonECS/latest/developerguide/instance_IAM_role.html (EC2) -| | OR -| | AmazonECSTaskExecutionRolePolicy managed policy, described at: -| | https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task_execution_IAM_role.html (Fargate) -+-------------+--------------+ - | - | - | -+-------------v--------------+ -| | Task Role permissions. These are user defined, but ecs-tasks.amazon.com must have sts:AssumeRole trust relationship defined. Documentation: -| Task Container | -| | https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task_IAM_role.html -| | s3:GetObject, s3:PutObject, s3:ListObjects -+----------------------------+ -``` +```mermaid + flowchart TD + cromwell["Cromwell"] + batch["AWS Batch"] + ecs["Elastic Container Service\n\n(See discussion #1 below)"] + ec2["ECS Agent (running on EC2)"] + task["Task Container"] + + cromwell_desc["s3:GetObject on bucket for workflow and script bucket + s3:ListObjects on script bucket + s3:PutObject on script bucket + batch:RegisterTaskDefinition + batch:SubmitJob + batch:DescribeJobs + batch:DescribeJobDefinitions"] + + batch_desc["AWSBatchServiceRole managed policy, described here"] + + ecs_desc["AWSServiceRoleForECS Service-linked role, described here + + AmazonEC2ContainerServiceAutoscaleRole managed policy, described here"] + + ec2_desc["(EC2) AmazonEC2ContainerServiceforEC2Role managed policy, described here + + (Fargate) AmazonECSTaskExecutionRolePolicy managed policy, described here"] + + task_desc["Task Role permissions. + These are user defined, but ecs-tasks.amazon.com must have sts:AssumeRole trust relationship defined. + Documentation here + s3:GetObject + s3:PutObject + s3:ListObjects"] + subgraph 1 [" "] + direction RL + cromwell_desc-->cromwell + end + subgraph 2 [" "] + direction RL + batch_desc-->batch + end + subgraph 3 [" "] + direction RL + ecs_desc-->ecs + end + subgraph 4 [" "] + direction RL + ec2_desc-->ec2 + end + subgraph 5 [" "] + direction RL + task_desc-->task + end + + 1-->2-->3-->4-->5 +``` 1. ECS has several sets of permissions for various items. 
AWS Batch, however, does not take advantage of certain features of ECS, most importantly From 26089fed992605ce709770442133bbdbf5241061 Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Tue, 15 Mar 2022 15:20:45 +0000 Subject: [PATCH 044/326] add documentation --- docs/RuntimeAttributes.md | 4 +- .../scala/cromwell/backend/impl/aws/README.md | 47 +++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/docs/RuntimeAttributes.md b/docs/RuntimeAttributes.md index c31b6fbbf41..1f16e125eff 100644 --- a/docs/RuntimeAttributes.md +++ b/docs/RuntimeAttributes.md @@ -379,7 +379,9 @@ runtime { *Default: _0_* -This runtime attribute adds support to [*AWS Batch Automated Job Retries*](https://docs.aws.amazon.com/batch/latest/userguide/job_retries.html) which makes it possible to tackle transient job failures. For example, if a task fails due to a timeout from accessing an external service, then this option helps re-run the failed the task without having to re-run the entire workflow. It takes an Int, between 1 and 10, as a value that indicates the maximum number of times AWS Batch should retry a failed task. If the value 0 is passed, the [*Retry Strategy*](https://docs.aws.amazon.com/batch/latest/userguide/job_definition_parameters.html#retryStrategy) will not be added to the job definiton and the task will run just once. +This runtime attribute adds support to [*AWS Batch Automated Job Retries*](https://docs.aws.amazon.com/batch/latest/userguide/job_retries.html) which makes it possible to tackle transient job failures. For example, if a task fails due to a timeout from accessing an external service, then this option helps re-run the failed the task without having to re-run the entire workflow. This option is also very useful when using SPOT instances. + +It takes an Int, between 1 and 10, as a value that indicates the maximum number of times AWS Batch should retry a failed task. If the value 0 is passed, the [*Retry Strategy*](https://docs.aws.amazon.com/batch/latest/userguide/job_definition_parameters.html#retryStrategy) will not be added to the job definiton and the task will run just once. ``` runtime { diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md index a898a61e4e4..4cb7e67bb4b 100644 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md @@ -42,6 +42,53 @@ Docker Hub authentication for AWS Backend enable users to access and use private dockerhub { token = "" } ``` +### `awsBatchRetryAttempts` + +*Default: _0_* + +This runtime attribute adds support to [*AWS Batch Automated Job Retries*](https://docs.aws.amazon.com/batch/latest/userguide/job_retries.html) which makes it possible to tackle transient job failures. For example, if a task fails due to a timeout from accessing an external service, then this option helps re-run the failed the task without having to re-run the entire workflow. This option is also very useful when using SPOT instances. + +It takes an Int, between 1 and 10, as a value that indicates the maximum number of times AWS Batch should retry a failed task. If the value 0 is passed, the [*Retry Strategy*](https://docs.aws.amazon.com/batch/latest/userguide/job_definition_parameters.html#retryStrategy) will not be added to the job definiton and the task will run just once. 
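For readers tracking the backend side of this patch series: below is a minimal sketch of how a non-zero `awsBatchRetryAttempts` value could be turned into an AWS Batch `RetryStrategy` when the job definition is registered. The guard on zero mirrors `AwsBatchJob.registerJobDefinition` shown earlier in this series; the `withRetryStrategy` helper and the `attempts` parameter are illustrative only, not part of the codebase. The `runtime` snippet that follows shows the user-facing WDL side.

```scala
// Sketch only: attach an AWS Batch retry strategy when awsBatchRetryAttempts is non-zero.
// `attempts` stands in for the validated runtime-attribute value (1..10).
import software.amazon.awssdk.services.batch.model.{RegisterJobDefinitionRequest, RetryStrategy}

def withRetryStrategy(builder: RegisterJobDefinitionRequest.Builder, attempts: Int): RegisterJobDefinitionRequest.Builder =
  if (attempts != 0)
    builder.retryStrategy(RetryStrategy.builder().attempts(attempts).build()) // Batch retries the job up to `attempts` times
  else
    builder // 0: no retry strategy is registered and the task runs exactly once
```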
+ +``` +runtime { + awsBatchRetryAttempts: integer +} +``` + +### `ulimits` + +*Default: _empty_* + +A list of [`ulimits`](https://docs.aws.amazon.com/batch/latest/userguide/job_definition_parameters.html#containerProperties) values to set in the container. This parameter maps to `Ulimits` in the [Create a container](https://docs.docker.com/engine/api/v1.38/) section of the [Docker Remote API](https://docs.docker.com/engine/api/v1.38/) and the `--ulimit` option to [docker run](https://docs.docker.com/engine/reference/commandline/run/). + +``` +"ulimits": [ + { + "name": string, + "softLimit": integer, + "hardLimit": integer + } + ... +] +``` +Parameter description: + +- `name` + - The `type` of the `ulimit`. + - Type: String + - Required: Yes, when `ulimits` is used. + +- `softLimit` + - The soft limit for the `ulimit` type. + - Type: Integer + - Required: Yes, when `ulimits` is used. + +- `hardLimit` + - The hard limit for the `ulimit` type. + - Type: Integer + - Required: Yes, when `ulimits` is used. + AWS Batch --------- From 2971fa696e9cdea42c063bd1903fe3205de2ef84 Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Tue, 15 Mar 2022 15:33:09 +0000 Subject: [PATCH 045/326] update readme --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 2f053f1a821..bb313473f58 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,9 @@ [![Build Status](https://travis-ci.com/broadinstitute/cromwell.svg?branch=develop)](https://travis-ci.com/broadinstitute/cromwell?branch=develop) [![codecov](https://codecov.io/gh/broadinstitute/cromwell/branch/develop/graph/badge.svg)](https://codecov.io/gh/broadinstitute/cromwell) -## Welcome to Cromwell +## Welcome to "AWS-friendly" Cromwell + +More information regarding AWS features can be found [here](https://github.com/henriqueribeiro/cromwell/tree/master/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws) Cromwell is an open-source Workflow Management System for bioinformatics. Licensing is [BSD 3-Clause](LICENSE.txt). 
From 600df43845cb3a47374179a4dc3926116d29abbd Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Wed, 16 Mar 2022 18:47:07 +0000 Subject: [PATCH 046/326] move rm command to the batch command --- .../impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala index 8d44fd84d36..34334449587 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala @@ -127,6 +127,7 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar private lazy val execScript = s"""|#!$jobShell + |find ${jobPaths.script.parent.pathWithoutScheme} -group root | grep -v script | xargs rm -vrf |${jobPaths.script.pathWithoutScheme} |""".stripMargin @@ -412,7 +413,7 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar override def scriptPreamble: String = { configuration.fileSystem match { case AWSBatchStorageSystems.s3 => "" - case _ => s"find ${jobPaths.script.parent.pathWithoutScheme} -group root | grep -v script | xargs rm -vrf" + case _ => "" } } From 2773507423742c5ece540fcc9e748e16ef07be83 Mon Sep 17 00:00:00 2001 From: geertvandeweyer Date: Fri, 18 Mar 2022 15:04:51 +0100 Subject: [PATCH 047/326] Feature/ecr private (#2) * correct pull identifier for local docker hashing on ECR private * removed check for deprecated attributes, causing assembly to fail * catch missing RC file and retry job after 5min sleep instead of failing WF --- .../StandardAsyncExecutionActor.scala | 32 ++++++++++++++++--- .../cromwell/docker/local/DockerCliFlow.scala | 30 ++++++++++------- .../backend/impl/aws/AwsBatchJob.scala | 26 +++++++++------ 3 files changed, 63 insertions(+), 25 deletions(-) diff --git a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala index ea07651e825..df20a8b318f 100644 --- a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala +++ b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala @@ -1232,8 +1232,10 @@ trait StandardAsyncExecutionActor */ def handleExecutionResult(status: StandardAsyncRunState, oldHandle: StandardAsyncPendingExecutionHandle): Future[ExecutionHandle] = { - + + // get the memory retry code. def memoryRetryRC: Future[Boolean] = { + // convert int to boolean def returnCodeAsBoolean(codeAsOption: Option[String]): Boolean = { codeAsOption match { case Some(codeAsString) => @@ -1250,14 +1252,14 @@ trait StandardAsyncExecutionActor case None => false } } - + // read if the file exists def readMemoryRetryRCFile(fileExists: Boolean): Future[Option[String]] = { if (fileExists) asyncIo.contentAsStringAsync(jobPaths.memoryRetryRC, None, failOnOverflow = false).map(Option(_)) else Future.successful(None) } - + //finally : assign the yielded variable for { fileExists <- asyncIo.existsAsync(jobPaths.memoryRetryRC) retryCheckRCAsOption <- readMemoryRetryRCFile(fileExists) @@ -1265,11 +1267,31 @@ trait StandardAsyncExecutionActor } yield retryWithMoreMemory } + // get the exit code of the job. 
+ def JobExitCode: Future[String] = { + + // read if the file exists + def readRCFile(fileExists: Boolean): Future[String] = { + if (fileExists) + asyncIo.contentAsStringAsync(jobPaths.returnCode, None, failOnOverflow = false) + else + jobLogger.warn("RC file not found. Setting job to failed & waiting 5m before retry.") + Thread.sleep(300000) + Future("1") + } + //finally : assign the yielded variable + for { + fileExists <- asyncIo.existsAsync(jobPaths.returnCode) + jobRC <- readRCFile(fileExists) + } yield jobRC + } + + // get path to sderr val stderr = jobPaths.standardPaths.error lazy val stderrAsOption: Option[Path] = Option(stderr) - + // get the three needed variables, using functions above or direct assignment. val stderrSizeAndReturnCodeAndMemoryRetry = for { - returnCodeAsString <- asyncIo.contentAsStringAsync(jobPaths.returnCode, None, failOnOverflow = false) + returnCodeAsString <- JobExitCode // Only check stderr size if we need to, otherwise this results in a lot of unnecessary I/O that // may fail due to race conditions on quickly-executing jobs. stderrSize <- if (failOnStdErr) asyncIo.sizeAsync(stderr) else Future.successful(0L) diff --git a/dockerHashing/src/main/scala/cromwell/docker/local/DockerCliFlow.scala b/dockerHashing/src/main/scala/cromwell/docker/local/DockerCliFlow.scala index f6bf6e96d4c..9be67d87de2 100644 --- a/dockerHashing/src/main/scala/cromwell/docker/local/DockerCliFlow.scala +++ b/dockerHashing/src/main/scala/cromwell/docker/local/DockerCliFlow.scala @@ -109,17 +109,25 @@ object DockerCliFlow { /** Utility for converting the flow image id to the format output by the docker cli. */ private def cliKeyFromImageId(context: DockerInfoContext): DockerCliKey = { val imageId = context.dockerImageID - (imageId.host, imageId.repository) match { - case (None, None) => - // For docker hub images (host == None), and don't include "library". - val repository = imageId.image - val tag = imageId.reference - DockerCliKey(repository, tag) - case _ => - // For all other images, include the host and repository. - val repository = s"${imageId.hostAsString}${imageId.nameWithDefaultRepository}" - val tag = imageId.reference - DockerCliKey(repository, tag) + // private aws ECR does not have library, check for ECR in docker host. + if ( imageId.hostAsString.matches(raw"\d+\.dkr\.ecr\..+\.amazonaws\.com/") ) { + val repository = s"${imageId.hostAsString}${imageId.image}" + val tag = imageId.reference + DockerCliKey(repository, tag) + } else { + (imageId.host, imageId.repository) match { + case (None, None) => + // For docker hub images (host == None), and don't include "library". + val repository = imageId.image + val tag = imageId.reference + DockerCliKey(repository, tag) + case _ => + // For all other images, include the host and repository. 
+ val repository = s"${imageId.hostAsString}${imageId.nameWithDefaultRepository}" + val tag = imageId.reference + DockerCliKey(repository, tag) + + } } } } diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala index 0db547242c1..95aaef26a05 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala @@ -405,12 +405,12 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL val existingDefinition = describeJobDefinitionResponse.jobDefinitions().asScala.toList.sortWith(_.revision > _.revision).head //TODO test this - if (existingDefinition.containerProperties().memory() != null || existingDefinition.containerProperties().vcpus() != null) { - Log.warn("the job definition '{}' has deprecated configuration for memory and vCPU and will be replaced", existingDefinition.jobDefinitionName()) - registerJobDefinition(jobDefinition, jobDefinitionContext).jobDefinitionArn() - } else { - existingDefinition.jobDefinitionArn() - } + //if (existingDefinition.containerProperties().memory() != null || existingDefinition.containerProperties().vcpus() != null) { + // Log.warn("the job definition '{}' has deprecated configuration for memory and vCPU and will be replaced", existingDefinition.jobDefinitionName()) + // registerJobDefinition(jobDefinition, jobDefinitionContext).jobDefinitionArn() + //} else { + existingDefinition.jobDefinitionArn() + //} } else { Log.debug(s"No job definition found. Creating job definition: ${jobDefinition.name}") @@ -479,10 +479,18 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL jobDetail } - def rc(detail: JobDetail): Integer = { - detail.container.exitCode + // code didn't get into the null block, so possibly not needed. + def rc(detail: JobDetail): Integer = { + if (detail.container.exitCode == null) { + // if exitCode is not present, return failed ( exitCode == 127 for command not found) + Log.info("rc value missing. Setting to failed and sleeping for 30s...") + Thread.sleep(30000) + 127 + } else { + Log.info("rc value found. 
Setting to '{}'",detail.container.exitCode.toString()) + detail.container.exitCode + } } - def output(detail: JobDetail): String = { val events: Seq[OutputLogEvent] = cloudWatchLogsClient.getLogEvents(GetLogEventsRequest.builder // http://aws-java-sdk-javadoc.s3-website-us-west-2.amazonaws.com/latest/software/amazon/awssdk/services/batch/model/ContainerDetail.html#logStreamName-- From bad8e112480d1d2c24cec580b0dab7a92a740de7 Mon Sep 17 00:00:00 2001 From: geertvandeweyer Date: Sun, 20 Mar 2022 07:35:57 +0100 Subject: [PATCH 048/326] Corrected Syntax on retunCode Checking --- .../backend/standard/StandardAsyncExecutionActor.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala index df20a8b318f..8ec3de54487 100644 --- a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala +++ b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala @@ -1274,10 +1274,11 @@ trait StandardAsyncExecutionActor def readRCFile(fileExists: Boolean): Future[String] = { if (fileExists) asyncIo.contentAsStringAsync(jobPaths.returnCode, None, failOnOverflow = false) - else + else { jobLogger.warn("RC file not found. Setting job to failed & waiting 5m before retry.") Thread.sleep(300000) Future("1") + } } //finally : assign the yielded variable for { From 0f1a23624ad666cc457ca4609890323778a614fc Mon Sep 17 00:00:00 2001 From: geertvandeweyer Date: Sun, 20 Mar 2022 17:39:15 +0100 Subject: [PATCH 049/326] Corrected Syntax on retunCode Checking (#3) --- .../backend/standard/StandardAsyncExecutionActor.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala index df20a8b318f..8ec3de54487 100644 --- a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala +++ b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala @@ -1274,10 +1274,11 @@ trait StandardAsyncExecutionActor def readRCFile(fileExists: Boolean): Future[String] = { if (fileExists) asyncIo.contentAsStringAsync(jobPaths.returnCode, None, failOnOverflow = false) - else + else { jobLogger.warn("RC file not found. Setting job to failed & waiting 5m before retry.") Thread.sleep(300000) Future("1") + } } //finally : assign the yielded variable for { From 8f6525cab264ab98445e23c0f54fde7080d9541d Mon Sep 17 00:00:00 2001 From: geertvandeweyer Date: Thu, 24 Mar 2022 14:41:55 +0100 Subject: [PATCH 050/326] Added documentation for ECR Call Caching --- .../scala/cromwell/backend/impl/aws/README.md | 339 ++++++++++-------- 1 file changed, 189 insertions(+), 150 deletions(-) diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md index f0c2ee2eb03..fbbd7d50d67 100644 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md @@ -29,6 +29,106 @@ defined. This infrastructure and all the associated configuration still exists; however, it is moved out of the Cromwell configuration. 
+Features +--------------------- +### Docker Hub Authentication + +Docker Hub authentication for AWS Backend enable users to access and use private Docker containers. + +1. Create an access token in Docker Hub; +2. Encode the following string as base64: `:` +3. Place the following snippet into `cromwell.conf` file under `config`: +``` +dockerhub { token = "" } +``` + +### `awsBatchRetryAttempts` + +*Default: _0_* + +This runtime attribute adds support to [*AWS Batch Automated Job Retries*](https://docs.aws.amazon.com/batch/latest/userguide/job_retries.html) which makes it possible to tackle transient job failures. For example, if a task fails due to a timeout from accessing an external service, then this option helps re-run the failed the task without having to re-run the entire workflow. This option is also very useful when using SPOT instances. + +It takes an Int, between 1 and 10, as a value that indicates the maximum number of times AWS Batch should retry a failed task. If the value 0 is passed, the [*Retry Strategy*](https://docs.aws.amazon.com/batch/latest/userguide/job_definition_parameters.html#retryStrategy) will not be added to the job definiton and the task will run just once. + +``` +runtime { + awsBatchRetryAttempts: integer +} +``` + +### `ulimits` + +*Default: _empty_* + +A list of [`ulimits`](https://docs.aws.amazon.com/batch/latest/userguide/job_definition_parameters.html#containerProperties) values to set in the container. This parameter maps to `Ulimits` in the [Create a container](https://docs.docker.com/engine/api/v1.38/) section of the [Docker Remote API](https://docs.docker.com/engine/api/v1.38/) and the `--ulimit` option to [docker run](https://docs.docker.com/engine/reference/commandline/run/). + +``` +"ulimits": [ + { + "name": string, + "softLimit": integer, + "hardLimit": integer + } + ... +] +``` +Parameter description: + +- `name` + - The `type` of the `ulimit`. + - Type: String + - Required: Yes, when `ulimits` is used. + +- `softLimit` + - The soft limit for the `ulimit` type. + - Type: Integer + - Required: Yes, when `ulimits` is used. + +- `hardLimit` + - The hard limit for the `ulimit` type. + - Type: Integer + - Required: Yes, when `ulimits` is used. + +### Call Caching with ECR private + +AWS ECR is a private container registry, for which access can be regulated using IAM. Call caching is possible by setting up the following configuration: + +1. Setup a user with pull-access to ECR, then use this role to run cromwell + +profile default region must be setup in ~/.aws/config: +``` +[profile MyECR-user] +region = eu-west-1 +``` +Provide the profile when launching cromwell: +``` +AWS_PROFILE=MyECR-user java .... -jar cromwell.jar run .... +``` + +Other methods to provide the profile might also work, but are not tested (Environment, roles, ...) + +2. Enable call caching in the cromwell configuration + +The following statement enable call caching, with "local" hash checking: + +``` +call-caching { + enabled = true + invalidate-bad-cache-results = true +} +docker { + hash-lookup { + method = "local" + } +} +``` + +Notes: +- local hashing means that all used containers are pulled. 
Make sure you have enough storage +- enable a database to make the cache persistent over cromwell restarts + + + AWS Batch --------- @@ -69,47 +169,23 @@ will auto-expand, generated shell scripts from S3 that contain the instructions of the workflow task -```text - +-------------+ - | | - | AWS Batch | - | | - +------+------+ - | - | - | - | - | - +----------------v------------------+ - | | - | Elastic Container Service (ECS) | - | | - +----------------+------------------+ - | - | - | - | - | -+------------------------v-------------------------+ -| | -| AutoScaling Group | -| | -| +---------------------------------+ | -| | | | -| | EC2 Instance | | -| | | | -| | +--------------------+ | | -| | | | | | -| | | Docker Container | | | -| | | | | | -| | +--------------------+ ... | | -| | | | -| +---------------------------------+ ... | -| | -+--------------------------------------------------+ +```mermaid + flowchart LR + subgraph auto ["AutoScaling Group"] + direction RL + subgraph ec2_1 ["EC2 Instance"] + docker_1["Docker Container"] + end + subgraph ec2_2 ["EC2 Instance"] + docker_2["Docker Container"] + end + end + batch["AWS Batch"]-->ecs["Elastic Container Service (ECS)"]; + ecs-->auto; ``` + Cromwell AWS Batch Backend -------------------------- @@ -117,32 +193,14 @@ There are several scala classes as part of the AWS Batch Backend, but the primary classes involved in running the backend are shown below. The arrows represent the flow of job submission. -```text - +----------------------------------------+ - | | - | AwsBatchBackendLifecycleActorFactory | - | | - +------------------+---------------------+ - | - | - | - | - | - +------------------v----------------------+ - | | - | AwsBatchAsyncBackendJobExecutionActor | - | | - +------------------+----------------------+ - | - | - | - | - | - +-------v-------+ +-------------------------+ - | | | | - | AwsBatchJob +-----------------> AwsBatchJobDefinition | - | | | | - +---------------+ +-------------------------+ +```mermaid + flowchart TD; + factory[AwsBatchBackendLifecycleActorFactory] + execution[AwsBatchAsyncBackendJobExecutionActor] + job[AwsBatchJob] + definition[AwsBatchJobDefinition] + + factory-->execution-->job-->definition; ``` 1. The `AwsBatchBackendLifecycleActorFactory` class is configured by the user @@ -161,41 +219,17 @@ arrows represent the flow of job submission. AWS Batch Job Instantiation --------------------------- -```text - +--------------------+ - | | - | Cromwell Backend | - | | - +---------+----------+ - | - | - SubmitJob - | - | - +------v------+ - | | - | AWS Batch | - | | - +------^------+ - | - | - Polls - | - | - +------+------+ - | | - | ECS Agent | - | | - +------+------+ - | - Creates, Launches and Monitors - | - +--------v---------+ - | | - | Task Container | - | | - +------------------+ +```mermaid + flowchart TD + cromwell["Cromwell Backend"] + batch["AWS Batch"] + ecs["ECS Agent"] + task["Task Container"] + + cromwell-- SubmitJob -->batch + batch-- Polls -->ecs + ecs-- Creates, Launches and Monitors -->task ``` When a Cromwell task begins, the Cromwell backend will call the SubmitJob @@ -305,59 +339,63 @@ The flow described below represents the permissions needed by each stage, from Cromwell server through the task running. This includes the permissions needed for the AWS Services involved in the processing of the work. 
-```text -+----------------------------+ -| | s3:GetObject on bucket for workflow and script bucket -| | s3:ListObjects on script bucket -| | s3:PutObject on script bucket -| Cromwell | batch:RegisterTaskDefinition -| | batch:SubmitJob -| | batch:DescribeJobs -| | batch:DescribeJobDefinitions -+-------------+--------------+ - | - | - | -+-------------v--------------+ -| | AWSBatchServiceRole managed policy - described at: -| AWS Batch | -| | https://docs.aws.amazon.com/batch/latest/userguide/service_IAM_role.html -+-------------+--------------+ - | - | - | -+-------------v--------------+ -| | AWSServiceRoleForECS Service-linked role, documented at: -| | -| Elastic Container Service | https://docs.aws.amazon.com/AmazonECS/latest/developerguide/using-service-linked-roles.html -| | -| (See discussion #1 below) | AmazonEC2ContainerServiceAutoscaleRole managed policy - described at: -| | -| | https://docs.aws.amazon.com/AmazonECS/latest/developerguide/autoscale_IAM_role.html -+-------------+--------------+ - | - | - | -+-------------v--------------+ -| | -| | AmazonEC2ContainerServiceforEC2Role managed policy, described at: -| ECS Agent (running on EC2) | https://docs.aws.amazon.com/AmazonECS/latest/developerguide/instance_IAM_role.html (EC2) -| | OR -| | AmazonECSTaskExecutionRolePolicy managed policy, described at: -| | https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task_execution_IAM_role.html (Fargate) -+-------------+--------------+ - | - | - | -+-------------v--------------+ -| | Task Role permissions. These are user defined, but ecs-tasks.amazon.com must have sts:AssumeRole trust relationship defined. Documentation: -| Task Container | -| | https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task_IAM_role.html -| | s3:GetObject, s3:PutObject, s3:ListObjects -+----------------------------+ +```mermaid + flowchart TD + cromwell["Cromwell"] + batch["AWS Batch"] + ecs["Elastic Container Service\n\n(See discussion #1 below)"] + ec2["ECS Agent (running on EC2)"] + task["Task Container"] + + cromwell_desc["s3:GetObject on bucket for workflow and script bucket + s3:ListObjects on script bucket + s3:PutObject on script bucket + batch:RegisterTaskDefinition + batch:SubmitJob + batch:DescribeJobs + batch:DescribeJobDefinitions"] + + batch_desc["AWSBatchServiceRole managed policy, described here"] + + ecs_desc["AWSServiceRoleForECS Service-linked role, described here + + AmazonEC2ContainerServiceAutoscaleRole managed policy, described here"] + + ec2_desc["(EC2) AmazonEC2ContainerServiceforEC2Role managed policy, described here + + (Fargate) AmazonECSTaskExecutionRolePolicy managed policy, described here"] + + task_desc["Task Role permissions. + These are user defined, but ecs-tasks.amazon.com must have sts:AssumeRole trust relationship defined. + Documentation here + s3:GetObject + s3:PutObject + s3:ListObjects"] + + subgraph 1 [" "] + direction RL + cromwell_desc-->cromwell + end + subgraph 2 [" "] + direction RL + batch_desc-->batch + end + subgraph 3 [" "] + direction RL + ecs_desc-->ecs + end + subgraph 4 [" "] + direction RL + ec2_desc-->ec2 + end + subgraph 5 [" "] + direction RL + task_desc-->task + end + + 1-->2-->3-->4-->5 ``` - 1. ECS has several sets of permissions for various items. AWS Batch, however, does not take advantage of certain features of ECS, most importantly ECS Services are out of scope of AWS Batch. ECS services require things @@ -379,6 +417,7 @@ the AWS Services involved in the processing of the work. 
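As an aside for orientation, here is a minimal sketch of the AWS Batch SDK calls that the Cromwell-side actions listed above correspond to (the registration call is `registerJobDefinition`, as shown earlier in `AwsBatchJob.scala`). This is not Cromwell's actual submission path; the job name, queue, and definition values below are placeholders.

```scala
// Sketch only: the Batch calls behind the batch:SubmitJob and batch:DescribeJobs permissions.
import software.amazon.awssdk.services.batch.BatchClient
import software.amazon.awssdk.services.batch.model.{DescribeJobsRequest, SubmitJobRequest}

val batch = BatchClient.create()

// batch:SubmitJob - queue one container task against a previously registered job definition
val submitted = batch.submitJob(
  SubmitJobRequest.builder()
    .jobName("example-task")               // placeholder
    .jobQueue("example-queue")             // placeholder
    .jobDefinition("example-definition:1") // placeholder
    .build())

// batch:DescribeJobs - poll the submitted job's status by id
val described = batch.describeJobs(
  DescribeJobsRequest.builder().jobs(submitted.jobId()).build())
```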
NOTE: ECS Agent permissions currently must use the permissions as outlined in the AmazonEC2ContainerServiceForEC2Role managed policy. + Future considerations --------------------- @@ -422,4 +461,4 @@ Cromwell * There is a significant amount of dependent libraries (with more added by the introduction of the S3 filesystem and the AWS SDK). Dependency management is challenging as a result. Adding significant new functionality is relatively - painful when new dependencies are needed. + painful when new dependencies are needed. \ No newline at end of file From 5cac2b2ab979be3b5646277ba7bad837a8bc87b3 Mon Sep 17 00:00:00 2001 From: geertvandeweyer Date: Fri, 25 Mar 2022 05:27:57 +0100 Subject: [PATCH 051/326] revised localization functions to improve stability --- .../backend/impl/aws/AwsBatchJob.scala | 108 ++++++++++++++---- .../backend/impl/aws/AwsBatchJobSpec.scala | 103 ++++++++++++++--- 2 files changed, 175 insertions(+), 36 deletions(-) diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala index c81bc75fe2b..e79a948377b 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala @@ -149,7 +149,7 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL // this goes at the start of the script after the #! val preamble = s""" - |export AWS_METADATA_SERVICE_TIMEOUT=10 + |export AWS_METADATA_SERVICE_TIMEOUT=10 |export AWS_METADATA_SERVICE_NUM_ATTEMPTS=10 | |function _s3_localize_with_retry() { @@ -157,28 +157,96 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL | # destination must be the path to a file and not just the directory you want the file in | local destination=$$2 | - | for i in {1..5}; + | for i in {1..6}; | do - | if [[ $$s3_path =~ s3://([^/]+)/(.+) ]]; then - | bucket="$${BASH_REMATCH[1]}" - | key="$${BASH_REMATCH[2]}" - | content_length=$$($awsCmd s3api head-object --bucket "$$bucket" --key "$$key" --query 'ContentLength') - | else + | # abort if tries are exhausted + | if [ "$$i" -eq 6 ]; then + | echo "failed to copy $$s3_path after $$(( $$i - 1 )) attempts. aborting" + | exit 2 + | fi + | # check validity of source path + | if ! [[ $$s3_path =~ s3://([^/]+)/(.+) ]]; then | echo "$$s3_path is not an S3 path with a bucket and key. aborting" | exit 1 | fi - | $awsCmd s3 cp --no-progress "$$s3_path" "$$destination" && - | [[ $$(LC_ALL=C ls -dn -- "$$destination" | awk '{print $$5; exit}') -eq "$$content_length" ]] && break || - | echo "attempt $$i to copy $$s3_path failed"; + | # copy + | $awsCmd s3 cp --no-progress "$$s3_path" "$$destination" || + | ( echo "attempt $$i to copy $$s3_path failed" sleep $$((7 * "$$i")) && continue) + | # check data integrity + | _check_data_integrity $$destination $$s3_path || + | (echo "data content length difference detected in attempt $$i to copy $$local_path failed" && sleep $$((7 * "$$i")) && continue) + | # copy succeeded + | break + | done + |} | - | if [ "$$i" -eq 5 ]; then - | echo "failed to copy $$s3_path after $$i attempts. 
aborting" + |function _s3_delocalize_with_retry() { + | local local_path=$$1 + | # destination must be the path to a file and not just the directory you want the file in + | local destination=$$2 + | + | for i in {1..6}; + | do + | # if tries exceeded : abort + | if [ "$$i" -eq 6 ]; then + | echo "failed to delocalize $$local_path after $$(( $$i - 1 )) attempts. aborting" | exit 2 | fi - | sleep $$((7 * "$$i")) + | # if destination is not a bucket : abort + | if ! [[ $$destination =~ s3://([^/]+)/(.+) ]]; then + | echo "$$destination is not an S3 path with a bucket and key. aborting" + | exit 1 + | fi + | # copy ok or try again. + | if [[ -d "$$local_path" ]]; then + | # make sure to strip the trailing / in destination + | destination=$${destination%/} + | # glob directory. do recursive copy + | $awsCmd s3 cp --no-progress $$local_path $$destination --recursive --exclude "cromwell_glob_control_file" || + | ( echo "attempt $$i to copy globDir $$local_path failed" && sleep $$((7 * "$$i")) && continue) + | # check integrity for each of the files + | for FILE in $$(cd $$local_path ; ls | grep -v cromwell_glob_control_file); do + | _check_data_integrity $$local_path/$$FILE $$destination/$$FILE || + | ( echo "data content length difference detected in attempt $$i to copy $$local_path/$$FILE failed" && sleep $$((7 * "$$i")) && continue 2) + | done + | else + | $awsCmd s3 cp --no-progress "$$local_path" "$$destination" || + | ( echo "attempt $$i to copy $$local_path failed" && sleep $$((7 * "$$i")) && continue) + | # check content length for data integrity + | _check_data_integrity $$local_path $$destination || + | ( echo "data content length difference detected in attempt $$i to copy $$local_path failed" && sleep $$((7 * "$$i")) && continue) + | fi + | # copy succeeded + | break | done |} | + |function _check_data_integrity() { + | local local_path=$$1 + | local s3_path=$$2 + | + | # remote : use content_length + | if [[ $$s3_path =~ s3://([^/]+)/(.+) ]]; then + | bucket="$${BASH_REMATCH[1]}" + | key="$${BASH_REMATCH[2]}" + | else + | # this is already checked in the caller function + | echo "$$s3_path is not an S3 path with a bucket and key. aborting" + | exit 1 + | fi + | s3_content_length=$$($awsCmd s3api head-object --bucket "$$bucket" --key "$$key" --query 'ContentLength') || + | ( echo "Attempt to get head of object failed for $$s3_path." && return 1 ) + | # local + | local_content_length=$$(LC_ALL=C ls -dn -- "$$local_path" | awk '{print $$5; exit}' ) || + | ( echo "Attempt to get local content length failed for $$_local_path." 
&& return 1 ) + | # compare + | if [[ "$$s3_content_length" -eq "$$local_content_length" ]]; then + | true + | else + | false + | fi + |} + | |{ |set -e |echo '*** LOCALIZING INPUTS ***' @@ -208,23 +276,23 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL */ s""" |touch ${output.name} - |$awsCmd s3 cp --no-progress ${output.name} ${output.s3key} - |if [ -e $globDirectory ]; then $awsCmd s3 cp --no-progress $globDirectory $s3GlobOutDirectory --recursive --exclude "cromwell_glob_control_file"; fi""".stripMargin + |_s3_delocalize_with_retry ${output.name} ${output.s3key} + |if [ -e $globDirectory ]; then _s3_delocalize_with_retry $globDirectory $s3GlobOutDirectory "; fi""".stripMargin case output: AwsBatchFileOutput if output.s3key.startsWith("s3://") && output.mount.mountPoint.pathAsString == AwsBatchWorkingDisk.MountPoint.pathAsString => //output is on working disk mount - s"""$awsCmd s3 cp --no-progress $workDir/${output.local.pathAsString} ${output.s3key}""".stripMargin + s"""_s3_delocalize_with_retry $workDir/${output.local.pathAsString} ${output.s3key}""".stripMargin case output: AwsBatchFileOutput => //output on a different mount - s"$awsCmd s3 cp --no-progress ${output.mount.mountPoint.pathAsString}/${output.local.pathAsString} ${output.s3key}" + s"_s3_delocalize_with_retry ${output.mount.mountPoint.pathAsString}/${output.local.pathAsString} ${output.s3key}" case _ => "" }.mkString("\n") + "\n" + s""" - |if [ -f $workDir/${jobPaths.returnCodeFilename} ]; then $awsCmd s3 cp --no-progress $workDir/${jobPaths.returnCodeFilename} ${jobPaths.callRoot.pathAsString}/${jobPaths.returnCodeFilename} ; fi - |if [ -f $stdErr ]; then $awsCmd s3 cp --no-progress $stdErr ${jobPaths.standardPaths.error.pathAsString}; fi - |if [ -f $stdOut ]; then $awsCmd s3 cp --no-progress $stdOut ${jobPaths.standardPaths.output.pathAsString}; fi + |if [ -f $workDir/${jobPaths.returnCodeFilename} ]; then _s3_delocalize_with_retry $workDir/${jobPaths.returnCodeFilename} ${jobPaths.callRoot.pathAsString}/${jobPaths.returnCodeFilename} ; fi + |if [ -f $stdErr ]; then _s3_delocalize_with_retry $stdErr ${jobPaths.standardPaths.error.pathAsString}; fi + |if [ -f $stdOut ]; then _s3_delocalize_with_retry $stdOut ${jobPaths.standardPaths.output.pathAsString}; fi |""".stripMargin diff --git a/supportedBackends/aws/src/test/scala/cromwell/backend/impl/aws/AwsBatchJobSpec.scala b/supportedBackends/aws/src/test/scala/cromwell/backend/impl/aws/AwsBatchJobSpec.scala index 12e933ab959..2a55f63be1c 100644 --- a/supportedBackends/aws/src/test/scala/cromwell/backend/impl/aws/AwsBatchJobSpec.scala +++ b/supportedBackends/aws/src/test/scala/cromwell/backend/impl/aws/AwsBatchJobSpec.scala @@ -182,32 +182,103 @@ class AwsBatchJobSpec extends TestKitSuite with AnyFlatSpecLike with Matchers wi it should "add s3 localize with retry function to reconfigured script" in { val job = generateBasicJob val retryFunctionText = s""" + |export AWS_METADATA_SERVICE_TIMEOUT=10 + |export AWS_METADATA_SERVICE_NUM_ATTEMPTS=10 + | |function _s3_localize_with_retry() { | local s3_path=$$1 | # destination must be the path to a file and not just the directory you want the file in | local destination=$$2 | - | for i in {1..5}; + | for i in {1..6}; | do - | if [[ $$s3_path =~ s3://([^/]+)/(.+) ]]; then - | bucket="$${BASH_REMATCH[1]}" - | key="$${BASH_REMATCH[2]}" - | content_length=$$(/usr/local/aws-cli/v2/current/bin/aws s3api head-object --bucket "$$bucket" --key "$$key" --query 'ContentLength') - | else + | # abort if tries are 
exhausted + | if [ "$$i" -eq 6 ]; then + | echo "failed to copy $$s3_path after $$(( $$i - 1 )) attempts. aborting" + | exit 2 + | fi + | # check validity of source path + | if ! [[ $$s3_path =~ s3://([^/]+)/(.+) ]]; then | echo "$$s3_path is not an S3 path with a bucket and key. aborting" | exit 1 | fi - | /usr/local/aws-cli/v2/current/bin/aws s3 cp --no-progress "$$s3_path" "$$destination" && - | [[ $$(LC_ALL=C ls -dn -- "$$destination" | awk '{print $$5; exit}') -eq "$$content_length" ]] && break || - | echo "attempt $$i to copy $$s3_path failed"; + | # copy + | /usr/local/aws-cli/v2/current/bin/aws s3 cp --no-progress "$$s3_path" "$$destination" || + | ( echo "attempt $$i to copy $$s3_path failed" sleep $$((7 * "$$i")) && continue) + | # check data integrity + | _check_data_integrity $$destination $$s3_path || + | (echo "data content length difference detected in attempt $$i to copy $$local_path failed" && sleep $$((7 * "$$i")) && continue) + | # copy succeeded + | break + | done + |} + | + |function _s3_delocalize_with_retry() { + | local local_path=$$1 + | # destination must be the path to a file and not just the directory you want the file in + | local destination=$$2 | - | if [ "$$i" -eq 5 ]; then - | echo "failed to copy $$s3_path after $$i attempts. aborting" + | for i in {1..6}; + | do + | # if tries exceeded : abort + | if [ "$$i" -eq 6 ]; then + | echo "failed to delocalize $$local_path after $$(( $$i - 1 )) attempts. aborting" | exit 2 | fi - | sleep $$((7 * "$$i")) + | # if destination is not a bucket : abort + | if ! [[ $$destination =~ s3://([^/]+)/(.+) ]]; then + | echo "$$destination is not an S3 path with a bucket and key. aborting" + | exit 1 + | fi + | # copy ok or try again. + | if [[ -d "$$local_path" ]]; then + | # make sure to strip the trailing / in destination + | destination=$${destination%/} + | # glob directory. do recursive copy + | /usr/local/aws-cli/v2/current/bin/aws s3 cp --no-progress $$local_path $$destination --recursive --exclude "cromwell_glob_control_file" || + | ( echo "attempt $$i to copy globDir $$local_path failed" && sleep $$((7 * "$$i")) && continue) + | # check integrity for each of the files + | for FILE in $$(cd $$local_path ; ls | grep -v cromwell_glob_control_file); do + | _check_data_integrity $$local_path/$$FILE $$destination/$$FILE || + | ( echo "data content length difference detected in attempt $$i to copy $$local_path/$$FILE failed" && sleep $$((7 * "$$i")) && continue 2) + | done + | else + | /usr/local/aws-cli/v2/current/bin/aws s3 cp --no-progress "$$local_path" "$$destination" || + | ( echo "attempt $$i to copy $$local_path failed" && sleep $$((7 * "$$i")) && continue) + | # check content length for data integrity + | _check_data_integrity $$local_path $$destination || + | ( echo "data content length difference detected in attempt $$i to copy $$local_path failed" && sleep $$((7 * "$$i")) && continue) + | fi + | # copy succeeded + | break | done |} + | + |function _check_data_integrity() { + | local local_path=$$1 + | local s3_path=$$2 + | + | # remote : use content_length + | if [[ $$s3_path =~ s3://([^/]+)/(.+) ]]; then + | bucket="$${BASH_REMATCH[1]}" + | key="$${BASH_REMATCH[2]}" + | else + | # this is already checked in the caller function + | echo "$$s3_path is not an S3 path with a bucket and key. 
aborting" + | exit 1 + | fi + | s3_content_length=$$(/usr/local/aws-cli/v2/current/bin/aws s3api head-object --bucket "$$bucket" --key "$$key" --query 'ContentLength') || + | ( echo "Attempt to get head of object failed for $$s3_path." && return 1 ) + | # local + | local_content_length=$$(LC_ALL=C ls -dn -- "$$local_path" | awk '{print $$5; exit}' ) || + | ( echo "Attempt to get local content length failed for $$_local_path." && return 1 ) + | # compare + | if [[ "$$s3_content_length" -eq "$$local_content_length" ]]; then + | true + | else + | false + | fi + |} |""".stripMargin job.reconfiguredScript should include (retryFunctionText) @@ -221,13 +292,13 @@ class AwsBatchJobSpec extends TestKitSuite with AnyFlatSpecLike with Matchers wi |set -e |echo '*** DELOCALIZING OUTPUTS ***' | - |/usr/local/aws-cli/v2/current/bin/aws s3 cp --no-progress /tmp/scratch/baa s3://bucket/somewhere/baa + |_s3_delocalize_with_retry /tmp/scratch/baa s3://bucket/somewhere/baa | | - |if [ -f /tmp/scratch/hello-rc.txt ]; then /usr/local/aws-cli/v2/current/bin/aws s3 cp --no-progress /tmp/scratch/hello-rc.txt ${job.jobPaths.returnCode} ; fi + |if [ -f /tmp/scratch/hello-rc.txt ]; then _s3_delocalize_with_retry /tmp/scratch/hello-rc.txt ${job.jobPaths.returnCode} ; fi | - |if [ -f /tmp/scratch/hello-stderr.log ]; then /usr/local/aws-cli/v2/current/bin/aws s3 cp --no-progress /tmp/scratch/hello-stderr.log ${job.jobPaths.standardPaths.error}; fi - |if [ -f /tmp/scratch/hello-stdout.log ]; then /usr/local/aws-cli/v2/current/bin/aws s3 cp --no-progress /tmp/scratch/hello-stdout.log ${job.jobPaths.standardPaths.output}; fi + |if [ -f /tmp/scratch/hello-stderr.log ]; then _s3_delocalize_with_retrys /tmp/scratch/hello-stderr.log ${job.jobPaths.standardPaths.error}; fi + |if [ -f /tmp/scratch/hello-stdout.log ]; then _s3_delocalize_with_retry /tmp/scratch/hello-stdout.log ${job.jobPaths.standardPaths.output}; fi | |echo '*** COMPLETED DELOCALIZATION ***' |echo '*** EXITING WITH RETURN CODE ***' From 7a4ec48c9a9503dac87d3a7bf6a410df6421ecfb Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Fri, 25 Mar 2022 11:17:39 +0000 Subject: [PATCH 052/326] fix tests --- .../scala/cromwell/backend/impl/aws/AwsBatchJobSpec.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/supportedBackends/aws/src/test/scala/cromwell/backend/impl/aws/AwsBatchJobSpec.scala b/supportedBackends/aws/src/test/scala/cromwell/backend/impl/aws/AwsBatchJobSpec.scala index 2a55f63be1c..3de115e4f97 100644 --- a/supportedBackends/aws/src/test/scala/cromwell/backend/impl/aws/AwsBatchJobSpec.scala +++ b/supportedBackends/aws/src/test/scala/cromwell/backend/impl/aws/AwsBatchJobSpec.scala @@ -128,21 +128,21 @@ class AwsBatchJobSpec extends TestKitSuite with AnyFlatSpecLike with Matchers wi val job = AwsBatchJob(null, runtimeAttributes, "commandLine", script, "/cromwell_root/hello-rc.txt", "/cromwell_root/hello-stdout.log", "/cromwell_root/hello-stderr.log", Seq.empty[AwsBatchInput].toSet, Seq.empty[AwsBatchFileOutput].toSet, - jobPaths, Seq.empty[AwsBatchParameter], None) + jobPaths, Seq.empty[AwsBatchParameter], None, None) job } private def generateBasicJobForLocalFS: AwsBatchJob = { val job = AwsBatchJob(null, runtimeAttributes.copy(fileSystem="local"), "commandLine", script, "/cromwell_root/hello-rc.txt", "/cromwell_root/hello-stdout.log", "/cromwell_root/hello-stderr.log", Seq.empty[AwsBatchInput].toSet, Seq.empty[AwsBatchFileOutput].toSet, - jobPaths, Seq.empty[AwsBatchParameter], None) + jobPaths, Seq.empty[AwsBatchParameter], None, 
None) job } private def generateJobWithS3InOut: AwsBatchJob = { val job = AwsBatchJob(null, runtimeAttributes, "commandLine", script, "/cromwell_root/hello-rc.txt", "/cromwell_root/hello-stdout.log", "/cromwell_root/hello-stderr.log", s3Inputs, s3Outputs, - jobPaths, Seq.empty[AwsBatchParameter], None) + jobPaths, Seq.empty[AwsBatchParameter], None, None) job } From 2d447df86baa986396c76bbf05dfbe54e7f8cc48 Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Wed, 30 Mar 2022 19:50:31 +0100 Subject: [PATCH 053/326] update readme --- README.md | 4 +++- .../src/main/scala/cromwell/backend/impl/aws/README.md | 10 ++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index bb313473f58..adecd7d31fd 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,12 @@ [![Build Status](https://travis-ci.com/broadinstitute/cromwell.svg?branch=develop)](https://travis-ci.com/broadinstitute/cromwell?branch=develop) [![codecov](https://codecov.io/gh/broadinstitute/cromwell/branch/develop/graph/badge.svg)](https://codecov.io/gh/broadinstitute/cromwell) -## Welcome to "AWS-friendly" Cromwell +## Welcome to the "AWS-friendly" Cromwell More information regarding AWS features can be found [here](https://github.com/henriqueribeiro/cromwell/tree/master/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws) +Contact: henrique [at] loka [dot] com + Cromwell is an open-source Workflow Management System for bioinformatics. Licensing is [BSD 3-Clause](LICENSE.txt). The [Cromwell documentation has a dedicated site](https://cromwell.readthedocs.io/en/stable). diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md index b692e356cc1..62a0495c1be 100644 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md @@ -42,6 +42,8 @@ Docker Hub authentication for AWS Backend enable users to access and use private dockerhub { token = "" } ``` +Stack must be deployed through https://github.com/aws-samples/aws-genomics-workflows. + ### `awsBatchRetryAttempts` *Default: _0_* @@ -50,6 +52,8 @@ This runtime attribute adds support to [*AWS Batch Automated Job Retries*](https It takes an Int, between 1 and 10, as a value that indicates the maximum number of times AWS Batch should retry a failed task. If the value 0 is passed, the [*Retry Strategy*](https://docs.aws.amazon.com/batch/latest/userguide/job_definition_parameters.html#retryStrategy) will not be added to the job definiton and the task will run just once. +This configuration should be passed in the `options.json` file when launching the pipeline. + ``` runtime { awsBatchRetryAttempts: integer @@ -62,6 +66,8 @@ runtime { A list of [`ulimits`](https://docs.aws.amazon.com/batch/latest/userguide/job_definition_parameters.html#containerProperties) values to set in the container. This parameter maps to `Ulimits` in the [Create a container](https://docs.docker.com/engine/api/v1.38/) section of the [Docker Remote API](https://docs.docker.com/engine/api/v1.38/) and the `--ulimit` option to [docker run](https://docs.docker.com/engine/reference/commandline/run/). +This configuration should be passed in the `options.json` file when launching the pipeline. + ``` "ulimits": [ { @@ -95,7 +101,7 @@ AWS ECR is a private container registry, for which access can be regulated using 1. 
Setup a user with pull-access to ECR, then use this role to run cromwell -profile default region must be setup in ~/.aws/config: +profile default region must be setup in `~/.aws/config`: ``` [profile MyECR-user] region = eu-west-1 @@ -459,4 +465,4 @@ Cromwell * There is a significant amount of dependent libraries (with more added by the introduction of the S3 filesystem and the AWS SDK). Dependency management is challenging as a result. Adding significant new functionality is relatively - painful when new dependencies are needed. \ No newline at end of file + painful when new dependencies are needed. From a85b6f66acad2e9633a0ed9334e7616786d8b85e Mon Sep 17 00:00:00 2001 From: geertvandeweyer Date: Mon, 4 Apr 2022 08:39:38 +0200 Subject: [PATCH 054/326] Corrected braces in localization scripts to enable loop control --- .../cromwell/backend/impl/aws/AwsBatchJob.scala | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala index e79a948377b..3bcdb2de5de 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala @@ -171,10 +171,10 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL | fi | # copy | $awsCmd s3 cp --no-progress "$$s3_path" "$$destination" || - | ( echo "attempt $$i to copy $$s3_path failed" sleep $$((7 * "$$i")) && continue) + | { echo "attempt $$i to copy $$s3_path failed" sleep $$((7 * "$$i")) && continue; } | # check data integrity | _check_data_integrity $$destination $$s3_path || - | (echo "data content length difference detected in attempt $$i to copy $$local_path failed" && sleep $$((7 * "$$i")) && continue) + | { echo "data content length difference detected in attempt $$i to copy $$local_path failed" && sleep $$((7 * "$$i")) && continue; } | # copy succeeded | break | done @@ -203,18 +203,18 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL | destination=$${destination%/} | # glob directory. 
do recursive copy | $awsCmd s3 cp --no-progress $$local_path $$destination --recursive --exclude "cromwell_glob_control_file" || - | ( echo "attempt $$i to copy globDir $$local_path failed" && sleep $$((7 * "$$i")) && continue) + | { echo "attempt $$i to copy globDir $$local_path failed" && sleep $$((7 * "$$i")) && continue; } | # check integrity for each of the files | for FILE in $$(cd $$local_path ; ls | grep -v cromwell_glob_control_file); do | _check_data_integrity $$local_path/$$FILE $$destination/$$FILE || - | ( echo "data content length difference detected in attempt $$i to copy $$local_path/$$FILE failed" && sleep $$((7 * "$$i")) && continue 2) + | { echo "data content length difference detected in attempt $$i to copy $$local_path/$$FILE failed" && sleep $$((7 * "$$i")) && continue 2; } | done | else | $awsCmd s3 cp --no-progress "$$local_path" "$$destination" || - | ( echo "attempt $$i to copy $$local_path failed" && sleep $$((7 * "$$i")) && continue) + | { echo "attempt $$i to copy $$local_path failed" && sleep $$((7 * "$$i")) && continue; } | # check content length for data integrity | _check_data_integrity $$local_path $$destination || - | ( echo "data content length difference detected in attempt $$i to copy $$local_path failed" && sleep $$((7 * "$$i")) && continue) + | { echo "data content length difference detected in attempt $$i to copy $$local_path failed" && sleep $$((7 * "$$i")) && continue; } | fi | # copy succeeded | break @@ -235,10 +235,10 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL | exit 1 | fi | s3_content_length=$$($awsCmd s3api head-object --bucket "$$bucket" --key "$$key" --query 'ContentLength') || - | ( echo "Attempt to get head of object failed for $$s3_path." && return 1 ) + | { echo "Attempt to get head of object failed for $$s3_path." && return 1 ; } | # local | local_content_length=$$(LC_ALL=C ls -dn -- "$$local_path" | awk '{print $$5; exit}' ) || - | ( echo "Attempt to get local content length failed for $$_local_path." && return 1 ) + | { echo "Attempt to get local content length failed for $$_local_path." 
&& return 1; } | # compare | if [[ "$$s3_content_length" -eq "$$local_content_length" ]]; then | true From 7462b6865635da8fdd8ceac48e3a1ea99a4e7b91 Mon Sep 17 00:00:00 2001 From: geertvandeweyer Date: Mon, 4 Apr 2022 08:40:48 +0200 Subject: [PATCH 055/326] Added support for Retry in case of time-out errors --- .../engine/io/RetryableRequestSupport.scala | 26 ++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/engine/src/main/scala/cromwell/engine/io/RetryableRequestSupport.scala b/engine/src/main/scala/cromwell/engine/io/RetryableRequestSupport.scala index 03ef9224b7d..9efa3e7c767 100644 --- a/engine/src/main/scala/cromwell/engine/io/RetryableRequestSupport.scala +++ b/engine/src/main/scala/cromwell/engine/io/RetryableRequestSupport.scala @@ -26,7 +26,7 @@ object RetryableRequestSupport { case _: SocketException => true case _: SocketTimeoutException => true case ioE: IOException if Option(ioE.getMessage).exists(_.contains("Error getting access token for service account")) => true - case ioE: IOException => isGcs500(ioE) || isGcs503(ioE) || isGcs504(ioE) + case ioE: IOException => isGcs500(ioE) || isGcs503(ioE) || isGcs504(ioE) || isAws504(ioE) case other => // Infinitely retryable is a subset of retryable isInfinitelyRetryable(other) @@ -86,4 +86,28 @@ object RetryableRequestSupport { msg.contains("504 Gateway Timeout") ) } + // AWS timeout error + def isAws504(failure: Throwable): Boolean = { + Option(failure.getMessage).exists(msg => + ( + // timeout in reading form s3. + msg.contains("Could not read from s3") && + msg.contains("Timeout waiting for connection") + ) || ( + // reading in cromwell wdl (read_lines() etc) + msg.contains("Failed to evaluate") && + msg.contains("s3://") && + msg.contains("Timed out after") + ) + ) + } + // General AWS IO error : all items unreadable except rc.txt files (might be missing) + // => mainly for testing. will retry mis-specified s3 paths as well... + def isAwsIO(failure:Throwable): Boolean = { + Option(failure.getMessage).exists(msg => + msg.contains("Could not read from s3") && + ! msg.contains("-rc.txt") + ) + } + } From 40adf36b3e73fdea1bb0b0354e9c419a16dc9889 Mon Sep 17 00:00:00 2001 From: geertvandeweyer Date: Mon, 4 Apr 2022 08:41:40 +0200 Subject: [PATCH 056/326] Enabled support for retryWithMoreMemory for AWS/Batch --- .../StandardAsyncExecutionActor.scala | 128 ++++++++++-------- ...wsBatchAsyncBackendJobExecutionActor.scala | 37 ++++- 2 files changed, 107 insertions(+), 58 deletions(-) diff --git a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala index 9b21871b538..2ef8b7bd5b7 100644 --- a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala +++ b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala @@ -1265,70 +1265,16 @@ trait StandardAsyncExecutionActor def handleExecutionResult(status: StandardAsyncRunState, oldHandle: StandardAsyncPendingExecutionHandle): Future[ExecutionHandle] = { - // get the memory retry code. 
- def memoryRetryRC: Future[Boolean] = { - // convert int to boolean - def returnCodeAsBoolean(codeAsOption: Option[String]): Boolean = { - codeAsOption match { - case Some(codeAsString) => - Try(codeAsString.trim.toInt) match { - case Success(code) => code match { - case StderrContainsRetryKeysCode => true - case _ => false - } - case Failure(e) => - log.error(s"'CheckingForMemoryRetry' action exited with code '$codeAsString' which couldn't be " + - s"converted to an Integer. Task will not be retried with more memory. Error: ${ExceptionUtils.getMessage(e)}") - false - } - case None => false - } - } - // read if the file exists - def readMemoryRetryRCFile(fileExists: Boolean): Future[Option[String]] = { - if (fileExists) - asyncIo.contentAsStringAsync(jobPaths.memoryRetryRC, None, failOnOverflow = false).map(Option(_)) - else - Future.successful(None) - } - //finally : assign the yielded variable - for { - fileExists <- asyncIo.existsAsync(jobPaths.memoryRetryRC) - retryCheckRCAsOption <- readMemoryRetryRCFile(fileExists) - retryWithMoreMemory = returnCodeAsBoolean(retryCheckRCAsOption) - } yield retryWithMoreMemory - } - - // get the exit code of the job. - def JobExitCode: Future[String] = { - - // read if the file exists - def readRCFile(fileExists: Boolean): Future[String] = { - if (fileExists) - asyncIo.contentAsStringAsync(jobPaths.returnCode, None, failOnOverflow = false) - else { - jobLogger.warn("RC file not found. Setting job to failed & waiting 5m before retry.") - Thread.sleep(300000) - Future("1") - } - } - //finally : assign the yielded variable - for { - fileExists <- asyncIo.existsAsync(jobPaths.returnCode) - jobRC <- readRCFile(fileExists) - } yield jobRC - } - // get path to sderr val stderr = jobPaths.standardPaths.error lazy val stderrAsOption: Option[Path] = Option(stderr) - // get the three needed variables, using functions above or direct assignment. + // get the three needed variables, using helper functions below, or direct assignment. val stderrSizeAndReturnCodeAndMemoryRetry = for { returnCodeAsString <- JobExitCode // Only check stderr size if we need to, otherwise this results in a lot of unnecessary I/O that // may fail due to race conditions on quickly-executing jobs. stderrSize <- if (failOnStdErr) asyncIo.sizeAsync(stderr) else Future.successful(0L) - retryWithMoreMemory <- memoryRetryRC + retryWithMoreMemory <- memoryRetryRC(oldHandle.pendingJob) } yield (stderrSize, returnCodeAsString, retryWithMoreMemory) stderrSizeAndReturnCodeAndMemoryRetry flatMap { @@ -1337,25 +1283,36 @@ trait StandardAsyncExecutionActor if (isDone(status)) { tryReturnCodeAsInt match { + // stderr not empty : retry case Success(returnCodeAsInt) if failOnStdErr && stderrSize.intValue > 0 => val executionHandle = Future.successful(FailedNonRetryableExecutionHandle(StderrNonEmpty(jobDescriptor.key.tag, stderrSize, stderrAsOption), Option(returnCodeAsInt), None)) retryElseFail(executionHandle) - case Success(returnCodeAsInt) if isAbort(returnCodeAsInt) => + // job was aborted (cancelled by user?) 
+ // on AWS OOM kill are code 137 : check retryWithMoreMemory here + case Success(returnCodeAsInt) if isAbort(returnCodeAsInt) && !retryWithMoreMemory => + jobLogger.debug(s"Job was aborted, code was : '${returnCodeAsString}'") Future.successful(AbortedExecutionHandle) + // job considered ok by accepted exit code case Success(returnCodeAsInt) if continueOnReturnCode.continueFor(returnCodeAsInt) => handleExecutionSuccess(status, oldHandle, returnCodeAsInt) + // job failed on out-of-memory : retry case Success(returnCodeAsInt) if retryWithMoreMemory => + jobLogger.warn(s"Retrying job due to OOM with exit code : '${returnCodeAsString}' ") val executionHandle = Future.successful(FailedNonRetryableExecutionHandle(RetryWithMoreMemory(jobDescriptor.key.tag, stderrAsOption, memoryRetryErrorKeys, log), Option(returnCodeAsInt), None)) retryElseFail(executionHandle, retryWithMoreMemory) + // unaccepted return code : retry. case Success(returnCodeAsInt) => + jobLogger.debug(s"Retrying with wrong exit code : '${returnCodeAsString}'") val executionHandle = Future.successful(FailedNonRetryableExecutionHandle(WrongReturnCode(jobDescriptor.key.tag, returnCodeAsInt, stderrAsOption), Option(returnCodeAsInt), None)) retryElseFail(executionHandle) case Failure(_) => + jobLogger.warn(s"General failure of job with exit code : '${returnCodeAsString}'") Future.successful(FailedNonRetryableExecutionHandle(ReturnCodeIsNotAnInt(jobDescriptor.key.tag, returnCodeAsString, stderrAsOption), kvPairsToSave = None)) } } else { tryReturnCodeAsInt match { case Success(returnCodeAsInt) if retryWithMoreMemory && !continueOnReturnCode.continueFor(returnCodeAsInt) => + jobLogger.debug(s"job not done but retrying already? : ${status.toString()}") val executionHandle = Future.successful(FailedNonRetryableExecutionHandle(RetryWithMoreMemory(jobDescriptor.key.tag, stderrAsOption, memoryRetryErrorKeys, log), Option(returnCodeAsInt), None)) retryElseFail(executionHandle, retryWithMoreMemory) case _ => @@ -1373,6 +1330,63 @@ trait StandardAsyncExecutionActor } } + // helper function for handleExecutionResult : get the exit code of the job. + def JobExitCode: Future[String] = { + + // read if the file exists + def readRCFile(fileExists: Boolean): Future[String] = { + if (fileExists) + asyncIo.contentAsStringAsync(jobPaths.returnCode, None, failOnOverflow = false) + else { + jobLogger.warn("RC file not found. Setting job to failed & retry.") + //Thread.sleep(300000) + Future("1") + } + } + //finally : assign the yielded variable + for { + fileExists <- asyncIo.existsAsync(jobPaths.returnCode) + jobRC <- readRCFile(fileExists) + } yield jobRC + } + + // helper function for handleExecutionResult : get the memory retry code. + def memoryRetryRC(job: StandardAsyncJob): Future[Boolean] = { + // job is used in aws override version. use here to prevent compilation error. + log.debug(s"Looking for memoryRetry in job '${job.jobId}'") + // convert int to boolean + def returnCodeAsBoolean(codeAsOption: Option[String]): Boolean = { + codeAsOption match { + case Some(codeAsString) => + Try(codeAsString.trim.toInt) match { + case Success(code) => code match { + case StderrContainsRetryKeysCode => true + case _ => false + } + case Failure(e) => + log.error(s"'CheckingForMemoryRetry' action exited with code '$codeAsString' which couldn't be " + + s"converted to an Integer. Task will not be retried with more memory. 
Error: ${ExceptionUtils.getMessage(e)}") + false + } + case None => false + } + } + // read if the file exists + def readMemoryRetryRCFile(fileExists: Boolean): Future[Option[String]] = { + if (fileExists) + asyncIo.contentAsStringAsync(jobPaths.memoryRetryRC, None, failOnOverflow = false).map(Option(_)) + else + Future.successful(None) + } + //finally : assign the yielded variable + for { + fileExists <- asyncIo.existsAsync(jobPaths.memoryRetryRC) + retryCheckRCAsOption <- readMemoryRetryRCFile(fileExists) + retryWithMoreMemory = returnCodeAsBoolean(retryCheckRCAsOption) + } yield retryWithMoreMemory + } + + /** * Send the job id of the running job to the key value store. * diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala index 34334449587..a534ae2b8da 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala @@ -57,7 +57,10 @@ import cromwell.filesystems.s3.S3Path import cromwell.filesystems.s3.batch.S3BatchCommandBuilder import cromwell.services.keyvalue.KvClient import org.slf4j.{Logger, LoggerFactory} -import software.amazon.awssdk.services.batch.model.{BatchException, SubmitJobResponse} +import software.amazon.awssdk.services.batch.BatchClient +//import software.amazon.awssdk.services.batch.model.{BatchException, SubmitJobResponse} +import software.amazon.awssdk.services.batch.model._ + import wom.callable.Callable.OutputDefinition import wom.core.FullyQualifiedName import wom.expression.NoIoFunctionSet @@ -184,6 +187,13 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar Option(configuration.awsAuth), configuration.fsxMntPoint) } + + // setup batch client to query job container info + lazy val batchClient: BatchClient = { + val builder = BatchClient.builder() + configureClient(builder, batchJob.optAwsAuthMode, batchJob.configRegion) + } + /* Tries to abort the job in flight * * @param job A StandardAsyncJob object (has jobId value) to cancel @@ -514,6 +524,31 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar } yield guaranteedAnswer } + // new OOM detection + override def memoryRetryRC(job: StandardAsyncJob): Future[Boolean] = Future { + Log.debug(s"Looking for memoryRetry in job '${job.jobId}'") + val describeJobsResponse = batchClient.describeJobs(DescribeJobsRequest.builder.jobs(job.jobId).build) + val jobDetail = describeJobsResponse.jobs.get(0) //OrElse(throw new RuntimeException(s"Could not get job details for job '${job.jobId}'")) + val nrAttempts = jobDetail.attempts.size + val lastattempt = jobDetail.attempts.get(nrAttempts-1) + val containerRC = lastattempt.container.exitCode + // if not zero => get reason, else set retry to false. 
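+        // any non-zero exit code (an OOM kill surfaces on AWS Batch as 137, i.e. SIGKILL) falls through
+        // to the reason check below, where the container status reason is matched against memoryRetryErrorKeys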
+ containerRC.toString() match { + case "0" => + Log.debug("container exit code was zero") + false + case _ => + val containerStatusReason = lastattempt.container.reason + Log.warn(s"Job failed with Container status reason : '${containerStatusReason}'") + val RetryMemoryKeys = memoryRetryErrorKeys.toList.flatten + val retry = RetryMemoryKeys.exists(containerStatusReason.contains) + Log.debug(s"Retry job based on provided keys : '${retry}'") + retry + } + + + } + // Despite being a "runtime" exception, BatchExceptions for 429 (too many requests) are *not* fatal: override def isFatal(throwable: Throwable): Boolean = throwable match { case be: BatchException => !be.getMessage.contains("Status Code: 429") From 0e25349bb8dca295e5ec1c08afce449939441724 Mon Sep 17 00:00:00 2001 From: geertvandeweyer Date: Mon, 4 Apr 2022 14:04:35 +0200 Subject: [PATCH 057/326] bugfix : removed incorrect quote --- .../src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala index 3bcdb2de5de..757c7ab7b87 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala @@ -277,7 +277,7 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL s""" |touch ${output.name} |_s3_delocalize_with_retry ${output.name} ${output.s3key} - |if [ -e $globDirectory ]; then _s3_delocalize_with_retry $globDirectory $s3GlobOutDirectory "; fi""".stripMargin + |if [ -e $globDirectory ]; then _s3_delocalize_with_retry $globDirectory $s3GlobOutDirectory ; fi""".stripMargin case output: AwsBatchFileOutput if output.s3key.startsWith("s3://") && output.mount.mountPoint.pathAsString == AwsBatchWorkingDisk.MountPoint.pathAsString => From 320208f2d0e53658057e7e40dfa12158f44729fa Mon Sep 17 00:00:00 2001 From: geertvandeweyer Date: Mon, 4 Apr 2022 14:17:53 +0200 Subject: [PATCH 058/326] added memory-retry documentation --- .../scala/cromwell/backend/impl/aws/README.md | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md index 62a0495c1be..df83472bdc2 100644 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md @@ -133,6 +133,48 @@ Notes: - local hashing means that all used containers are pulled. Make sure you have enough storage - enable a database to make the cache persistent over cromwell restarts +### Retry with more memory + +Cromwell can be configured to retry jobs with more allocated memory, under a defined set of conditions. To enable this, set the following parameters: + +cromwell configuration: `cromwell.config`: +``` +// set the maximal amount of retries. +// backend.providers.AWSBatch.config.default-runtime-attribues.maxRetries +backend { + providers { + AWSBatch { + config { + default-runtime-attributes { + maxRetries: 6 + } + } + } + } +} + +// set the keys for Out-Of-Memory killing. 
+// system.io.memory-retry-error-keys +system{ + io{ + memory-retry-error-keys = ["OutOfMemory","Killed"] + } +} +``` + +Workflow specific runtime options : `workflow_options.json`: +``` +{ + "memory_retry_multiplier" : 1.5 +} +``` + +When providing the options.json file during workflow submission, jobs that were terminated due to insufficient memory will be retried 6 times, with increasing memory allocation. For example 4Gb => 6Gb => 9Gb => 13.5Gb => ... + +Note: Retries of jobs using the `awsBatchRetryAttempts` counter do *not* increase memory allocation. + + + AWS Batch --------- From 22bc5d7dcc4a85a02de3319180c4bab197ccd1d8 Mon Sep 17 00:00:00 2001 From: henriqueribeiro Date: Mon, 4 Apr 2022 18:18:40 +0100 Subject: [PATCH 059/326] Revert "Extra failure handling for Batch" --- .../StandardAsyncExecutionActor.scala | 128 ++++++++---------- .../engine/io/RetryableRequestSupport.scala | 26 +--- ...wsBatchAsyncBackendJobExecutionActor.scala | 37 +---- .../backend/impl/aws/AwsBatchJob.scala | 18 +-- .../scala/cromwell/backend/impl/aws/README.md | 42 ------ 5 files changed, 68 insertions(+), 183 deletions(-) diff --git a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala index 2ef8b7bd5b7..9b21871b538 100644 --- a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala +++ b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala @@ -1265,16 +1265,70 @@ trait StandardAsyncExecutionActor def handleExecutionResult(status: StandardAsyncRunState, oldHandle: StandardAsyncPendingExecutionHandle): Future[ExecutionHandle] = { + // get the memory retry code. + def memoryRetryRC: Future[Boolean] = { + // convert int to boolean + def returnCodeAsBoolean(codeAsOption: Option[String]): Boolean = { + codeAsOption match { + case Some(codeAsString) => + Try(codeAsString.trim.toInt) match { + case Success(code) => code match { + case StderrContainsRetryKeysCode => true + case _ => false + } + case Failure(e) => + log.error(s"'CheckingForMemoryRetry' action exited with code '$codeAsString' which couldn't be " + + s"converted to an Integer. Task will not be retried with more memory. Error: ${ExceptionUtils.getMessage(e)}") + false + } + case None => false + } + } + // read if the file exists + def readMemoryRetryRCFile(fileExists: Boolean): Future[Option[String]] = { + if (fileExists) + asyncIo.contentAsStringAsync(jobPaths.memoryRetryRC, None, failOnOverflow = false).map(Option(_)) + else + Future.successful(None) + } + //finally : assign the yielded variable + for { + fileExists <- asyncIo.existsAsync(jobPaths.memoryRetryRC) + retryCheckRCAsOption <- readMemoryRetryRCFile(fileExists) + retryWithMoreMemory = returnCodeAsBoolean(retryCheckRCAsOption) + } yield retryWithMoreMemory + } + + // get the exit code of the job. + def JobExitCode: Future[String] = { + + // read if the file exists + def readRCFile(fileExists: Boolean): Future[String] = { + if (fileExists) + asyncIo.contentAsStringAsync(jobPaths.returnCode, None, failOnOverflow = false) + else { + jobLogger.warn("RC file not found. 
Setting job to failed & waiting 5m before retry.") + Thread.sleep(300000) + Future("1") + } + } + //finally : assign the yielded variable + for { + fileExists <- asyncIo.existsAsync(jobPaths.returnCode) + jobRC <- readRCFile(fileExists) + } yield jobRC + } + // get path to sderr val stderr = jobPaths.standardPaths.error lazy val stderrAsOption: Option[Path] = Option(stderr) - // get the three needed variables, using helper functions below, or direct assignment. + // get the three needed variables, using functions above or direct assignment. val stderrSizeAndReturnCodeAndMemoryRetry = for { returnCodeAsString <- JobExitCode // Only check stderr size if we need to, otherwise this results in a lot of unnecessary I/O that // may fail due to race conditions on quickly-executing jobs. stderrSize <- if (failOnStdErr) asyncIo.sizeAsync(stderr) else Future.successful(0L) - retryWithMoreMemory <- memoryRetryRC(oldHandle.pendingJob) + retryWithMoreMemory <- memoryRetryRC } yield (stderrSize, returnCodeAsString, retryWithMoreMemory) stderrSizeAndReturnCodeAndMemoryRetry flatMap { @@ -1283,36 +1337,25 @@ trait StandardAsyncExecutionActor if (isDone(status)) { tryReturnCodeAsInt match { - // stderr not empty : retry case Success(returnCodeAsInt) if failOnStdErr && stderrSize.intValue > 0 => val executionHandle = Future.successful(FailedNonRetryableExecutionHandle(StderrNonEmpty(jobDescriptor.key.tag, stderrSize, stderrAsOption), Option(returnCodeAsInt), None)) retryElseFail(executionHandle) - // job was aborted (cancelled by user?) - // on AWS OOM kill are code 137 : check retryWithMoreMemory here - case Success(returnCodeAsInt) if isAbort(returnCodeAsInt) && !retryWithMoreMemory => - jobLogger.debug(s"Job was aborted, code was : '${returnCodeAsString}'") + case Success(returnCodeAsInt) if isAbort(returnCodeAsInt) => Future.successful(AbortedExecutionHandle) - // job considered ok by accepted exit code case Success(returnCodeAsInt) if continueOnReturnCode.continueFor(returnCodeAsInt) => handleExecutionSuccess(status, oldHandle, returnCodeAsInt) - // job failed on out-of-memory : retry case Success(returnCodeAsInt) if retryWithMoreMemory => - jobLogger.warn(s"Retrying job due to OOM with exit code : '${returnCodeAsString}' ") val executionHandle = Future.successful(FailedNonRetryableExecutionHandle(RetryWithMoreMemory(jobDescriptor.key.tag, stderrAsOption, memoryRetryErrorKeys, log), Option(returnCodeAsInt), None)) retryElseFail(executionHandle, retryWithMoreMemory) - // unaccepted return code : retry. case Success(returnCodeAsInt) => - jobLogger.debug(s"Retrying with wrong exit code : '${returnCodeAsString}'") val executionHandle = Future.successful(FailedNonRetryableExecutionHandle(WrongReturnCode(jobDescriptor.key.tag, returnCodeAsInt, stderrAsOption), Option(returnCodeAsInt), None)) retryElseFail(executionHandle) case Failure(_) => - jobLogger.warn(s"General failure of job with exit code : '${returnCodeAsString}'") Future.successful(FailedNonRetryableExecutionHandle(ReturnCodeIsNotAnInt(jobDescriptor.key.tag, returnCodeAsString, stderrAsOption), kvPairsToSave = None)) } } else { tryReturnCodeAsInt match { case Success(returnCodeAsInt) if retryWithMoreMemory && !continueOnReturnCode.continueFor(returnCodeAsInt) => - jobLogger.debug(s"job not done but retrying already? 
: ${status.toString()}") val executionHandle = Future.successful(FailedNonRetryableExecutionHandle(RetryWithMoreMemory(jobDescriptor.key.tag, stderrAsOption, memoryRetryErrorKeys, log), Option(returnCodeAsInt), None)) retryElseFail(executionHandle, retryWithMoreMemory) case _ => @@ -1330,63 +1373,6 @@ trait StandardAsyncExecutionActor } } - // helper function for handleExecutionResult : get the exit code of the job. - def JobExitCode: Future[String] = { - - // read if the file exists - def readRCFile(fileExists: Boolean): Future[String] = { - if (fileExists) - asyncIo.contentAsStringAsync(jobPaths.returnCode, None, failOnOverflow = false) - else { - jobLogger.warn("RC file not found. Setting job to failed & retry.") - //Thread.sleep(300000) - Future("1") - } - } - //finally : assign the yielded variable - for { - fileExists <- asyncIo.existsAsync(jobPaths.returnCode) - jobRC <- readRCFile(fileExists) - } yield jobRC - } - - // helper function for handleExecutionResult : get the memory retry code. - def memoryRetryRC(job: StandardAsyncJob): Future[Boolean] = { - // job is used in aws override version. use here to prevent compilation error. - log.debug(s"Looking for memoryRetry in job '${job.jobId}'") - // convert int to boolean - def returnCodeAsBoolean(codeAsOption: Option[String]): Boolean = { - codeAsOption match { - case Some(codeAsString) => - Try(codeAsString.trim.toInt) match { - case Success(code) => code match { - case StderrContainsRetryKeysCode => true - case _ => false - } - case Failure(e) => - log.error(s"'CheckingForMemoryRetry' action exited with code '$codeAsString' which couldn't be " + - s"converted to an Integer. Task will not be retried with more memory. Error: ${ExceptionUtils.getMessage(e)}") - false - } - case None => false - } - } - // read if the file exists - def readMemoryRetryRCFile(fileExists: Boolean): Future[Option[String]] = { - if (fileExists) - asyncIo.contentAsStringAsync(jobPaths.memoryRetryRC, None, failOnOverflow = false).map(Option(_)) - else - Future.successful(None) - } - //finally : assign the yielded variable - for { - fileExists <- asyncIo.existsAsync(jobPaths.memoryRetryRC) - retryCheckRCAsOption <- readMemoryRetryRCFile(fileExists) - retryWithMoreMemory = returnCodeAsBoolean(retryCheckRCAsOption) - } yield retryWithMoreMemory - } - - /** * Send the job id of the running job to the key value store. * diff --git a/engine/src/main/scala/cromwell/engine/io/RetryableRequestSupport.scala b/engine/src/main/scala/cromwell/engine/io/RetryableRequestSupport.scala index 9efa3e7c767..03ef9224b7d 100644 --- a/engine/src/main/scala/cromwell/engine/io/RetryableRequestSupport.scala +++ b/engine/src/main/scala/cromwell/engine/io/RetryableRequestSupport.scala @@ -26,7 +26,7 @@ object RetryableRequestSupport { case _: SocketException => true case _: SocketTimeoutException => true case ioE: IOException if Option(ioE.getMessage).exists(_.contains("Error getting access token for service account")) => true - case ioE: IOException => isGcs500(ioE) || isGcs503(ioE) || isGcs504(ioE) || isAws504(ioE) + case ioE: IOException => isGcs500(ioE) || isGcs503(ioE) || isGcs504(ioE) case other => // Infinitely retryable is a subset of retryable isInfinitelyRetryable(other) @@ -86,28 +86,4 @@ object RetryableRequestSupport { msg.contains("504 Gateway Timeout") ) } - // AWS timeout error - def isAws504(failure: Throwable): Boolean = { - Option(failure.getMessage).exists(msg => - ( - // timeout in reading form s3. 
- msg.contains("Could not read from s3") && - msg.contains("Timeout waiting for connection") - ) || ( - // reading in cromwell wdl (read_lines() etc) - msg.contains("Failed to evaluate") && - msg.contains("s3://") && - msg.contains("Timed out after") - ) - ) - } - // General AWS IO error : all items unreadable except rc.txt files (might be missing) - // => mainly for testing. will retry mis-specified s3 paths as well... - def isAwsIO(failure:Throwable): Boolean = { - Option(failure.getMessage).exists(msg => - msg.contains("Could not read from s3") && - ! msg.contains("-rc.txt") - ) - } - } diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala index a534ae2b8da..34334449587 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala @@ -57,10 +57,7 @@ import cromwell.filesystems.s3.S3Path import cromwell.filesystems.s3.batch.S3BatchCommandBuilder import cromwell.services.keyvalue.KvClient import org.slf4j.{Logger, LoggerFactory} -import software.amazon.awssdk.services.batch.BatchClient -//import software.amazon.awssdk.services.batch.model.{BatchException, SubmitJobResponse} -import software.amazon.awssdk.services.batch.model._ - +import software.amazon.awssdk.services.batch.model.{BatchException, SubmitJobResponse} import wom.callable.Callable.OutputDefinition import wom.core.FullyQualifiedName import wom.expression.NoIoFunctionSet @@ -187,13 +184,6 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar Option(configuration.awsAuth), configuration.fsxMntPoint) } - - // setup batch client to query job container info - lazy val batchClient: BatchClient = { - val builder = BatchClient.builder() - configureClient(builder, batchJob.optAwsAuthMode, batchJob.configRegion) - } - /* Tries to abort the job in flight * * @param job A StandardAsyncJob object (has jobId value) to cancel @@ -524,31 +514,6 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar } yield guaranteedAnswer } - // new OOM detection - override def memoryRetryRC(job: StandardAsyncJob): Future[Boolean] = Future { - Log.debug(s"Looking for memoryRetry in job '${job.jobId}'") - val describeJobsResponse = batchClient.describeJobs(DescribeJobsRequest.builder.jobs(job.jobId).build) - val jobDetail = describeJobsResponse.jobs.get(0) //OrElse(throw new RuntimeException(s"Could not get job details for job '${job.jobId}'")) - val nrAttempts = jobDetail.attempts.size - val lastattempt = jobDetail.attempts.get(nrAttempts-1) - val containerRC = lastattempt.container.exitCode - // if not zero => get reason, else set retry to false. 
- containerRC.toString() match { - case "0" => - Log.debug("container exit code was zero") - false - case _ => - val containerStatusReason = lastattempt.container.reason - Log.warn(s"Job failed with Container status reason : '${containerStatusReason}'") - val RetryMemoryKeys = memoryRetryErrorKeys.toList.flatten - val retry = RetryMemoryKeys.exists(containerStatusReason.contains) - Log.debug(s"Retry job based on provided keys : '${retry}'") - retry - } - - - } - // Despite being a "runtime" exception, BatchExceptions for 429 (too many requests) are *not* fatal: override def isFatal(throwable: Throwable): Boolean = throwable match { case be: BatchException => !be.getMessage.contains("Status Code: 429") diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala index 757c7ab7b87..e79a948377b 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala @@ -171,10 +171,10 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL | fi | # copy | $awsCmd s3 cp --no-progress "$$s3_path" "$$destination" || - | { echo "attempt $$i to copy $$s3_path failed" sleep $$((7 * "$$i")) && continue; } + | ( echo "attempt $$i to copy $$s3_path failed" sleep $$((7 * "$$i")) && continue) | # check data integrity | _check_data_integrity $$destination $$s3_path || - | { echo "data content length difference detected in attempt $$i to copy $$local_path failed" && sleep $$((7 * "$$i")) && continue; } + | (echo "data content length difference detected in attempt $$i to copy $$local_path failed" && sleep $$((7 * "$$i")) && continue) | # copy succeeded | break | done @@ -203,18 +203,18 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL | destination=$${destination%/} | # glob directory. 
do recursive copy | $awsCmd s3 cp --no-progress $$local_path $$destination --recursive --exclude "cromwell_glob_control_file" || - | { echo "attempt $$i to copy globDir $$local_path failed" && sleep $$((7 * "$$i")) && continue; } + | ( echo "attempt $$i to copy globDir $$local_path failed" && sleep $$((7 * "$$i")) && continue) | # check integrity for each of the files | for FILE in $$(cd $$local_path ; ls | grep -v cromwell_glob_control_file); do | _check_data_integrity $$local_path/$$FILE $$destination/$$FILE || - | { echo "data content length difference detected in attempt $$i to copy $$local_path/$$FILE failed" && sleep $$((7 * "$$i")) && continue 2; } + | ( echo "data content length difference detected in attempt $$i to copy $$local_path/$$FILE failed" && sleep $$((7 * "$$i")) && continue 2) | done | else | $awsCmd s3 cp --no-progress "$$local_path" "$$destination" || - | { echo "attempt $$i to copy $$local_path failed" && sleep $$((7 * "$$i")) && continue; } + | ( echo "attempt $$i to copy $$local_path failed" && sleep $$((7 * "$$i")) && continue) | # check content length for data integrity | _check_data_integrity $$local_path $$destination || - | { echo "data content length difference detected in attempt $$i to copy $$local_path failed" && sleep $$((7 * "$$i")) && continue; } + | ( echo "data content length difference detected in attempt $$i to copy $$local_path failed" && sleep $$((7 * "$$i")) && continue) | fi | # copy succeeded | break @@ -235,10 +235,10 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL | exit 1 | fi | s3_content_length=$$($awsCmd s3api head-object --bucket "$$bucket" --key "$$key" --query 'ContentLength') || - | { echo "Attempt to get head of object failed for $$s3_path." && return 1 ; } + | ( echo "Attempt to get head of object failed for $$s3_path." && return 1 ) | # local | local_content_length=$$(LC_ALL=C ls -dn -- "$$local_path" | awk '{print $$5; exit}' ) || - | { echo "Attempt to get local content length failed for $$_local_path." && return 1; } + | ( echo "Attempt to get local content length failed for $$_local_path." && return 1 ) | # compare | if [[ "$$s3_content_length" -eq "$$local_content_length" ]]; then | true @@ -277,7 +277,7 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL s""" |touch ${output.name} |_s3_delocalize_with_retry ${output.name} ${output.s3key} - |if [ -e $globDirectory ]; then _s3_delocalize_with_retry $globDirectory $s3GlobOutDirectory ; fi""".stripMargin + |if [ -e $globDirectory ]; then _s3_delocalize_with_retry $globDirectory $s3GlobOutDirectory "; fi""".stripMargin case output: AwsBatchFileOutput if output.s3key.startsWith("s3://") && output.mount.mountPoint.pathAsString == AwsBatchWorkingDisk.MountPoint.pathAsString => diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md index df83472bdc2..62a0495c1be 100644 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md @@ -133,48 +133,6 @@ Notes: - local hashing means that all used containers are pulled. Make sure you have enough storage - enable a database to make the cache persistent over cromwell restarts -### Retry with more memory - -Cromwell can be configured to retry jobs with more allocated memory, under a defined set of conditions. 
To enable this, set the following parameters: - -cromwell configuration: `cromwell.config`: -``` -// set the maximal amount of retries. -// backend.providers.AWSBatch.config.default-runtime-attribues.maxRetries -backend { - providers { - AWSBatch { - config { - default-runtime-attributes { - maxRetries: 6 - } - } - } - } -} - -// set the keys for Out-Of-Memory killing. -// system.io.memory-retry-error-keys -system{ - io{ - memory-retry-error-keys = ["OutOfMemory","Killed"] - } -} -``` - -Workflow specific runtime options : `workflow_options.json`: -``` -{ - "memory_retry_multiplier" : 1.5 -} -``` - -When providing the options.json file during workflow submission, jobs that were terminated due to insufficient memory will be retried 6 times, with increasing memory allocation. For example 4Gb => 6Gb => 9Gb => 13.5Gb => ... - -Note: Retries of jobs using the `awsBatchRetryAttempts` counter do *not* increase memory allocation. - - - AWS Batch --------- From 1d225d5599511a508a929ec24d3f99161c8726cd Mon Sep 17 00:00:00 2001 From: geertvandeweyer Date: Tue, 5 Apr 2022 09:22:51 +0200 Subject: [PATCH 060/326] catch the fact that not all failed batch jobs have a status reason --- .../AwsBatchAsyncBackendJobExecutionActor.scala | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala index a534ae2b8da..06d9d4dce35 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala @@ -538,8 +538,19 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar Log.debug("container exit code was zero") false case _ => - val containerStatusReason = lastattempt.container.reason - Log.warn(s"Job failed with Container status reason : '${containerStatusReason}'") + // not every failed job has a container exit reason. + val containerStatusReason:String = { + var lastReason = lastattempt.container.reason + // cast null to empty-string to prevent nullpointer execption. + if (lastReason == null || lastReason.isEmpty) { + lastReason = "" + log.debug("No exit reason found for container.") + } else { + Log.warn(s"Job failed with Container status reason : '${lastReason}'") + } + lastReason + } + // check the list of OOM-keys against the exit reason. 
val RetryMemoryKeys = memoryRetryErrorKeys.toList.flatten val retry = RetryMemoryKeys.exists(containerStatusReason.contains) Log.debug(s"Retry job based on provided keys : '${retry}'") From 039d808be67ac357f291834ef4c487152966c09c Mon Sep 17 00:00:00 2001 From: geertvandeweyer Date: Tue, 5 Apr 2022 10:15:31 +0200 Subject: [PATCH 061/326] recommit changes rejected in last PR --- .../StandardAsyncExecutionActor.scala | 128 ++++++++++-------- .../engine/io/RetryableRequestSupport.scala | 26 +++- ...wsBatchAsyncBackendJobExecutionActor.scala | 12 +- .../backend/impl/aws/AwsBatchJob.scala | 18 +-- .../scala/cromwell/backend/impl/aws/README.md | 42 ++++++ 5 files changed, 158 insertions(+), 68 deletions(-) diff --git a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala index 9b21871b538..2ef8b7bd5b7 100644 --- a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala +++ b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala @@ -1265,70 +1265,16 @@ trait StandardAsyncExecutionActor def handleExecutionResult(status: StandardAsyncRunState, oldHandle: StandardAsyncPendingExecutionHandle): Future[ExecutionHandle] = { - // get the memory retry code. - def memoryRetryRC: Future[Boolean] = { - // convert int to boolean - def returnCodeAsBoolean(codeAsOption: Option[String]): Boolean = { - codeAsOption match { - case Some(codeAsString) => - Try(codeAsString.trim.toInt) match { - case Success(code) => code match { - case StderrContainsRetryKeysCode => true - case _ => false - } - case Failure(e) => - log.error(s"'CheckingForMemoryRetry' action exited with code '$codeAsString' which couldn't be " + - s"converted to an Integer. Task will not be retried with more memory. Error: ${ExceptionUtils.getMessage(e)}") - false - } - case None => false - } - } - // read if the file exists - def readMemoryRetryRCFile(fileExists: Boolean): Future[Option[String]] = { - if (fileExists) - asyncIo.contentAsStringAsync(jobPaths.memoryRetryRC, None, failOnOverflow = false).map(Option(_)) - else - Future.successful(None) - } - //finally : assign the yielded variable - for { - fileExists <- asyncIo.existsAsync(jobPaths.memoryRetryRC) - retryCheckRCAsOption <- readMemoryRetryRCFile(fileExists) - retryWithMoreMemory = returnCodeAsBoolean(retryCheckRCAsOption) - } yield retryWithMoreMemory - } - - // get the exit code of the job. - def JobExitCode: Future[String] = { - - // read if the file exists - def readRCFile(fileExists: Boolean): Future[String] = { - if (fileExists) - asyncIo.contentAsStringAsync(jobPaths.returnCode, None, failOnOverflow = false) - else { - jobLogger.warn("RC file not found. Setting job to failed & waiting 5m before retry.") - Thread.sleep(300000) - Future("1") - } - } - //finally : assign the yielded variable - for { - fileExists <- asyncIo.existsAsync(jobPaths.returnCode) - jobRC <- readRCFile(fileExists) - } yield jobRC - } - // get path to sderr val stderr = jobPaths.standardPaths.error lazy val stderrAsOption: Option[Path] = Option(stderr) - // get the three needed variables, using functions above or direct assignment. + // get the three needed variables, using helper functions below, or direct assignment. 
val stderrSizeAndReturnCodeAndMemoryRetry = for { returnCodeAsString <- JobExitCode // Only check stderr size if we need to, otherwise this results in a lot of unnecessary I/O that // may fail due to race conditions on quickly-executing jobs. stderrSize <- if (failOnStdErr) asyncIo.sizeAsync(stderr) else Future.successful(0L) - retryWithMoreMemory <- memoryRetryRC + retryWithMoreMemory <- memoryRetryRC(oldHandle.pendingJob) } yield (stderrSize, returnCodeAsString, retryWithMoreMemory) stderrSizeAndReturnCodeAndMemoryRetry flatMap { @@ -1337,25 +1283,36 @@ trait StandardAsyncExecutionActor if (isDone(status)) { tryReturnCodeAsInt match { + // stderr not empty : retry case Success(returnCodeAsInt) if failOnStdErr && stderrSize.intValue > 0 => val executionHandle = Future.successful(FailedNonRetryableExecutionHandle(StderrNonEmpty(jobDescriptor.key.tag, stderrSize, stderrAsOption), Option(returnCodeAsInt), None)) retryElseFail(executionHandle) - case Success(returnCodeAsInt) if isAbort(returnCodeAsInt) => + // job was aborted (cancelled by user?) + // on AWS OOM kill are code 137 : check retryWithMoreMemory here + case Success(returnCodeAsInt) if isAbort(returnCodeAsInt) && !retryWithMoreMemory => + jobLogger.debug(s"Job was aborted, code was : '${returnCodeAsString}'") Future.successful(AbortedExecutionHandle) + // job considered ok by accepted exit code case Success(returnCodeAsInt) if continueOnReturnCode.continueFor(returnCodeAsInt) => handleExecutionSuccess(status, oldHandle, returnCodeAsInt) + // job failed on out-of-memory : retry case Success(returnCodeAsInt) if retryWithMoreMemory => + jobLogger.warn(s"Retrying job due to OOM with exit code : '${returnCodeAsString}' ") val executionHandle = Future.successful(FailedNonRetryableExecutionHandle(RetryWithMoreMemory(jobDescriptor.key.tag, stderrAsOption, memoryRetryErrorKeys, log), Option(returnCodeAsInt), None)) retryElseFail(executionHandle, retryWithMoreMemory) + // unaccepted return code : retry. case Success(returnCodeAsInt) => + jobLogger.debug(s"Retrying with wrong exit code : '${returnCodeAsString}'") val executionHandle = Future.successful(FailedNonRetryableExecutionHandle(WrongReturnCode(jobDescriptor.key.tag, returnCodeAsInt, stderrAsOption), Option(returnCodeAsInt), None)) retryElseFail(executionHandle) case Failure(_) => + jobLogger.warn(s"General failure of job with exit code : '${returnCodeAsString}'") Future.successful(FailedNonRetryableExecutionHandle(ReturnCodeIsNotAnInt(jobDescriptor.key.tag, returnCodeAsString, stderrAsOption), kvPairsToSave = None)) } } else { tryReturnCodeAsInt match { case Success(returnCodeAsInt) if retryWithMoreMemory && !continueOnReturnCode.continueFor(returnCodeAsInt) => + jobLogger.debug(s"job not done but retrying already? : ${status.toString()}") val executionHandle = Future.successful(FailedNonRetryableExecutionHandle(RetryWithMoreMemory(jobDescriptor.key.tag, stderrAsOption, memoryRetryErrorKeys, log), Option(returnCodeAsInt), None)) retryElseFail(executionHandle, retryWithMoreMemory) case _ => @@ -1373,6 +1330,63 @@ trait StandardAsyncExecutionActor } } + // helper function for handleExecutionResult : get the exit code of the job. + def JobExitCode: Future[String] = { + + // read if the file exists + def readRCFile(fileExists: Boolean): Future[String] = { + if (fileExists) + asyncIo.contentAsStringAsync(jobPaths.returnCode, None, failOnOverflow = false) + else { + jobLogger.warn("RC file not found. 
Setting job to failed & retry.") + //Thread.sleep(300000) + Future("1") + } + } + //finally : assign the yielded variable + for { + fileExists <- asyncIo.existsAsync(jobPaths.returnCode) + jobRC <- readRCFile(fileExists) + } yield jobRC + } + + // helper function for handleExecutionResult : get the memory retry code. + def memoryRetryRC(job: StandardAsyncJob): Future[Boolean] = { + // job is used in aws override version. use here to prevent compilation error. + log.debug(s"Looking for memoryRetry in job '${job.jobId}'") + // convert int to boolean + def returnCodeAsBoolean(codeAsOption: Option[String]): Boolean = { + codeAsOption match { + case Some(codeAsString) => + Try(codeAsString.trim.toInt) match { + case Success(code) => code match { + case StderrContainsRetryKeysCode => true + case _ => false + } + case Failure(e) => + log.error(s"'CheckingForMemoryRetry' action exited with code '$codeAsString' which couldn't be " + + s"converted to an Integer. Task will not be retried with more memory. Error: ${ExceptionUtils.getMessage(e)}") + false + } + case None => false + } + } + // read if the file exists + def readMemoryRetryRCFile(fileExists: Boolean): Future[Option[String]] = { + if (fileExists) + asyncIo.contentAsStringAsync(jobPaths.memoryRetryRC, None, failOnOverflow = false).map(Option(_)) + else + Future.successful(None) + } + //finally : assign the yielded variable + for { + fileExists <- asyncIo.existsAsync(jobPaths.memoryRetryRC) + retryCheckRCAsOption <- readMemoryRetryRCFile(fileExists) + retryWithMoreMemory = returnCodeAsBoolean(retryCheckRCAsOption) + } yield retryWithMoreMemory + } + + /** * Send the job id of the running job to the key value store. * diff --git a/engine/src/main/scala/cromwell/engine/io/RetryableRequestSupport.scala b/engine/src/main/scala/cromwell/engine/io/RetryableRequestSupport.scala index 03ef9224b7d..9efa3e7c767 100644 --- a/engine/src/main/scala/cromwell/engine/io/RetryableRequestSupport.scala +++ b/engine/src/main/scala/cromwell/engine/io/RetryableRequestSupport.scala @@ -26,7 +26,7 @@ object RetryableRequestSupport { case _: SocketException => true case _: SocketTimeoutException => true case ioE: IOException if Option(ioE.getMessage).exists(_.contains("Error getting access token for service account")) => true - case ioE: IOException => isGcs500(ioE) || isGcs503(ioE) || isGcs504(ioE) + case ioE: IOException => isGcs500(ioE) || isGcs503(ioE) || isGcs504(ioE) || isAws504(ioE) case other => // Infinitely retryable is a subset of retryable isInfinitelyRetryable(other) @@ -86,4 +86,28 @@ object RetryableRequestSupport { msg.contains("504 Gateway Timeout") ) } + // AWS timeout error + def isAws504(failure: Throwable): Boolean = { + Option(failure.getMessage).exists(msg => + ( + // timeout in reading form s3. + msg.contains("Could not read from s3") && + msg.contains("Timeout waiting for connection") + ) || ( + // reading in cromwell wdl (read_lines() etc) + msg.contains("Failed to evaluate") && + msg.contains("s3://") && + msg.contains("Timed out after") + ) + ) + } + // General AWS IO error : all items unreadable except rc.txt files (might be missing) + // => mainly for testing. will retry mis-specified s3 paths as well... + def isAwsIO(failure:Throwable): Boolean = { + Option(failure.getMessage).exists(msg => + msg.contains("Could not read from s3") && + ! 
msg.contains("-rc.txt") + ) + } + } diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala index d5a32cb2b3f..06d9d4dce35 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala @@ -57,7 +57,10 @@ import cromwell.filesystems.s3.S3Path import cromwell.filesystems.s3.batch.S3BatchCommandBuilder import cromwell.services.keyvalue.KvClient import org.slf4j.{Logger, LoggerFactory} -import software.amazon.awssdk.services.batch.model.{BatchException, SubmitJobResponse} +import software.amazon.awssdk.services.batch.BatchClient +//import software.amazon.awssdk.services.batch.model.{BatchException, SubmitJobResponse} +import software.amazon.awssdk.services.batch.model._ + import wom.callable.Callable.OutputDefinition import wom.core.FullyQualifiedName import wom.expression.NoIoFunctionSet @@ -184,6 +187,13 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar Option(configuration.awsAuth), configuration.fsxMntPoint) } + + // setup batch client to query job container info + lazy val batchClient: BatchClient = { + val builder = BatchClient.builder() + configureClient(builder, batchJob.optAwsAuthMode, batchJob.configRegion) + } + /* Tries to abort the job in flight * * @param job A StandardAsyncJob object (has jobId value) to cancel diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala index e79a948377b..757c7ab7b87 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala @@ -171,10 +171,10 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL | fi | # copy | $awsCmd s3 cp --no-progress "$$s3_path" "$$destination" || - | ( echo "attempt $$i to copy $$s3_path failed" sleep $$((7 * "$$i")) && continue) + | { echo "attempt $$i to copy $$s3_path failed" sleep $$((7 * "$$i")) && continue; } | # check data integrity | _check_data_integrity $$destination $$s3_path || - | (echo "data content length difference detected in attempt $$i to copy $$local_path failed" && sleep $$((7 * "$$i")) && continue) + | { echo "data content length difference detected in attempt $$i to copy $$local_path failed" && sleep $$((7 * "$$i")) && continue; } | # copy succeeded | break | done @@ -203,18 +203,18 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL | destination=$${destination%/} | # glob directory. 
do recursive copy | $awsCmd s3 cp --no-progress $$local_path $$destination --recursive --exclude "cromwell_glob_control_file" || - | ( echo "attempt $$i to copy globDir $$local_path failed" && sleep $$((7 * "$$i")) && continue) + | { echo "attempt $$i to copy globDir $$local_path failed" && sleep $$((7 * "$$i")) && continue; } | # check integrity for each of the files | for FILE in $$(cd $$local_path ; ls | grep -v cromwell_glob_control_file); do | _check_data_integrity $$local_path/$$FILE $$destination/$$FILE || - | ( echo "data content length difference detected in attempt $$i to copy $$local_path/$$FILE failed" && sleep $$((7 * "$$i")) && continue 2) + | { echo "data content length difference detected in attempt $$i to copy $$local_path/$$FILE failed" && sleep $$((7 * "$$i")) && continue 2; } | done | else | $awsCmd s3 cp --no-progress "$$local_path" "$$destination" || - | ( echo "attempt $$i to copy $$local_path failed" && sleep $$((7 * "$$i")) && continue) + | { echo "attempt $$i to copy $$local_path failed" && sleep $$((7 * "$$i")) && continue; } | # check content length for data integrity | _check_data_integrity $$local_path $$destination || - | ( echo "data content length difference detected in attempt $$i to copy $$local_path failed" && sleep $$((7 * "$$i")) && continue) + | { echo "data content length difference detected in attempt $$i to copy $$local_path failed" && sleep $$((7 * "$$i")) && continue; } | fi | # copy succeeded | break @@ -235,10 +235,10 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL | exit 1 | fi | s3_content_length=$$($awsCmd s3api head-object --bucket "$$bucket" --key "$$key" --query 'ContentLength') || - | ( echo "Attempt to get head of object failed for $$s3_path." && return 1 ) + | { echo "Attempt to get head of object failed for $$s3_path." && return 1 ; } | # local | local_content_length=$$(LC_ALL=C ls -dn -- "$$local_path" | awk '{print $$5; exit}' ) || - | ( echo "Attempt to get local content length failed for $$_local_path." && return 1 ) + | { echo "Attempt to get local content length failed for $$_local_path." && return 1; } | # compare | if [[ "$$s3_content_length" -eq "$$local_content_length" ]]; then | true @@ -277,7 +277,7 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL s""" |touch ${output.name} |_s3_delocalize_with_retry ${output.name} ${output.s3key} - |if [ -e $globDirectory ]; then _s3_delocalize_with_retry $globDirectory $s3GlobOutDirectory "; fi""".stripMargin + |if [ -e $globDirectory ]; then _s3_delocalize_with_retry $globDirectory $s3GlobOutDirectory ; fi""".stripMargin case output: AwsBatchFileOutput if output.s3key.startsWith("s3://") && output.mount.mountPoint.pathAsString == AwsBatchWorkingDisk.MountPoint.pathAsString => diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md index 62a0495c1be..df83472bdc2 100644 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md @@ -133,6 +133,48 @@ Notes: - local hashing means that all used containers are pulled. Make sure you have enough storage - enable a database to make the cache persistent over cromwell restarts +### Retry with more memory + +Cromwell can be configured to retry jobs with more allocated memory, under a defined set of conditions. 
To enable this, set the following parameters: + +cromwell configuration: `cromwell.config`: +``` +// set the maximal amount of retries. +// backend.providers.AWSBatch.config.default-runtime-attribues.maxRetries +backend { + providers { + AWSBatch { + config { + default-runtime-attributes { + maxRetries: 6 + } + } + } + } +} + +// set the keys for Out-Of-Memory killing. +// system.io.memory-retry-error-keys +system{ + io{ + memory-retry-error-keys = ["OutOfMemory","Killed"] + } +} +``` + +Workflow specific runtime options : `workflow_options.json`: +``` +{ + "memory_retry_multiplier" : 1.5 +} +``` + +When providing the options.json file during workflow submission, jobs that were terminated due to insufficient memory will be retried 6 times, with increasing memory allocation. For example 4Gb => 6Gb => 9Gb => 13.5Gb => ... + +Note: Retries of jobs using the `awsBatchRetryAttempts` counter do *not* increase memory allocation. + + + AWS Batch --------- From ad8f194c87f99f191fe7e3e8be7bb4c984a39461 Mon Sep 17 00:00:00 2001 From: geertvandeweyer Date: Sat, 9 Apr 2022 19:10:14 +0200 Subject: [PATCH 062/326] fix nullPointer Exception in new retryfunction in case of spot kills --- ...wsBatchAsyncBackendJobExecutionActor.scala | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala index 06d9d4dce35..c6a9ed3aae3 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala @@ -526,22 +526,33 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar // new OOM detection override def memoryRetryRC(job: StandardAsyncJob): Future[Boolean] = Future { + // STATUS LOGIC: + // - success : container exit code is zero + // - command failure: container exit code > 0, no statusReason in container + // - OOM kill : container exit code > 0, statusReason contains "OutOfMemory" + // - spot kill : no container exit code set. statusReason of ATTEMPT (not container) says "host EC2 (...) terminated" + Log.debug(s"Looking for memoryRetry in job '${job.jobId}'") val describeJobsResponse = batchClient.describeJobs(DescribeJobsRequest.builder.jobs(job.jobId).build) val jobDetail = describeJobsResponse.jobs.get(0) //OrElse(throw new RuntimeException(s"Could not get job details for job '${job.jobId}'")) val nrAttempts = jobDetail.attempts.size val lastattempt = jobDetail.attempts.get(nrAttempts-1) - val containerRC = lastattempt.container.exitCode + var containerRC = lastattempt.container.exitCode + // if missing, set to failed. + if (containerRC == null ) { + Log.debug(s"No RC found for job '${job.jobId}', most likely a spot kill") + containerRC = 1 + } // if not zero => get reason, else set retry to false. containerRC.toString() match { case "0" => - Log.debug("container exit code was zero") + Log.debug("container exit code was zero. job succeeded") false case _ => - // not every failed job has a container exit reason. + // failed job due to command errors (~ user errors) don't have a container exit reason. val containerStatusReason:String = { var lastReason = lastattempt.container.reason - // cast null to empty-string to prevent nullpointer execption. 
+ // cast null to empty-string to prevent nullpointer exception. if (lastReason == null || lastReason.isEmpty) { lastReason = "" log.debug("No exit reason found for container.") From d4cca503e7470a00a6d1fe6926f3cd5c3e34efe0 Mon Sep 17 00:00:00 2001 From: geertvandeweyer Date: Tue, 12 Jul 2022 14:33:49 +0200 Subject: [PATCH 063/326] add multipart upload configuration to job script and streamline with internal cromwell call cache copy procedures --- .../org/lerch/s3fs/S3FileSystemProvider.java | 37 +++++++++++++------ .../backend/impl/aws/AwsBatchJob.scala | 33 ++++++++++++++++- .../scala/cromwell/backend/impl/aws/README.md | 23 ++++++++++++ 3 files changed, 80 insertions(+), 13 deletions(-) diff --git a/filesystems/s3/src/main/java/org/lerch/s3fs/S3FileSystemProvider.java b/filesystems/s3/src/main/java/org/lerch/s3fs/S3FileSystemProvider.java index 208db86d1ba..b8a3b44aa08 100644 --- a/filesystems/s3/src/main/java/org/lerch/s3fs/S3FileSystemProvider.java +++ b/filesystems/s3/src/main/java/org/lerch/s3fs/S3FileSystemProvider.java @@ -40,6 +40,9 @@ import static java.lang.Thread.currentThread; import static java.lang.Thread.sleep; import static org.lerch.s3fs.AmazonS3Factory.*; +import com.typesafe.config.Config; +import com.typesafe.config.ConfigFactory; + /** * @@ -82,7 +85,7 @@ public class S3FileSystemProvider extends FileSystemProvider { private Cache cache = new Cache(); private final Logger log = Logger.getLogger(this.getClass().getName()); - + @Override public String getScheme() { return "s3"; @@ -422,6 +425,12 @@ public void copy(Path source, Path target, CopyOption... options) throws IOExcep final S3Path s3Source = toS3Path(source); final S3Path s3Target = toS3Path(target); + log.fine("Starting copy of "+source); + // get/set threshold for multipart uploads. + final Config conf = ConfigFactory.load(); + Long threshold = conf.hasPath("engine.filesystems.s3.MultipartThreshold") ? conf.getMemorySize("engine.filesystems.s3.MultipartThreshold").toBytes() : 5L * 1024L * 1024L * 1024L; + log.fine("MultiPart Threshold during S3 copy: "+threshold+ " bytes"); + final ImmutableSet actualOptions = ImmutableSet.copyOf(options); verifySupportedOptions(EnumSet.of(StandardCopyOption.REPLACE_EXISTING), actualOptions); @@ -429,15 +438,17 @@ public void copy(Path source, Path target, CopyOption... options) throws IOExcep if (exists(s3Target) && !actualOptions.contains(StandardCopyOption.REPLACE_EXISTING)) { throw new FileAlreadyExistsException(format("target already exists: %s", target)); } - long objectSize = this.objectSize(s3Source); - long threshold = 5L * 1024L * 1024L * 1024L; //5GB if (objectSize >= threshold) { // large file, do a multipart copy + log.fine("Doing multipart copy for "+s3Source+" to "+s3Target); multiPartCopy(s3Source, objectSize, s3Target, options); + log.fine("MP copy finished for : "+source); } else { //do a normal copy + log.fine("Doing normal copy for "+s3Source+" to "+s3Target); + String bucketNameOrigin = s3Source.getFileStore().name(); String keySource = s3Source.getKey(); String bucketNameTarget = s3Target.getFileStore().name(); @@ -451,6 +462,7 @@ public void copy(Path source, Path target, CopyOption... options) throws IOExcep .destinationBucket(bucketNameTarget) .destinationKey(keyTarget) .build()); + log.fine("Copy finished : "+source); } } @@ -463,7 +475,7 @@ public void copy(Path source, Path target, CopyOption... options) throws IOExcep * @param options copy options */ private void multiPartCopy(S3Path source, long objectSize, S3Path target, CopyOption... 
options) { - log.info(() -> "Attempting multipart copy as part of call cache hit: source = " + source + ", objectSize = " + objectSize + ", target = " + target + ", options = " + Arrays.deepToString(options)); + log.fine(() -> "Attempting multipart copy as part of call cache hit: source = " + source + ", objectSize = " + objectSize + ", target = " + target + ", options = " + Arrays.deepToString(options)); S3Client s3Client = target.getFileStore().getClient(); @@ -482,10 +494,13 @@ private void multiPartCopy(S3Path source, long objectSize, S3Path target, CopyOp List> uploadFutures = new ArrayList<>(); /* if you set this number to a larger value then ensure the HttpClient has sufficient - maxConnections (see org.lerch.s3fsAmazonS3Factory.getHttpClient) */ - int THREADS = 500; + maxConnections (see org.lerch.s3fsAmazonS3Factory.getHttpClient) : + default value == 500 */ + final Config conf = ConfigFactory.load(); + // override max connections if set. + final int THREADS = conf.hasPath("engine.filesystems.s3.threads") ? Integer.parseInt(conf.getString("engine.filesystems.s3.threads")) : 500; - log.info(() -> "Allocating work stealing pool with "+THREADS+" threads"); + log.fine(() -> "Allocating work stealing pool with "+THREADS+" threads"); final ExecutorService MULTIPART_OPERATION_EXECUTOR_SERVICE = Executors.newWorkStealingPool(THREADS); @@ -535,7 +550,7 @@ private void multiPartCopy(S3Path source, long objectSize, S3Path target, CopyOp log.severe("Max Memory = "+runtime.maxMemory()); // clean up so we don't get zombie threads - log.info(() -> "Shutting down work stealing pool"); + log.fine(() -> "Shutting down work stealing pool"); MULTIPART_OPERATION_EXECUTOR_SERVICE.shutdown(); log.throwing(S3FileSystemProvider.class.getName(), "multiPartCopy", e); @@ -562,7 +577,7 @@ private void multiPartCopy(S3Path source, long objectSize, S3Path target, CopyOp .build()) .collect(Collectors.toList()); - log.info(() -> "Shutting down work stealing pool"); + log.fine(() -> "Shutting down work stealing pool"); MULTIPART_OPERATION_EXECUTOR_SERVICE.shutdown(); // build a request to complete the upload @@ -579,7 +594,7 @@ private void multiPartCopy(S3Path source, long objectSize, S3Path target, CopyOp // make a request to complete the multipart upload final CompleteMultipartUploadResponse completeMultipartUploadResponse = s3Client.completeMultipartUpload(completeMultipartUploadRequest); - log.info(() -> "Multipart copy complete with status code: "+completeMultipartUploadResponse.sdkHttpResponse().statusCode()); + log.fine(() -> "Multipart copy complete with status code: "+completeMultipartUploadResponse.sdkHttpResponse().statusCode()); } catch (AwsServiceException | SdkClientException e) { log.warning(() -> "An "+e.getClass().getName()+" with message "+e.getMessage()+ " occurred while completing the multipart upload. 
Will try again."); @@ -590,7 +605,7 @@ private void multiPartCopy(S3Path source, long objectSize, S3Path target, CopyOp } final CompleteMultipartUploadResponse completeMultipartUploadResponse = s3Client.completeMultipartUpload(completeMultipartUploadRequest); - log.info(() -> "Multipart copy complete with status code: "+completeMultipartUploadResponse.sdkHttpResponse().statusCode()); + log.fine(() -> "Multipart copy complete with status code: "+completeMultipartUploadResponse.sdkHttpResponse().statusCode()); } } diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala index 757c7ab7b87..7ad344f91e5 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala @@ -60,6 +60,8 @@ import scala.collection.JavaConverters._ import scala.concurrent.duration._ import scala.language.higherKinds import scala.util.Try +import com.typesafe.config.Config +import com.typesafe.config.ConfigFactory /** * The actual job for submission in AWS batch. `AwsBatchJob` is the primary interface to AWS Batch. It creates the @@ -146,10 +148,15 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL case _ => "" }.toList.mkString("\n") + // get multipart threshold from config. + val conf : Config = ConfigFactory.load(); + val mp_threshold : Long = if (conf.hasPath("engine.filesystems.s3.MultipartThreshold") ) conf.getMemorySize("engine.filesystems.s3.MultipartThreshold").toBytes() else 5L * 1024L * 1024L * 1024L; + Log.info(s"MultiPart Threshold for delocalizing is $mp_threshold") + // this goes at the start of the script after the #! 
val preamble = s""" - |export AWS_METADATA_SERVICE_TIMEOUT=10 + |export AWS_METADATA_SERVICE_TIMEOUT=10 |export AWS_METADATA_SERVICE_NUM_ATTEMPTS=10 | |function _s3_localize_with_retry() { @@ -171,7 +178,7 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL | fi | # copy | $awsCmd s3 cp --no-progress "$$s3_path" "$$destination" || - | { echo "attempt $$i to copy $$s3_path failed" sleep $$((7 * "$$i")) && continue; } + | { echo "attempt $$i to copy $$s3_path failed" && sleep $$((7 * "$$i")) && continue; } | # check data integrity | _check_data_integrity $$destination $$s3_path || | { echo "data content length difference detected in attempt $$i to copy $$local_path failed" && sleep $$((7 * "$$i")) && continue; } @@ -181,10 +188,20 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL |} | |function _s3_delocalize_with_retry() { + | # input variables | local local_path=$$1 | # destination must be the path to a file and not just the directory you want the file in | local destination=$$2 | + | # get the multipart chunk size + | chunk_size=$$(_get_multipart_chunk_size $$local_path) + | echo "chunk size : $$chunk_size bytes" + | local MP_THRESHOLD=${mp_threshold} + | # then set them + | $awsCmd configure set default.s3.multipart_threshold $$MP_THRESHOLD + | $awsCmd configure set default.s3.multipart_chunksize $$chunk_size + | + | # try & validate upload 5 times | for i in {1..6}; | do | # if tries exceeded : abort @@ -221,6 +238,18 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL | done |} | + |function _get_multipart_chunk_size() { + | local file_path=$$1 + | # file size + | file_size=$$(stat --printf="%s" $$file_path) + | # chunk_size : you can have at most 10K parts with at least one 5MB part + | # this reflects the formula in s3-copy commands of cromwell (S3FileSystemProvider.java) + | # => long partSize = Math.max((objectSize / 10000L) + 1, 5 * 1024 * 1024); + | a=$$(( ( file_size / 10000) + 1 )) + | b=$$(( 5 * 1024 * 1024 )) + | chunk_size=$$(( a > b ? a : b )) + | echo $$chunk_size + } |function _check_data_integrity() { | local local_path=$$1 | local s3_path=$$2 diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md index df83472bdc2..2003e0f9ef3 100644 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md @@ -173,6 +173,29 @@ When providing the options.json file during workflow submission, jobs that were Note: Retries of jobs using the `awsBatchRetryAttempts` counter do *not* increase memory allocation. +### Multipart copy settings + +Multipart copying is a technology to increase upload performance by splitting the file in parallel processes. The awscli client does this automatically for all files over 8Mb. If a file was uploaded using MultiPart uploads, this is reflected in the ETAG value by a trailing '-\d+', where '\d+' reflects the number of parts. + +Cromwell uses a default threshold of 5Gb for multipart copying during cache-copy processes, and tries to create as much parts as possible (minimal size 5Mb, max parts 10K). Although the default settings are fine, this threshold can be adjusted to reflect ETAG-expectations in for examples upstream/downstream applications. If the treshold changes, the ETAGs will differ after copying the data. 
+ +s3 multipart specific options: `cromwell.config`: +``` +// activate s3 as a supported filesystem +engine { + filesystems { + s3 { + auth = "default", + enabled: true, + # at what size should we start using multipart uploads ? + MultipartThreshold = "4G", + # multipart copying threads : if you set this number to a larger value then ensure the HttpClient has sufficient + # maxConnections (see org.lerch.s3fsAmazonS3Factory.getHttpClient). Default : 500 + threads = 50 + } + } +} +``` AWS Batch From 2bcf564ad7cd8611ba3a8ae7932e9baab2f5bb1c Mon Sep 17 00:00:00 2001 From: geertvandeweyer Date: Tue, 12 Jul 2022 14:34:09 +0200 Subject: [PATCH 064/326] reduce logging a bit --- .../s3/src/main/java/org/lerch/s3fs/util/S3ClientStore.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/filesystems/s3/src/main/java/org/lerch/s3fs/util/S3ClientStore.java b/filesystems/s3/src/main/java/org/lerch/s3fs/util/S3ClientStore.java index 818a0004a3e..7cfd648e7a7 100644 --- a/filesystems/s3/src/main/java/org/lerch/s3fs/util/S3ClientStore.java +++ b/filesystems/s3/src/main/java/org/lerch/s3fs/util/S3ClientStore.java @@ -55,10 +55,10 @@ protected S3Client generateClient(String bucketName){ * @return an S3 client appropriate for the region of the named bucket */ protected S3Client generateClient (String bucketName, S3Client locationClient) { - logger.info("generating client for bucket: '{}'", bucketName); + logger.debug("generating client for bucket: '{}'", bucketName); S3Client bucketSpecificClient; try { - logger.info("determining bucket location with getBucketLocation"); + logger.debug("determining bucket location with getBucketLocation"); String bucketLocation = locationClient.getBucketLocation(builder -> builder.bucket(bucketName)).locationConstraintAsString(); bucketSpecificClient = this.clientForRegion(bucketLocation); @@ -93,7 +93,7 @@ private S3Client clientForRegion(String regionString){ // It may be useful to further cache clients for regions although at some point clients for buckets may need to be // specialized beyond just region end points. Region region = regionString.equals("") ? Region.US_EAST_1 : Region.of(regionString); - logger.info("bucket region is: '{}'", region.id()); + logger.debug("bucket region is: '{}'", region.id()); return S3Client.builder().region(region).build(); } From b3dea498201caa047e78b75e4fa69a84016bf4bc Mon Sep 17 00:00:00 2001 From: geertvandeweyer Date: Tue, 12 Jul 2022 14:34:41 +0200 Subject: [PATCH 065/326] take httpClient max-connections from config if present --- .../java/org/lerch/s3fs/AmazonS3Factory.java | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/filesystems/s3/src/main/java/org/lerch/s3fs/AmazonS3Factory.java b/filesystems/s3/src/main/java/org/lerch/s3fs/AmazonS3Factory.java index 9d525587642..c45c1b08574 100644 --- a/filesystems/s3/src/main/java/org/lerch/s3fs/AmazonS3Factory.java +++ b/filesystems/s3/src/main/java/org/lerch/s3fs/AmazonS3Factory.java @@ -14,7 +14,8 @@ import java.net.URI; import java.util.Properties; - +import com.typesafe.config.Config; +import com.typesafe.config.ConfigFactory; /** * Factory base class to create a new AmazonS3 instance. 
@@ -99,7 +100,20 @@ protected AwsCredentials getAWSCredentials(Properties props) { protected SdkHttpClient getHttpClient(Properties props) { // TODO: custom http configuration based on properties - return ApacheHttpClient.builder().maxConnections(1024).build(); + ApacheHttpClient.Builder httpClientBuilder = ApacheHttpClient.builder(); + // override max connections if set in cromwell.config : engine.filesystems. + final Config conf = ConfigFactory.load(); + log.fine("trying to override some settings in httpclient."); + if (conf.hasPath("akka.http.server.max-connections") ) { + log.debug("Overriding maxconnections to "+conf.getString("akka.http.server.max-connections")); + httpClientBuilder = httpClientBuilder.maxConnections(Integer.parseInt(conf.getString("akka.http.server.max-connections"))); + } + else { + log.debug("MaxConnections not found in config. Using default value of 1024"); + httpClientBuilder = httpClientBuilder.maxConnections(1024); + } + //return ApacheHttpClient.builder().maxConnections(1024).build(); + return httpClientBuilder.build(); } protected S3Configuration getServiceConfiguration(Properties props) { From 258279ad2e0c97c01832789b8307406805c26def Mon Sep 17 00:00:00 2001 From: geertvandeweyer Date: Tue, 12 Jul 2022 19:23:46 +0200 Subject: [PATCH 066/326] move more logging to debug channel --- .../cromwell/backend/standard/StandardAsyncExecutionActor.scala | 2 +- .../s3/src/main/java/org/lerch/s3fs/AmazonS3Factory.java | 2 +- .../src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala index 2ef8b7bd5b7..8a032a02faa 100644 --- a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala +++ b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala @@ -385,7 +385,7 @@ trait StandardAsyncExecutionActor def commandScriptContents: ErrorOr[String] = { val commandString = instantiatedCommand.commandString val commandStringAbbreviated = StringUtils.abbreviateMiddle(commandString, "...", abbreviateCommandLength) - jobLogger.info(s"`$commandStringAbbreviated`") + jobLogger.debug(s"`$commandStringAbbreviated`") tellMetadata(Map(CallMetadataKeys.CommandLine -> commandStringAbbreviated)) val cwd = commandDirectory diff --git a/filesystems/s3/src/main/java/org/lerch/s3fs/AmazonS3Factory.java b/filesystems/s3/src/main/java/org/lerch/s3fs/AmazonS3Factory.java index c45c1b08574..08ad1abafa5 100644 --- a/filesystems/s3/src/main/java/org/lerch/s3fs/AmazonS3Factory.java +++ b/filesystems/s3/src/main/java/org/lerch/s3fs/AmazonS3Factory.java @@ -103,7 +103,7 @@ protected SdkHttpClient getHttpClient(Properties props) { ApacheHttpClient.Builder httpClientBuilder = ApacheHttpClient.builder(); // override max connections if set in cromwell.config : engine.filesystems. 
final Config conf = ConfigFactory.load(); - log.fine("trying to override some settings in httpclient."); + log.debug("trying to override some settings in httpclient."); if (conf.hasPath("akka.http.server.max-connections") ) { log.debug("Overriding maxconnections to "+conf.getString("akka.http.server.max-connections")); httpClientBuilder = httpClientBuilder.maxConnections(Integer.parseInt(conf.getString("akka.http.server.max-connections"))); diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala index 7ad344f91e5..b13e58e2b91 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala @@ -151,7 +151,7 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL // get multipart threshold from config. val conf : Config = ConfigFactory.load(); val mp_threshold : Long = if (conf.hasPath("engine.filesystems.s3.MultipartThreshold") ) conf.getMemorySize("engine.filesystems.s3.MultipartThreshold").toBytes() else 5L * 1024L * 1024L * 1024L; - Log.info(s"MultiPart Threshold for delocalizing is $mp_threshold") + Log.debug(s"MultiPart Threshold for delocalizing is $mp_threshold") // this goes at the start of the script after the #! val preamble = From 4b501ee8c5f03d12f9d2bcb1014d60d652bf4537 Mon Sep 17 00:00:00 2001 From: geertvandeweyer Date: Fri, 19 Aug 2022 10:40:16 +0200 Subject: [PATCH 067/326] more logging reduction --- .../lifecycle/execution/job/EngineJobExecutionActor.scala | 2 +- .../main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/job/EngineJobExecutionActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/job/EngineJobExecutionActor.scala index 891a5b6dd26..3402bc7c085 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/job/EngineJobExecutionActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/job/EngineJobExecutionActor.scala @@ -262,7 +262,7 @@ class EngineJobExecutionActor(replyTo: ActorRef, log.info(template, jobTag, data.failedCopyAttempts, callCachingParameters.maxFailedCopyAttempts, data.aggregatedHashString) } else { log.info(s"BT-322 {} cache hit copying nomatch: could not find a suitable cache hit.", jobTag) - workflowLogger.info("Could not copy a suitable cache hit for {}. No copy attempts were made.", jobTag) + workflowLogger.debug("Could not copy a suitable cache hit for {}. 
No copy attempts were made.", jobTag) } runJob(data) diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala index b13e58e2b91..f0aeab3fbbd 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala @@ -195,7 +195,6 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL | | # get the multipart chunk size | chunk_size=$$(_get_multipart_chunk_size $$local_path) - | echo "chunk size : $$chunk_size bytes" | local MP_THRESHOLD=${mp_threshold} | # then set them | $awsCmd configure set default.s3.multipart_threshold $$MP_THRESHOLD @@ -378,7 +377,8 @@ final case class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL //calls the client to submit the job def callClient(definitionArn: String, awsBatchAttributes: AwsBatchAttributes): Aws[F, SubmitJobResponse] = { - Log.info(s"Submitting taskId: $taskId, job definition : $definitionArn, script: $batch_script") + Log.debug(s"Submitting taskId: $taskId, job definition : $definitionArn, script: $batch_script") + Log.info(s"Submitting taskId: $taskId, script: $batch_script") val submit: F[SubmitJobResponse] = async.delay(batchClient.submitJob( From 517a6158774315b28f78e3d099792e89c3d740d3 Mon Sep 17 00:00:00 2001 From: geertvandeweyer Date: Fri, 19 Aug 2022 10:43:21 +0200 Subject: [PATCH 068/326] handle jobs terminated before first attempt in retry logic --- ...wsBatchAsyncBackendJobExecutionActor.scala | 77 +++++++++++++++++-- 1 file changed, 69 insertions(+), 8 deletions(-) diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala index c6a9ed3aae3..d1c774ce806 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala @@ -37,11 +37,16 @@ import java.io.FileNotFoundException import akka.actor.ActorRef import akka.pattern.AskSupport import akka.util.Timeout + +import cats.implicits._ + +import common.exception.MessageAggregation import common.collections.EnhancedCollections._ import common.util.StringUtil._ import common.validation.Validation._ + import cromwell.backend._ -import cromwell.backend.async.{ExecutionHandle, PendingExecutionHandle} +import cromwell.backend.async._ //{ExecutionHandle, PendingExecutionHandle} import cromwell.backend.impl.aws.IntervalLimitedAwsJobSubmitActor.SubmitAwsJobRequest import cromwell.backend.impl.aws.OccasionalStatusPollingActor.{NotifyOfStatus, WhatsMyStatus} import cromwell.backend.impl.aws.RunStatus.{Initializing, TerminalRunStatus} @@ -49,16 +54,21 @@ import cromwell.backend.impl.aws.io._ import cromwell.backend.io.DirectoryFunctions import cromwell.backend.io.JobPaths import cromwell.backend.standard.{StandardAsyncExecutionActor, StandardAsyncExecutionActorParams, StandardAsyncJob} +import cromwell.backend.OutputEvaluator._ + import cromwell.core._ +import cromwell.core.path.Path import cromwell.core.path.{DefaultPathBuilder, Path, PathBuilder, PathFactory} import cromwell.core.io.{DefaultIoCommandBuilder, IoCommandBuilder} import cromwell.core.retry.SimpleExponentialBackoff + import 
cromwell.filesystems.s3.S3Path import cromwell.filesystems.s3.batch.S3BatchCommandBuilder + import cromwell.services.keyvalue.KvClient + import org.slf4j.{Logger, LoggerFactory} import software.amazon.awssdk.services.batch.BatchClient -//import software.amazon.awssdk.services.batch.model.{BatchException, SubmitJobResponse} import software.amazon.awssdk.services.batch.model._ import wom.callable.Callable.OutputDefinition @@ -67,7 +77,7 @@ import wom.expression.NoIoFunctionSet import wom.types.{WomArrayType, WomSingleFileType} import wom.values._ -import scala.concurrent.{Future, Promise} +import scala.concurrent._ import scala.concurrent.duration._ import scala.language.postfixOps import scala.util.control.NoStackTrace @@ -510,7 +520,7 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar case NotifyOfStatus(_, _, Some(value)) => Future.successful(value) case NotifyOfStatus(_, _, None) => - jobLogger.info("Having to fall back to AWS query for status") + jobLogger.debug("Having to fall back to AWS query for status") Future.fromTry(job.status(jobId)) case other => val message = s"Programmer Error (please report this): Received an unexpected message from the OccasionalPollingActor: $other" @@ -536,8 +546,24 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar val describeJobsResponse = batchClient.describeJobs(DescribeJobsRequest.builder.jobs(job.jobId).build) val jobDetail = describeJobsResponse.jobs.get(0) //OrElse(throw new RuntimeException(s"Could not get job details for job '${job.jobId}'")) val nrAttempts = jobDetail.attempts.size - val lastattempt = jobDetail.attempts.get(nrAttempts-1) - var containerRC = lastattempt.container.exitCode + // if job is terminated/cancelled before starting, there are no attempts. + val lastattempt = + try { + jobDetail.attempts.get(nrAttempts-1) + } catch { + case _ : Throwable => null + } + if (lastattempt == null ) { + Log.info(s"No attempts were made for job '${job.jobId}'. no memory-related retry needed.") + false + } + + var containerRC = + try { + lastattempt.container.exitCode + } catch { + case _ : Throwable => null + } // if missing, set to failed. if (containerRC == null ) { Log.debug(s"No RC found for job '${job.jobId}', most likely a spot kill") @@ -621,8 +647,9 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar override def getTerminalEvents(runStatus: RunStatus): Seq[ExecutionEvent] = { runStatus match { case successStatus: RunStatus.Succeeded => successStatus.eventList - case unknown => - throw new RuntimeException(s"handleExecutionSuccess not called with RunStatus.Success. Instead got $unknown") + case unknown => { + throw new RuntimeException(s"handleExecutionSuccess not called with RunStatus.Success. 
Instead got $unknown") + } } } @@ -642,4 +669,38 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar } ) } + + override def handleExecutionSuccess(runStatus: StandardAsyncRunState, + handle: StandardAsyncPendingExecutionHandle, + returnCode: Int)(implicit ec: ExecutionContext): Future[ExecutionHandle] = { + evaluateOutputs() map { + case ValidJobOutputs(outputs) => + // Need to make sure the paths are up to date before sending the detritus back in the response + updateJobPaths() + // If instance is terminated while copying stdout/stderr : status is failed while jobs outputs are ok + // => Retryable + if (runStatus.toString().equals("Failed")) { + jobLogger.warn("Got Failed RunStatus for success Execution") + + val exception = new MessageAggregation { + override def exceptionContext: String = "Got Failed RunStatus for success Execution" + override def errorMessages: Traversable[String] = Array("Got Failed RunStatus for success Execution") + } + FailedNonRetryableExecutionHandle(exception, kvPairsToSave = None) + } else { + SuccessfulExecutionHandle(outputs, returnCode, jobPaths.detritusPaths, getTerminalEvents(runStatus)) + } + case InvalidJobOutputs(errors) => + val exception = new MessageAggregation { + override def exceptionContext: String = "Failed to evaluate job outputs" + override def errorMessages: Traversable[String] = errors.toList + } + FailedNonRetryableExecutionHandle(exception, kvPairsToSave = None) + case JobOutputsEvaluationException(exception: Exception) if retryEvaluateOutputsAggregated(exception) => + // Return the execution handle in this case to retry the operation + handle + case JobOutputsEvaluationException(ex) => FailedNonRetryableExecutionHandle(ex, kvPairsToSave = None) + } + } + } From 808767272796f9b40d50bbce1cd73b46119e4f93 Mon Sep 17 00:00:00 2001 From: geertvandeweyer Date: Fri, 19 Aug 2022 10:44:20 +0200 Subject: [PATCH 069/326] handle spot kills AFTER RC.txt file uploaded as a regular spot kill and allow retry --- .../backend/async/KnownJobFailureException.scala | 4 ++++ .../standard/StandardAsyncExecutionActor.scala | 16 ++++++++++------ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/backend/src/main/scala/cromwell/backend/async/KnownJobFailureException.scala b/backend/src/main/scala/cromwell/backend/async/KnownJobFailureException.scala index 8dd9aef3544..97b76c77f5e 100644 --- a/backend/src/main/scala/cromwell/backend/async/KnownJobFailureException.scala +++ b/backend/src/main/scala/cromwell/backend/async/KnownJobFailureException.scala @@ -13,6 +13,10 @@ final case class WrongReturnCode(jobTag: String, returnCode: Int, stderrPath: Op override def getMessage = s"Job $jobTag exited with return code $returnCode which has not been declared as a valid return code. See 'continueOnReturnCode' runtime attribute for more details." } +final case class UnExpectedStatus(jobTag: String, returnCode: Int, jobStatus: String, stderrPath: Option[Path]) extends KnownJobFailureException { + override def getMessage = s"Job $jobTag exited with success code '$returnCode' but failed status '$jobStatus'. Suspecting spot kill and retrying." 
+} + final case class ReturnCodeIsNotAnInt(jobTag: String, returnCode: String, stderrPath: Option[Path]) extends KnownJobFailureException { override def getMessage = { if (returnCode.isEmpty) diff --git a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala index 8a032a02faa..38d9a821f7e 100644 --- a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala +++ b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala @@ -364,7 +364,7 @@ trait StandardAsyncExecutionActor * to re-do this before sending the response. */ private var jobPathsUpdated: Boolean = false - private def updateJobPaths(): Unit = if (!jobPathsUpdated) { + def updateJobPaths(): Unit = if (!jobPathsUpdated) { // .get's are safe on stdout and stderr after falling back to default names above. jobPaths.standardPaths = StandardPaths( output = hostPathFromContainerPath(executionStdout), @@ -1161,7 +1161,7 @@ trait StandardAsyncExecutionActor configurationDescriptor.slowJobWarningAfter foreach { duration => self ! WarnAboutSlownessAfter(handle.pendingJob.jobId, duration) } tellKvJobId(handle.pendingJob) map { _ => - if (logJobIds) jobLogger.info(s"job id: ${handle.pendingJob.jobId}") + if (logJobIds) jobLogger.debug(s"job id: ${handle.pendingJob.jobId}") tellMetadata(Map(CallMetadataKeys.JobId -> handle.pendingJob.jobId)) /* NOTE: Because of the async nature of the Scala Futures, there is a point in time where we have submitted this or @@ -1280,7 +1280,7 @@ trait StandardAsyncExecutionActor stderrSizeAndReturnCodeAndMemoryRetry flatMap { case (stderrSize, returnCodeAsString, retryWithMoreMemory) => val tryReturnCodeAsInt = Try(returnCodeAsString.trim.toInt) - + jobLogger.debug(s"Handling execution Result with status '${status.toString()}' and returnCode ${returnCodeAsString}") if (isDone(status)) { tryReturnCodeAsInt match { // stderr not empty : retry @@ -1292,12 +1292,17 @@ trait StandardAsyncExecutionActor case Success(returnCodeAsInt) if isAbort(returnCodeAsInt) && !retryWithMoreMemory => jobLogger.debug(s"Job was aborted, code was : '${returnCodeAsString}'") Future.successful(AbortedExecutionHandle) + // if instance killed after RC.txt creation : edge case with status == Failed AND returnCode == [accepted values] => retry. 
+ case Success(returnCodeAsInt) if status.toString() == "Failed" && continueOnReturnCode.continueFor(returnCodeAsInt) => + jobLogger.debug(s"Suspected spot kill due to status/RC mismatch") + val executionHandle = Future.successful(FailedNonRetryableExecutionHandle(UnExpectedStatus(jobDescriptor.key.tag, returnCodeAsInt, status.toString(), stderrAsOption), Option(returnCodeAsInt), None)) + retryElseFail(executionHandle) // job considered ok by accepted exit code case Success(returnCodeAsInt) if continueOnReturnCode.continueFor(returnCodeAsInt) => handleExecutionSuccess(status, oldHandle, returnCodeAsInt) // job failed on out-of-memory : retry case Success(returnCodeAsInt) if retryWithMoreMemory => - jobLogger.warn(s"Retrying job due to OOM with exit code : '${returnCodeAsString}' ") + jobLogger.info(s"Retrying job due to OOM with exit code : '${returnCodeAsString}' ") val executionHandle = Future.successful(FailedNonRetryableExecutionHandle(RetryWithMoreMemory(jobDescriptor.key.tag, stderrAsOption, memoryRetryErrorKeys, log), Option(returnCodeAsInt), None)) retryElseFail(executionHandle, retryWithMoreMemory) // unaccepted return code : retry. @@ -1338,8 +1343,7 @@ trait StandardAsyncExecutionActor if (fileExists) asyncIo.contentAsStringAsync(jobPaths.returnCode, None, failOnOverflow = false) else { - jobLogger.warn("RC file not found. Setting job to failed & retry.") - //Thread.sleep(300000) + jobLogger.debug("RC file not found. Setting job to failed.") Future("1") } } From 56d927eeb530247973cfd7fd9fc4df7596972ab8 Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Fri, 19 Aug 2022 11:34:36 +0100 Subject: [PATCH 070/326] fix depecrated --- .../backend/impl/aws/AwsBatchInitializationActor.scala | 2 +- .../main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala | 6 ++---- .../backend/impl/aws/AwsBatchRuntimeAttributes.scala | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchInitializationActor.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchInitializationActor.scala index d608c162be0..16241dbb8ce 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchInitializationActor.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchInitializationActor.scala @@ -49,7 +49,7 @@ import spray.json.{JsObject, JsString} import org.slf4j.{Logger, LoggerFactory} import scala.concurrent.Future -import scala.collection.JavaConverters._ +import scala.jdk.CollectionConverters._ case class AwsBatchInitializationActorParams ( diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala index 5c97886130f..72469f75475 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchJob.scala @@ -55,10 +55,8 @@ import software.amazon.awssdk.services.s3.S3Client import software.amazon.awssdk.services.s3.model.{GetObjectRequest, HeadObjectRequest, NoSuchKeyException, PutObjectRequest} import wdl4s.parser.MemoryUnit -import java.security.MessageDigest -import scala.collection.JavaConverters._ +import scala.jdk.CollectionConverters._ import scala.concurrent.duration._ -import scala.language.higherKinds import scala.util.Try import com.typesafe.config.Config import com.typesafe.config.ConfigFactory @@ -136,7 +134,7 @@ final case 
class AwsBatchJob(jobDescriptor: BackendJobDescriptor, // WDL/CWL case input: AwsBatchFileInput if input.s3key.startsWith("s3://") => s"_s3_localize_with_retry ${input.s3key} ${input.mount.mountPoint.pathAsString}/${input.local}" - .replaceAllLiterally(AwsBatchWorkingDisk.MountPoint.pathAsString, workDir) + .replace(AwsBatchWorkingDisk.MountPoint.pathAsString, workDir) case input: AwsBatchFileInput => //here we don't need a copy command but the centaurTests expect us to verify the existence of the file diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchRuntimeAttributes.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchRuntimeAttributes.scala index 7a3b4fdc150..c6dcd6aecfa 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchRuntimeAttributes.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchRuntimeAttributes.scala @@ -423,7 +423,7 @@ object UlimitsValidation extends RuntimeAttributesValidation[Vector[Map[String, String]]] { override def key: String = AwsBatchRuntimeAttributes.UlimitsKey - override def coercion: Traversable[WomType] = + override def coercion: Iterable[WomType] = Set(WomStringType, WomArrayType(WomMapType(WomStringType, WomStringType))) var accepted_keys = Set("name", "softLimit", "hardLimit") From 87cffaa651242d2bb4affb9164e1a89f79aeec5e Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Fri, 19 Aug 2022 12:01:59 +0100 Subject: [PATCH 071/326] fix deprecated libraries --- .../lifecycle/execution/job/EngineJobExecutionActor.scala | 2 +- .../impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/job/EngineJobExecutionActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/job/EngineJobExecutionActor.scala index f2bd2dd4b3d..0f9eec73ef4 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/job/EngineJobExecutionActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/job/EngineJobExecutionActor.scala @@ -262,7 +262,7 @@ class EngineJobExecutionActor(replyTo: ActorRef, log.info(template, jobTag, data.failedCopyAttempts, callCachingParameters.maxFailedCopyAttempts, data.aggregatedHashString) } else { log.info(s"BT-322 {} cache hit copying nomatch: could not find a suitable cache hit.", jobTag) - workflowLogger.debug("Could not copy a suitable cache hit for {}. No copy attempts were made.", arg = jobTag) + workflowLogger.debug(s"Could not copy a suitable cache hit for {$jobTag}. 
No copy attempts were made.") } runJob(data) diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala index 4ccc6272fc0..af9d95f8b66 100755 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/AwsBatchAsyncBackendJobExecutionActor.scala @@ -57,7 +57,6 @@ import cromwell.backend.standard.{StandardAsyncExecutionActor, StandardAsyncExec import cromwell.backend.OutputEvaluator._ import cromwell.core._ -import cromwell.core.path.Path import cromwell.core.path.{DefaultPathBuilder, Path, PathBuilder, PathFactory} import cromwell.core.io.{DefaultIoCommandBuilder, IoCommandBuilder} import cromwell.core.retry.SimpleExponentialBackoff @@ -684,7 +683,7 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar val exception = new MessageAggregation { override def exceptionContext: String = "Got Failed RunStatus for success Execution" - override def errorMessages: Traversable[String] = Array("Got Failed RunStatus for success Execution") + override def errorMessages: Iterable[String] = Array("Got Failed RunStatus for success Execution") } FailedNonRetryableExecutionHandle(exception, kvPairsToSave = None) } else { @@ -693,7 +692,7 @@ class AwsBatchAsyncBackendJobExecutionActor(override val standardParams: Standar case InvalidJobOutputs(errors) => val exception = new MessageAggregation { override def exceptionContext: String = "Failed to evaluate job outputs" - override def errorMessages: Traversable[String] = errors.toList + override def errorMessages: Iterable[String] = errors.toList } FailedNonRetryableExecutionHandle(exception, kvPairsToSave = None) case JobOutputsEvaluationException(exception: Exception) if retryEvaluateOutputsAggregated(exception) => From e802f29549d15d807bff65d661d86883c940336e Mon Sep 17 00:00:00 2001 From: Katrina P <68349264+kpierre13@users.noreply.github.com> Date: Wed, 24 Aug 2022 13:09:32 -0400 Subject: [PATCH 072/326] BW-1206 - Combine all Wes Endpoints & add Tests (#6833) * Add tests, getting frid of WesRunRoutes.scala * wesWorkflowId fix, ec implicits errors gone * Refactoring path for GET /runs * Indentation fix * Commit to rollback * Revert "Indentation fix" This reverts commit 63fc4842c9d4eff68ec9cb7c3ef19e110696598b. 
* PR trigger * Optimize imports * Missed import --- .../cromwell/server/CromwellServer.scala | 4 +- .../routes/CromwellApiService.scala | 164 +++++++++--------- .../routes/WesCromwellRouteSupport.scala | 1 + .../routes/wes/WesRouteSupport.scala | 152 +++++++++++----- .../webservice/routes/wes/WesRunRoutes.scala | 94 ---------- .../routes/CromwellApiServiceSpec.scala | 21 ++- .../routes/wes/WesRouteSupportSpec.scala | 70 +++++++- 7 files changed, 266 insertions(+), 240 deletions(-) delete mode 100644 engine/src/main/scala/cromwell/webservice/routes/wes/WesRunRoutes.scala diff --git a/engine/src/main/scala/cromwell/server/CromwellServer.scala b/engine/src/main/scala/cromwell/server/CromwellServer.scala index b5705b91dae..76f784875fc 100644 --- a/engine/src/main/scala/cromwell/server/CromwellServer.scala +++ b/engine/src/main/scala/cromwell/server/CromwellServer.scala @@ -11,7 +11,6 @@ import cromwell.services.instrumentation.CromwellInstrumentationActor import cromwell.webservice.SwaggerService import cromwell.webservice.routes.CromwellApiService import cromwell.webservice.routes.wes.WesRouteSupport -import cromwell.webservice.routes.wes.WesRunRoutes import scala.concurrent.Future import scala.util.{Failure, Success} @@ -37,7 +36,6 @@ class CromwellServerActor(cromwellSystem: CromwellSystem, gracefulShutdown: Bool with CromwellApiService with CromwellInstrumentationActor with WesRouteSupport - with WesRunRoutes with SwaggerService with ActorLogging { implicit val actorSystem = context.system @@ -53,7 +51,7 @@ class CromwellServerActor(cromwellSystem: CromwellSystem, gracefulShutdown: Bool * cromwell.yaml is broken unless the swagger index.html is patched. Copy/paste the code from rawls or cromiam if * actual cromwell+swagger+oauth+/api support is needed. 
*/ - val apiRoutes: Route = pathPrefix("api")(concat(workflowRoutes, womtoolRoutes, wesRoutes, runRoutes)) + val apiRoutes: Route = pathPrefix("api")(concat(workflowRoutes, womtoolRoutes, wesRoutes)) val nonApiRoutes: Route = concat(engineRoutes, swaggerUiResourceRoute) val allRoutes: Route = concat(apiRoutes, nonApiRoutes) diff --git a/engine/src/main/scala/cromwell/webservice/routes/CromwellApiService.scala b/engine/src/main/scala/cromwell/webservice/routes/CromwellApiService.scala index 47aa8b57b5c..637509cc4fd 100644 --- a/engine/src/main/scala/cromwell/webservice/routes/CromwellApiService.scala +++ b/engine/src/main/scala/cromwell/webservice/routes/CromwellApiService.scala @@ -38,11 +38,9 @@ import scala.io.Source import scala.util.{Failure, Success, Try} trait CromwellApiService extends HttpInstrumentation with MetadataRouteSupport with WomtoolRouteSupport with WebServiceUtils with WesCromwellRouteSupport { - import CromwellApiService._ implicit def actorRefFactory: ActorRefFactory - implicit val materializer: ActorMaterializer implicit val ec: ExecutionContext @@ -57,9 +55,7 @@ trait CromwellApiService extends HttpInstrumentation with MetadataRouteSupport w } }, path("engine" / Segment / "version") { _ => - get { - complete(versionResponse) - } + get { complete(versionResponse) } }, path("engine" / Segment / "status") { _ => onComplete(serviceRegistryActor.ask(GetCurrentStatus).mapTo[StatusCheckResponse]) { @@ -74,11 +70,7 @@ trait CromwellApiService extends HttpInstrumentation with MetadataRouteSupport w val workflowRoutes = path("workflows" / Segment / "backends") { _ => - get { - instrumentRequest { - complete(ToResponseMarshallable(backendResponse)) - } - } + get { instrumentRequest { complete(ToResponseMarshallable(backendResponse)) } } } ~ path("workflows" / Segment / "callcaching" / "diff") { _ => parameterSeq { parameters => @@ -144,7 +136,7 @@ trait CromwellApiService extends HttpInstrumentation with MetadataRouteSupport w val response = validateWorkflowIdInMetadata(possibleWorkflowId, serviceRegistryActor) flatMap { workflowId => workflowStoreActor.ask(WorkflowStoreActor.WorkflowOnHoldToSubmittedCommand(workflowId)).mapTo[WorkflowStoreEngineActor.WorkflowOnHoldToSubmittedResponse] } - onComplete(response) { + onComplete(response){ case Success(WorkflowStoreEngineActor.WorkflowOnHoldToSubmittedFailure(_, e: NotInOnHoldStateException)) => e.errorRequest(StatusCodes.Forbidden) case Success(WorkflowStoreEngineActor.WorkflowOnHoldToSubmittedFailure(_, e)) => e.errorRequest(StatusCodes.InternalServerError) case Success(r: WorkflowStoreEngineActor.WorkflowOnHoldToSubmittedSuccess) => completeResponse(StatusCodes.OK, toResponse(r.workflowId, WorkflowSubmitted), Seq.empty) @@ -180,93 +172,93 @@ trait CromwellApiService extends HttpInstrumentation with MetadataRouteSupport w case Failure(e) => e.failRequest(StatusCodes.InternalServerError) } } -} - object CromwellApiService { +} - import spray.json._ +object CromwellApiService { + import spray.json._ - /** - * Sends a request to abort the workflow. Provides configurable success & error handlers to allow - * for different API endpoints to provide different effects in the appropriate situations, e.g. 
HTTP codes - * and error messages - */ - def abortWorkflow(possibleWorkflowId: String, - workflowStoreActor: ActorRef, - workflowManagerActor: ActorRef, - successHandler: PartialFunction[SuccessfulAbortResponse, Route] = standardAbortSuccessHandler, - errorHandler: PartialFunction[Throwable, Route] = standardAbortErrorHandler) - (implicit timeout: Timeout): Route = { - handleExceptions(ExceptionHandler(errorHandler)) { - Try(WorkflowId.fromString(possibleWorkflowId)) match { - case Success(workflowId) => - val response = workflowStoreActor.ask(WorkflowStoreActor.AbortWorkflowCommand(workflowId)).mapTo[AbortResponse] - onComplete(response) { - case Success(x: SuccessfulAbortResponse) => successHandler(x) - case Success(x: WorkflowAbortFailureResponse) => throw x.failure - case Failure(e) => throw e - } - case Failure(_) => throw InvalidWorkflowException(possibleWorkflowId) - } + /** + * Sends a request to abort the workflow. Provides configurable success & error handlers to allow + * for different API endpoints to provide different effects in the appropriate situations, e.g. HTTP codes + * and error messages + */ + def abortWorkflow(possibleWorkflowId: String, + workflowStoreActor: ActorRef, + workflowManagerActor: ActorRef, + successHandler: PartialFunction[SuccessfulAbortResponse, Route] = standardAbortSuccessHandler, + errorHandler: PartialFunction[Throwable, Route] = standardAbortErrorHandler) + (implicit timeout: Timeout): Route = { + handleExceptions(ExceptionHandler(errorHandler)) { + Try(WorkflowId.fromString(possibleWorkflowId)) match { + case Success(workflowId) => + val response = workflowStoreActor.ask(WorkflowStoreActor.AbortWorkflowCommand(workflowId)).mapTo[AbortResponse] + onComplete(response) { + case Success(x: SuccessfulAbortResponse) => successHandler(x) + case Success(x: WorkflowAbortFailureResponse) => throw x.failure + case Failure(e) => throw e + } + case Failure(_) => throw InvalidWorkflowException(possibleWorkflowId) } } + } - /** - * The abort success handler for typical cases, i.e. cromwell's API. - */ - private def standardAbortSuccessHandler: PartialFunction[SuccessfulAbortResponse, Route] = { - case WorkflowAbortedResponse(id) => complete(ToResponseMarshallable(WorkflowAbortResponse(id.toString, WorkflowAborted.toString))) - case WorkflowAbortRequestedResponse(id) => complete(ToResponseMarshallable(WorkflowAbortResponse(id.toString, WorkflowAborting.toString))) - } + /** + * The abort success handler for typical cases, i.e. cromwell's API. + */ + private def standardAbortSuccessHandler: PartialFunction[SuccessfulAbortResponse, Route] = { + case WorkflowAbortedResponse(id) => complete(ToResponseMarshallable(WorkflowAbortResponse(id.toString, WorkflowAborted.toString))) + case WorkflowAbortRequestedResponse(id) => complete(ToResponseMarshallable(WorkflowAbortResponse(id.toString, WorkflowAborting.toString))) + } - /** - * The abort error handler for typical cases, i.e. cromwell's API - */ - private def standardAbortErrorHandler: PartialFunction[Throwable, Route] = { - case e: InvalidWorkflowException => e.failRequest(StatusCodes.BadRequest) - case e: WorkflowNotFoundException => e.errorRequest(StatusCodes.NotFound) - case _: AskTimeoutException if CromwellShutdown.shutdownInProgress() => serviceShuttingDownResponse - case e: TimeoutException => e.failRequest(StatusCodes.ServiceUnavailable) - case e: Exception => e.errorRequest(StatusCodes.InternalServerError) - } + /** + * The abort error handler for typical cases, i.e. 
cromwell's API + */ + private def standardAbortErrorHandler: PartialFunction[Throwable, Route] = { + case e: InvalidWorkflowException => e.failRequest(StatusCodes.BadRequest) + case e: WorkflowNotFoundException => e.errorRequest(StatusCodes.NotFound) + case _: AskTimeoutException if CromwellShutdown.shutdownInProgress() => serviceShuttingDownResponse + case e: TimeoutException => e.failRequest(StatusCodes.ServiceUnavailable) + case e: Exception => e.errorRequest(StatusCodes.InternalServerError) + } - def validateWorkflowIdInMetadata(possibleWorkflowId: String, - serviceRegistryActor: ActorRef) - (implicit timeout: Timeout, executor: ExecutionContext): Future[WorkflowId] = { - Try(WorkflowId.fromString(possibleWorkflowId)) match { - case Success(w) => - serviceRegistryActor.ask(ValidateWorkflowIdInMetadata(w)).mapTo[WorkflowValidationResponse] flatMap { - case RecognizedWorkflowId => Future.successful(w) - case UnrecognizedWorkflowId => validateWorkflowIdInMetadataSummaries(possibleWorkflowId, serviceRegistryActor) - case FailedToCheckWorkflowId(t) => Future.failed(t) - } - case Failure(_) => Future.failed(InvalidWorkflowException(possibleWorkflowId)) - } + def validateWorkflowIdInMetadata(possibleWorkflowId: String, + serviceRegistryActor: ActorRef) + (implicit timeout: Timeout, executor: ExecutionContext): Future[WorkflowId] = { + Try(WorkflowId.fromString(possibleWorkflowId)) match { + case Success(w) => + serviceRegistryActor.ask(ValidateWorkflowIdInMetadata(w)).mapTo[WorkflowValidationResponse] flatMap { + case RecognizedWorkflowId => Future.successful(w) + case UnrecognizedWorkflowId => validateWorkflowIdInMetadataSummaries(possibleWorkflowId, serviceRegistryActor) + case FailedToCheckWorkflowId(t) => Future.failed(t) + } + case Failure(_) => Future.failed(InvalidWorkflowException(possibleWorkflowId)) } + } - def validateWorkflowIdInMetadataSummaries(possibleWorkflowId: String, - serviceRegistryActor: ActorRef) - (implicit timeout: Timeout, executor: ExecutionContext): Future[WorkflowId] = { - Try(WorkflowId.fromString(possibleWorkflowId)) match { - case Success(w) => - serviceRegistryActor.ask(ValidateWorkflowIdInMetadataSummaries(w)).mapTo[WorkflowValidationResponse] map { - case RecognizedWorkflowId => w - case UnrecognizedWorkflowId => throw UnrecognizedWorkflowException(w) - case FailedToCheckWorkflowId(t) => throw t - } - case Failure(_) => Future.failed(InvalidWorkflowException(possibleWorkflowId)) - } + def validateWorkflowIdInMetadataSummaries(possibleWorkflowId: String, + serviceRegistryActor: ActorRef) + (implicit timeout: Timeout, executor: ExecutionContext): Future[WorkflowId] = { + Try(WorkflowId.fromString(possibleWorkflowId)) match { + case Success(w) => + serviceRegistryActor.ask(ValidateWorkflowIdInMetadataSummaries(w)).mapTo[WorkflowValidationResponse] map { + case RecognizedWorkflowId => w + case UnrecognizedWorkflowId => throw UnrecognizedWorkflowException(w) + case FailedToCheckWorkflowId(t) => throw t + } + case Failure(_) => Future.failed(InvalidWorkflowException(possibleWorkflowId)) } + } - final case class BackendResponse(supportedBackends: List[String], defaultBackend: String) + final case class BackendResponse(supportedBackends: List[String], defaultBackend: String) - final case class UnrecognizedWorkflowException(id: WorkflowId) extends Exception(s"Unrecognized workflow ID: $id") + final case class UnrecognizedWorkflowException(id: WorkflowId) extends Exception(s"Unrecognized workflow ID: $id") - final case class 
InvalidWorkflowException(possibleWorkflowId: String) extends Exception(s"Invalid workflow ID: '$possibleWorkflowId'.") + final case class InvalidWorkflowException(possibleWorkflowId: String) extends Exception(s"Invalid workflow ID: '$possibleWorkflowId'.") - val cromwellVersion = VersionUtil.getVersion("cromwell-engine") - val swaggerUiVersion = VersionUtil.getVersion("swagger-ui", VersionUtil.sbtDependencyVersion("swaggerUi")) - val backendResponse = BackendResponse(BackendConfiguration.AllBackendEntries.map(_.name).sorted, BackendConfiguration.DefaultBackendEntry.name) - val versionResponse = JsObject(Map("cromwell" -> cromwellVersion.toJson)) - val serviceShuttingDownResponse = new Exception("Cromwell service is shutting down.").failRequest(StatusCodes.ServiceUnavailable) - } + val cromwellVersion = VersionUtil.getVersion("cromwell-engine") + val swaggerUiVersion = VersionUtil.getVersion("swagger-ui", VersionUtil.sbtDependencyVersion("swaggerUi")) + val backendResponse = BackendResponse(BackendConfiguration.AllBackendEntries.map(_.name).sorted, BackendConfiguration.DefaultBackendEntry.name) + val versionResponse = JsObject(Map("cromwell" -> cromwellVersion.toJson)) + val serviceShuttingDownResponse = new Exception("Cromwell service is shutting down.").failRequest(StatusCodes.ServiceUnavailable) +} \ No newline at end of file diff --git a/engine/src/main/scala/cromwell/webservice/routes/WesCromwellRouteSupport.scala b/engine/src/main/scala/cromwell/webservice/routes/WesCromwellRouteSupport.scala index 95cde16cd22..349d36a9251 100644 --- a/engine/src/main/scala/cromwell/webservice/routes/WesCromwellRouteSupport.scala +++ b/engine/src/main/scala/cromwell/webservice/routes/WesCromwellRouteSupport.scala @@ -31,6 +31,7 @@ trait WesCromwellRouteSupport extends WebServiceUtils { implicit val timeout: Timeout = duration implicit def actorRefFactory: ActorRefFactory + implicit val materializer: ActorMaterializer implicit val ec: ExecutionContext diff --git a/engine/src/main/scala/cromwell/webservice/routes/wes/WesRouteSupport.scala b/engine/src/main/scala/cromwell/webservice/routes/wes/WesRouteSupport.scala index e4969702460..7ec56e3317a 100644 --- a/engine/src/main/scala/cromwell/webservice/routes/wes/WesRouteSupport.scala +++ b/engine/src/main/scala/cromwell/webservice/routes/wes/WesRouteSupport.scala @@ -1,31 +1,38 @@ package cromwell.webservice.routes.wes import akka.actor.ActorRef +import akka.http.scaladsl.model.{StatusCode, StatusCodes} import akka.http.scaladsl.server.Directives._ -import akka.http.scaladsl.server.Route +import akka.http.scaladsl.server.directives.RouteDirectives.complete +import akka.http.scaladsl.server.{Directive1, Route} import akka.pattern.{AskTimeoutException, ask} import akka.util.Timeout +import com.typesafe.config.ConfigFactory +import cromwell.core.WorkflowId +import cromwell.core.abort.SuccessfulAbortResponse import cromwell.engine.instrumentation.HttpInstrumentation -import cromwell.services.metadata.MetadataService.{GetStatus, MetadataServiceResponse, StatusLookupFailed} -import cromwell.webservice.routes.CromwellApiService.{UnrecognizedWorkflowException, validateWorkflowIdInMetadata} +import cromwell.engine.workflow.WorkflowManagerActor.WorkflowNotFoundException +import cromwell.server.CromwellShutdown +import cromwell.services.metadata.MetadataService.{BuildMetadataJsonAction, GetSingleWorkflowMetadataAction, GetStatus, MetadataServiceResponse, StatusLookupFailed} +import cromwell.services.{FailedMetadataJsonResponse, SuccessfulMetadataJsonResponse} import 
cromwell.webservice.WebServiceUtils.EnhancedThrowable +import cromwell.webservice.routes.CromwellApiService.{UnrecognizedWorkflowException, validateWorkflowIdInMetadata} +import cromwell.webservice.routes.MetadataRouteSupport.{metadataBuilderActorRequest, metadataQueryRequest} +import cromwell.webservice.routes.wes.WesResponseJsonSupport._ +import cromwell.webservice.routes.wes.WesRouteSupport._ +import cromwell.webservice.routes.{CromwellApiService, WesCromwellRouteSupport} +import net.ceedubs.ficus.Ficus._ -import scala.concurrent.ExecutionContext +import scala.concurrent.duration.FiniteDuration +import scala.concurrent.{ExecutionContext, Future} import scala.util.{Failure, Success} -import WesResponseJsonSupport._ -import akka.http.scaladsl.model.{StatusCode, StatusCodes} -import akka.http.scaladsl.server.directives.RouteDirectives.complete -import WesRouteSupport._ -import cromwell.core.abort.SuccessfulAbortResponse -import cromwell.engine.workflow.WorkflowManagerActor.WorkflowNotFoundException -import cromwell.server.CromwellShutdown -import cromwell.services.SuccessfulMetadataJsonResponse -import cromwell.webservice.routes.CromwellApiService -trait WesRouteSupport extends HttpInstrumentation { + + +trait WesRouteSupport extends HttpInstrumentation with WesCromwellRouteSupport { + val serviceRegistryActor: ActorRef val workflowManagerActor: ActorRef - val workflowStoreActor: ActorRef implicit val ec: ExecutionContext implicit val timeout: Timeout @@ -49,36 +56,53 @@ trait WesRouteSupport extends HttpInstrumentation { pathPrefix("ga4gh" / "wes" / "v1") { concat( path("service-info") { - complete(ServiceInfo.toWesResponse(workflowStoreActor)) + get { + complete(ServiceInfo.toWesResponse(workflowStoreActor)) + } }, - pathPrefix("runs") { - concat( - path(Segment / "status") { possibleWorkflowId => - val response = validateWorkflowIdInMetadata(possibleWorkflowId, serviceRegistryActor).flatMap(w => serviceRegistryActor.ask(GetStatus(w)).mapTo[MetadataServiceResponse]) - // WES can also return a 401 or a 403 but that requires user auth knowledge which Cromwell doesn't currently have - onComplete(response) { - case Success(SuccessfulMetadataJsonResponse(_, jsObject)) => - val wesState = WesState.fromCromwellStatusJson(jsObject) - complete(WesRunStatus(possibleWorkflowId, wesState)) - case Success(r: StatusLookupFailed) => r.reason.errorRequest(StatusCodes.InternalServerError) - case Success(m: MetadataServiceResponse) => - // This should never happen, but .... 
- val error = new IllegalStateException("Unexpected response from Metadata service: " + m) - error.errorRequest(StatusCodes.InternalServerError) - case Failure(_: UnrecognizedWorkflowException) => complete(NotFoundError) - case Failure(e) => complete(WesErrorResponse(e.getMessage, StatusCodes.InternalServerError.intValue)) - } - }, - path(Segment / "cancel") { possibleWorkflowId => - post { - CromwellApiService.abortWorkflow(possibleWorkflowId, - workflowStoreActor, - workflowManagerActor, - successHandler = WesAbortSuccessHandler, - errorHandler = WesAbortErrorHandler) + path("runs") { + get { + parameters(("page_size".as[Int].?, "page_token".?)) { (pageSize, pageToken) => + completeCromwellResponse(listRuns(pageSize, pageToken, serviceRegistryActor)) + } + } ~ + post { + extractSubmission() { submission => + submitRequest(submission.entity, + isSingleSubmission = true) } } - ) + }, + path("runs" / Segment) { workflowId => + get { + // this is what it was like in code found in the project… it perhaps isn’t ideal but doesn’t seem to hurt, so leaving it like this for now. + completeCromwellResponse(runLog(workflowId, (w: WorkflowId) => GetSingleWorkflowMetadataAction(w, None, None, expandSubWorkflows = false), serviceRegistryActor)) + } + }, + path("runs" / Segment / "status") { possibleWorkflowId => + val response = validateWorkflowIdInMetadata(possibleWorkflowId, serviceRegistryActor).flatMap(w => serviceRegistryActor.ask(GetStatus(w)).mapTo[MetadataServiceResponse]) + // WES can also return a 401 or a 403 but that requires user auth knowledge which Cromwell doesn't currently have + onComplete(response) { + case Success(SuccessfulMetadataJsonResponse(_, jsObject)) => + val wesState = WesState.fromCromwellStatusJson(jsObject) + complete(WesRunStatus(possibleWorkflowId, wesState)) + case Success(r: StatusLookupFailed) => r.reason.errorRequest(StatusCodes.InternalServerError) + case Success(m: MetadataServiceResponse) => + // This should never happen, but .... 
+ val error = new IllegalStateException("Unexpected response from Metadata service: " + m) + error.errorRequest(StatusCodes.InternalServerError) + case Failure(_: UnrecognizedWorkflowException) => complete(NotFoundError) + case Failure(e) => complete(WesErrorResponse(e.getMessage, StatusCodes.InternalServerError.intValue)) + } + }, + path("runs" / Segment / "cancel") { possibleWorkflowId => + post { + CromwellApiService.abortWorkflow(possibleWorkflowId, + workflowStoreActor, + workflowManagerActor, + successHandler = WesAbortSuccessHandler, + errorHandler = WesAbortErrorHandler) + } } ) } @@ -86,7 +110,15 @@ trait WesRouteSupport extends HttpInstrumentation { } } + + object WesRouteSupport { + import WesResponseJsonSupport._ + + implicit lazy val duration: FiniteDuration = ConfigFactory.load().as[FiniteDuration]("akka.http.server.request-timeout") + implicit lazy val timeout: Timeout = duration + import scala.concurrent.ExecutionContext.Implicits.global + val NotFoundError = WesErrorResponse("The requested workflow run wasn't found", StatusCodes.NotFound.intValue) def WesAbortSuccessHandler: PartialFunction[SuccessfulAbortResponse, Route] = { @@ -104,4 +136,38 @@ object WesRouteSupport { private def respondWithWesError(errorMsg: String, status: StatusCode): Route = { complete((status, WesErrorResponse(errorMsg, status.intValue))) } -} + + def extractSubmission(): Directive1[WesSubmission] = { + formFields(( + "workflow_params".?, + "workflow_type".?, + "workflow_type_version".?, + "tags".?, + "workflow_engine_parameters".?, + "workflow_url".?, + "workflow_attachment".as[String].* + )).as(WesSubmission) + } + + def completeCromwellResponse(future: => Future[WesResponse]): Route = { + onComplete(future) { + case Success(response: WesResponse) => complete(response) + case Failure(e) => complete(WesErrorResponse(e.getMessage, StatusCodes.InternalServerError.intValue)) + } + } + + def listRuns(pageSize: Option[Int], pageToken: Option[String], serviceRegistryActor: ActorRef): Future[WesResponse] = { + // FIXME: to handle - page_size, page_token + // FIXME: How to handle next_page_token in response? 
+ metadataQueryRequest(Seq.empty[(String, String)], serviceRegistryActor).map(RunListResponse.fromMetadataQueryResponse) + } + + def runLog(workflowId: String, request: WorkflowId => BuildMetadataJsonAction, serviceRegistryActor: ActorRef): Future[WesResponse] = { + val metadataJsonResponse = metadataBuilderActorRequest(workflowId, request, serviceRegistryActor) + + metadataJsonResponse.map { + case SuccessfulMetadataJsonResponse(_, responseJson) => WesResponseWorkflowMetadata(WesRunLog.fromJson(responseJson.toString())) + case FailedMetadataJsonResponse(_, reason) => WesErrorResponse(reason.getMessage, StatusCodes.InternalServerError.intValue) + } + } +} \ No newline at end of file diff --git a/engine/src/main/scala/cromwell/webservice/routes/wes/WesRunRoutes.scala b/engine/src/main/scala/cromwell/webservice/routes/wes/WesRunRoutes.scala deleted file mode 100644 index 6ecbac4bdfa..00000000000 --- a/engine/src/main/scala/cromwell/webservice/routes/wes/WesRunRoutes.scala +++ /dev/null @@ -1,94 +0,0 @@ -package cromwell.webservice.routes.wes - -import akka.actor.ActorRef -import akka.http.scaladsl.model.StatusCodes -import akka.http.scaladsl.server.Directives._ -import akka.http.scaladsl.server.directives.RouteDirectives.complete -import akka.http.scaladsl.server.{Directive1, Route} -import akka.util.Timeout -import com.typesafe.config.ConfigFactory -import cromwell.core.WorkflowId -import cromwell.services.metadata.MetadataService.{BuildMetadataJsonAction, GetSingleWorkflowMetadataAction} -import cromwell.services.{FailedMetadataJsonResponse, SuccessfulMetadataJsonResponse} -import cromwell.webservice.routes.MetadataRouteSupport.{metadataBuilderActorRequest, metadataQueryRequest} -import cromwell.webservice.routes.WesCromwellRouteSupport -import cromwell.webservice.routes.wes.WesRunRoutes.{completeCromwellResponse, extractSubmission, runLog} -import net.ceedubs.ficus.Ficus._ - -import scala.concurrent.ExecutionContext.Implicits.global -import scala.concurrent.Future -import scala.concurrent.duration.FiniteDuration -import scala.util.{Failure, Success} - -trait WesRunRoutes extends WesCromwellRouteSupport { - - val serviceRegistryActor: ActorRef - - lazy val runRoutes: Route = - pathPrefix("ga4gh" / "wes" / "v1") { - concat( - path("runs") { - get { - parameters(("page_size".as[Int].?, "page_token".?)) { (pageSize, pageToken) => - WesRunRoutes.completeCromwellResponse(WesRunRoutes.listRuns(pageSize, pageToken, serviceRegistryActor)) - } - } ~ - post { - extractSubmission() { submission => - submitRequest(submission.entity, - isSingleSubmission = true, - ) - } - } - }, - path("runs" / Segment) { workflowId => - get { - // this is what it was like in code found in the project… it perhaps isn’t ideal but doesn’t seem to hurt, so leaving it like this for now. 
- completeCromwellResponse(runLog(workflowId, (w: WorkflowId) => GetSingleWorkflowMetadataAction(w, None, None, expandSubWorkflows = false), serviceRegistryActor)) - } - } - ) - } -} - -object WesRunRoutes { - - import WesResponseJsonSupport._ - - implicit lazy val duration: FiniteDuration = ConfigFactory.load().as[FiniteDuration]("akka.http.server.request-timeout") - implicit lazy val timeout: Timeout = duration - - def extractSubmission(): Directive1[WesSubmission] = { - formFields(( - "workflow_params".?, - "workflow_type".?, - "workflow_type_version".?, - "tags".?, - "workflow_engine_parameters".?, - "workflow_url".?, - "workflow_attachment".as[String].* - )).as(WesSubmission) - } - - def completeCromwellResponse(future: => Future[WesResponse]): Route = { - onComplete(future) { - case Success(response: WesResponse) => complete(response) - case Failure(e) => complete(WesErrorResponse(e.getMessage, StatusCodes.InternalServerError.intValue)) - } - } - - def listRuns(pageSize: Option[Int], pageToken: Option[String], serviceRegistryActor: ActorRef): Future[WesResponse] = { - // FIXME: to handle - page_size, page_token - // FIXME: How to handle next_page_token in response? - metadataQueryRequest(Seq.empty[(String, String)], serviceRegistryActor).map(RunListResponse.fromMetadataQueryResponse) - } - - def runLog(workflowId: String, request: WorkflowId => BuildMetadataJsonAction, serviceRegistryActor: ActorRef): Future[WesResponse] = { - val metadataJsonResponse = metadataBuilderActorRequest(workflowId, request, serviceRegistryActor) - - metadataJsonResponse.map { - case SuccessfulMetadataJsonResponse(_, responseJson) => WesResponseWorkflowMetadata(WesRunLog.fromJson(responseJson.toString())) - case FailedMetadataJsonResponse(_, reason) => WesErrorResponse(reason.getMessage, StatusCodes.InternalServerError.intValue) - } - } -} diff --git a/engine/src/test/scala/cromwell/webservice/routes/CromwellApiServiceSpec.scala b/engine/src/test/scala/cromwell/webservice/routes/CromwellApiServiceSpec.scala index 2a3b11a41c5..c40c84397f7 100644 --- a/engine/src/test/scala/cromwell/webservice/routes/CromwellApiServiceSpec.scala +++ b/engine/src/test/scala/cromwell/webservice/routes/CromwellApiServiceSpec.scala @@ -1,7 +1,5 @@ package cromwell.webservice.routes -import java.time.OffsetDateTime - import akka.actor.{Actor, ActorLogging, ActorSystem, Props} import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._ import akka.http.scaladsl.model.ContentTypes._ @@ -17,13 +15,13 @@ import cromwell.engine.workflow.WorkflowManagerActor.WorkflowNotFoundException import cromwell.engine.workflow.workflowstore.WorkflowStoreActor._ import cromwell.engine.workflow.workflowstore.WorkflowStoreEngineActor.{WorkflowOnHoldToSubmittedFailure, WorkflowOnHoldToSubmittedSuccess} import cromwell.engine.workflow.workflowstore.WorkflowStoreSubmitActor.{WorkflowSubmittedToStore, WorkflowsBatchSubmittedToStore} +import cromwell.services._ import cromwell.services.healthmonitor.ProtoHealthMonitorServiceActor.{GetCurrentStatus, StatusCheckResponse, SubsystemStatus} import cromwell.services.instrumentation.InstrumentationService.InstrumentationServiceMessage import cromwell.services.metadata.MetadataArchiveStatus._ import cromwell.services.metadata.MetadataService._ import cromwell.services.metadata._ import cromwell.services.metadata.impl.builder.MetadataBuilderActor -import cromwell.services._ import cromwell.services.womtool.WomtoolServiceMessages.{DescribeFailure, DescribeRequest, DescribeSuccess} import 
cromwell.services.womtool.models.WorkflowDescription import cromwell.util.SampleWdl.HelloWorld @@ -34,6 +32,7 @@ import org.scalatest.flatspec.AsyncFlatSpec import org.scalatest.matchers.should.Matchers import spray.json._ +import java.time.OffsetDateTime import scala.concurrent.duration._ class CromwellApiServiceSpec extends AsyncFlatSpec with ScalatestRouteTest with Matchers { @@ -529,6 +528,7 @@ object CromwellApiServiceSpec { val WorkflowIdExistingOnlyInSummaryTable = WorkflowId.fromString("f0000000-0000-0000-0000-000000000011") val ArchivedWorkflowId = WorkflowId.fromString("c4c6339c-2145-47fb-acc5-b5cb8d2809f5") val ArchivedAndDeletedWorkflowId = WorkflowId.fromString("abc1234d-2145-47fb-acc5-b5cb8d2809f5") + val wesWorkflowId = WorkflowId.randomId() val SummarizedWorkflowIds = Set( SummarizedWorkflowId, WorkflowIdExistingOnlyInSummaryTable, @@ -545,7 +545,8 @@ object CromwellApiServiceSpec { FailedWorkflowId, SummarizedWorkflowId, ArchivedWorkflowId, - ArchivedAndDeletedWorkflowId + ArchivedAndDeletedWorkflowId, + wesWorkflowId ) class MockApiService()(implicit val system: ActorSystem) extends CromwellApiService { @@ -564,13 +565,21 @@ object CromwellApiServiceSpec { List( MetadataEvent(MetadataKey(workflowId, None, "testKey1a"), MetadataValue("myValue1a", MetadataString)), MetadataEvent(MetadataKey(workflowId, None, "testKey1b"), MetadataValue("myValue1b", MetadataString)), - MetadataEvent(MetadataKey(workflowId, None, "testKey2a"), MetadataValue("myValue2a", MetadataString)) + MetadataEvent(MetadataKey(workflowId, None, "testKey2a"), MetadataValue("myValue2a", MetadataString)), + ) + } + private def wesFullMetadataResponse(workflowId: WorkflowId) = { + List( + MetadataEvent(MetadataKey(workflowId, None, "status"), MetadataValue("Running", MetadataString)), + MetadataEvent(MetadataKey(workflowId, None, "submittedFiles:workflow"), MetadataValue("myValue2a", MetadataString)), + ) } def responseMetadataValues(workflowId: WorkflowId, withKeys: List[String], withoutKeys: List[String]): JsObject = { def keyFilter(keys: List[String])(m: MetadataEvent) = keys.exists(k => m.key.key.startsWith(k)) - val events = fullMetadataResponse(workflowId) + val metadataEvents = if (workflowId == wesWorkflowId) wesFullMetadataResponse(workflowId) else fullMetadataResponse(workflowId) + val events = metadataEvents .filter(m => withKeys.isEmpty || keyFilter(withKeys)(m)) .filter(m => withoutKeys.isEmpty || !keyFilter(withoutKeys)(m)) diff --git a/engine/src/test/scala/cromwell/webservice/routes/wes/WesRouteSupportSpec.scala b/engine/src/test/scala/cromwell/webservice/routes/wes/WesRouteSupportSpec.scala index 0c0fe1c8054..c8306088ded 100644 --- a/engine/src/test/scala/cromwell/webservice/routes/wes/WesRouteSupportSpec.scala +++ b/engine/src/test/scala/cromwell/webservice/routes/wes/WesRouteSupportSpec.scala @@ -1,22 +1,27 @@ package cromwell.webservice.routes.wes import akka.actor.Props -import akka.http.scaladsl.model.StatusCodes import akka.http.scaladsl.model.HttpMethods.POST -import akka.http.scaladsl.testkit.ScalatestRouteTest +import akka.http.scaladsl.model._ +import akka.http.scaladsl.server.MethodRejection +import akka.http.scaladsl.testkit.{RouteTestTimeout, ScalatestRouteTest} +import cromwell.util.SampleWdl.HelloWorld import cromwell.webservice.routes.CromwellApiServiceSpec - -import scala.concurrent.duration._ import cromwell.webservice.routes.CromwellApiServiceSpec.{MockServiceRegistryActor, MockWorkflowManagerActor, MockWorkflowStoreActor} +import 
cromwell.webservice.routes.wes.WesResponseJsonSupport._ import org.scalatest.flatspec.AsyncFlatSpec import org.scalatest.matchers.should.Matchers -import WesResponseJsonSupport._ -import akka.http.scaladsl.server.MethodRejection +import spray.json._ + +import scala.concurrent.duration._ class WesRouteSupportSpec extends AsyncFlatSpec with ScalatestRouteTest with Matchers with WesRouteSupport { + val actorRefFactory = system override implicit val ec = system.dispatcher - override implicit val timeout = 5.seconds + override val timeout = routeTestTimeout.duration + implicit def routeTestTimeout = RouteTestTimeout(5.seconds) + override val workflowStoreActor = actorRefFactory.actorOf(Props(new MockWorkflowStoreActor())) override val serviceRegistryActor = actorRefFactory.actorOf(Props(new MockServiceRegistryActor())) @@ -149,4 +154,53 @@ class WesRouteSupportSpec extends AsyncFlatSpec with ScalatestRouteTest with Mat rejection shouldEqual MethodRejection(POST) } } -} + + behavior of "WES API /runs POST endpoint" + it should "return 201 for a successful workflow submission" in { + val workflowSource = Multipart.FormData.BodyPart("workflow_url", HttpEntity(MediaTypes.`application/json`, "https://raw.githubusercontent.com/broadinstitute/cromwell/develop/womtool/src/test/resources/validate/wdl_draft3/valid/callable_imports/my_workflow.wdl")) + val workflowInputs = Multipart.FormData.BodyPart("workflow_params", HttpEntity(MediaTypes.`application/json`, HelloWorld.rawInputs.toJson.toString())) + val formData = Multipart.FormData(workflowSource, workflowInputs).toEntity() + Post(s"/ga4gh/wes/$version/runs", formData) ~> + wesRoutes ~> + check { + assertResult( + s"""{ + | "id": "${CromwellApiServiceSpec.ExistingWorkflowId.toString}", + | "status": "Submitted" + |}""".stripMargin) { + responseAs[String].parseJson.prettyPrint + } + assertResult(StatusCodes.Created) { + status + } + headers should be(Seq.empty) + } + } + + + behavior of "WES API /runs GET endpoint" + it should "return results for a good query" in { + Get(s"/ga4gh/wes/v1/runs") ~> + wesRoutes ~> + check { + status should be(StatusCodes.OK) + contentType should be(ContentTypes.`application/json`) + val results = responseAs[JsObject].fields("runs").convertTo[Seq[JsObject]] + results.head.fields("run_id") should be(JsString(CromwellApiServiceSpec.ExistingWorkflowId.toString)) + results.head.fields("state") should be(JsString("COMPLETE")) + } + } + + behavior of "WES API /runs/{run_id} endpoint" + it should "return valid metadata when supplied a run_id" in { + Get(s"/ga4gh/wes/v1/runs/${CromwellApiServiceSpec.wesWorkflowId}") ~> + wesRoutes ~> + check { + status should be(StatusCodes.OK) + val result = responseAs[JsObject].fields("workflowLog").asJsObject() + result.fields.keys should contain allOf("request", "run_id", "state") + result.fields("state") should be(JsString("RUNNING")) + result.fields("run_id") should be(JsString(CromwellApiServiceSpec.wesWorkflowId.toString)) + } + } +} \ No newline at end of file From 79afa2997f91718cbbacc3aa68ca4ee086b6d7f8 Mon Sep 17 00:00:00 2001 From: Katrina P <68349264+kpierre13@users.noreply.github.com> Date: Thu, 25 Aug 2022 15:15:55 -0400 Subject: [PATCH 073/326] BW-1354 - Porting CBAS preliminary step (#6837) * Getting rid of shared utility file; Adding/Updating WES version of submit. 
* Edit spec file * Adding Wes-like error --- .../routes/CromwellApiService.scala | 79 ++++++++++++++-- .../routes/WesCromwellRouteSupport.scala | 90 ------------------- .../routes/wes/WesRouteSupport.scala | 78 ++++++++++++++-- .../routes/wes/WesRouteSupportSpec.scala | 3 +- 4 files changed, 141 insertions(+), 109 deletions(-) delete mode 100644 engine/src/main/scala/cromwell/webservice/routes/WesCromwellRouteSupport.scala diff --git a/engine/src/main/scala/cromwell/webservice/routes/CromwellApiService.scala b/engine/src/main/scala/cromwell/webservice/routes/CromwellApiService.scala index 637509cc4fd..a1c4f023135 100644 --- a/engine/src/main/scala/cromwell/webservice/routes/CromwellApiService.scala +++ b/engine/src/main/scala/cromwell/webservice/routes/CromwellApiService.scala @@ -1,10 +1,13 @@ package cromwell.webservice.routes +import java.util.UUID + import akka.actor.{ActorRef, ActorRefFactory} +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheDiffActorJsonFormatting.successfulResponseJsonFormatter import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._ import akka.http.scaladsl.marshalling.ToResponseMarshallable -import akka.http.scaladsl.model.ContentTypes._ import akka.http.scaladsl.model._ +import akka.http.scaladsl.model.ContentTypes._ import akka.http.scaladsl.server.Directives._ import akka.http.scaladsl.server.{ExceptionHandler, Route} import akka.pattern.{AskTimeoutException, ask} @@ -12,6 +15,7 @@ import akka.stream.ActorMaterializer import akka.util.Timeout import cats.data.NonEmptyList import cats.data.Validated.{Invalid, Valid} +import com.typesafe.config.ConfigFactory import common.exception.AggregatedMessageException import common.util.VersionUtil import cromwell.core.abort._ @@ -20,24 +24,25 @@ import cromwell.engine.backend.BackendConfiguration import cromwell.engine.instrumentation.HttpInstrumentation import cromwell.engine.workflow.WorkflowManagerActor.WorkflowNotFoundException import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheDiffActor.{CachedCallNotFoundException, CallCacheDiffActorResponse, FailedCallCacheDiffResponse, SuccessfulCallCacheDiffResponse} -import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCacheDiffActorJsonFormatting.successfulResponseJsonFormatter import cromwell.engine.workflow.lifecycle.execution.callcaching.{CallCacheDiffActor, CallCacheDiffQueryParameter} import cromwell.engine.workflow.workflowstore.SqlWorkflowStore.NotInOnHoldStateException -import cromwell.engine.workflow.workflowstore.{WorkflowStoreActor, WorkflowStoreEngineActor} +import cromwell.engine.workflow.workflowstore.{WorkflowStoreActor, WorkflowStoreEngineActor, WorkflowStoreSubmitActor} import cromwell.server.CromwellShutdown -import cromwell.services._ import cromwell.services.healthmonitor.ProtoHealthMonitorServiceActor.{GetCurrentStatus, StatusCheckResponse} import cromwell.services.metadata.MetadataService._ -import cromwell.webservice.WebServiceUtils.EnhancedThrowable -import cromwell.webservice.WorkflowJsonSupport._ import cromwell.webservice._ +import cromwell.services._ +import cromwell.webservice.WorkflowJsonSupport._ +import cromwell.webservice.WebServiceUtils +import cromwell.webservice.WebServiceUtils.EnhancedThrowable +import net.ceedubs.ficus.Ficus._ -import java.util.UUID +import scala.concurrent.duration._ import scala.concurrent.{ExecutionContext, Future, TimeoutException} import scala.io.Source import scala.util.{Failure, Success, Try} -trait CromwellApiService extends HttpInstrumentation 
with MetadataRouteSupport with WomtoolRouteSupport with WebServiceUtils with WesCromwellRouteSupport { +trait CromwellApiService extends HttpInstrumentation with MetadataRouteSupport with WomtoolRouteSupport with WebServiceUtils { import CromwellApiService._ implicit def actorRefFactory: ActorRefFactory @@ -48,6 +53,10 @@ trait CromwellApiService extends HttpInstrumentation with MetadataRouteSupport w val workflowManagerActor: ActorRef val serviceRegistryActor: ActorRef + // Derive timeouts (implicit and not) from akka http's request timeout since there's no point in being higher than that + implicit val duration = ConfigFactory.load().as[FiniteDuration]("akka.http.server.request-timeout") + implicit val timeout: Timeout = duration + val engineRoutes = concat( path("engine" / Segment / "stats") { _ => get { @@ -173,6 +182,58 @@ trait CromwellApiService extends HttpInstrumentation with MetadataRouteSupport w } } + private def toResponse(workflowId: WorkflowId, workflowState: WorkflowState): WorkflowSubmitResponse = { + WorkflowSubmitResponse(workflowId.toString, workflowState.toString) + } + + private def submitRequest(formData: Multipart.FormData, isSingleSubmission: Boolean): Route = { + + def getWorkflowState(workflowOnHold: Boolean): WorkflowState = { + if (workflowOnHold) + WorkflowOnHold + else WorkflowSubmitted + } + + def askSubmit(command: WorkflowStoreActor.WorkflowStoreActorSubmitCommand, warnings: Seq[String], workflowState: WorkflowState): Route = { + // NOTE: Do not blindly copy the akka-http -to- ask-actor pattern below without knowing the pros and cons. + onComplete(workflowStoreActor.ask(command).mapTo[WorkflowStoreSubmitActor.WorkflowStoreSubmitActorResponse]) { + case Success(w) => + w match { + case WorkflowStoreSubmitActor.WorkflowSubmittedToStore(workflowId, _) => + completeResponse(StatusCodes.Created, toResponse(workflowId, workflowState), warnings) + case WorkflowStoreSubmitActor.WorkflowsBatchSubmittedToStore(workflowIds, _) => + completeResponse(StatusCodes.Created, workflowIds.toList.map(toResponse(_, workflowState)), warnings) + case WorkflowStoreSubmitActor.WorkflowSubmitFailed(throwable) => + throwable.failRequest(StatusCodes.BadRequest, warnings) + } + case Failure(_: AskTimeoutException) if CromwellShutdown.shutdownInProgress() => serviceShuttingDownResponse + case Failure(e: TimeoutException) => e.failRequest(StatusCodes.ServiceUnavailable) + case Failure(e) => e.failRequest(StatusCodes.InternalServerError, warnings) + } + } + + onComplete(materializeFormData(formData)) { + case Success(data) => + PartialWorkflowSources.fromSubmitRoute(data, allowNoInputs = isSingleSubmission) match { + case Success(workflowSourceFiles) if isSingleSubmission && workflowSourceFiles.size == 1 => + val warnings = workflowSourceFiles.flatMap(_.warnings) + askSubmit(WorkflowStoreActor.SubmitWorkflow(workflowSourceFiles.head), warnings, getWorkflowState(workflowSourceFiles.head.workflowOnHold)) + // Catches the case where someone has gone through the single submission endpoint w/ more than one workflow + case Success(workflowSourceFiles) if isSingleSubmission => + val warnings = workflowSourceFiles.flatMap(_.warnings) + val e = new IllegalArgumentException("To submit more than one workflow at a time, use the batch endpoint.") + e.failRequest(StatusCodes.BadRequest, warnings) + case Success(workflowSourceFiles) => + val warnings = workflowSourceFiles.flatMap(_.warnings) + askSubmit( + WorkflowStoreActor.BatchSubmitWorkflows(NonEmptyList.fromListUnsafe(workflowSourceFiles.toList)), 
+ warnings, getWorkflowState(workflowSourceFiles.head.workflowOnHold)) + case Failure(t) => t.failRequest(StatusCodes.BadRequest) + } + case Failure(e: TimeoutException) => e.failRequest(StatusCodes.ServiceUnavailable) + case Failure(e) => e.failRequest(StatusCodes.InternalServerError) + } + } } object CromwellApiService { @@ -261,4 +322,4 @@ object CromwellApiService { val backendResponse = BackendResponse(BackendConfiguration.AllBackendEntries.map(_.name).sorted, BackendConfiguration.DefaultBackendEntry.name) val versionResponse = JsObject(Map("cromwell" -> cromwellVersion.toJson)) val serviceShuttingDownResponse = new Exception("Cromwell service is shutting down.").failRequest(StatusCodes.ServiceUnavailable) -} \ No newline at end of file +} diff --git a/engine/src/main/scala/cromwell/webservice/routes/WesCromwellRouteSupport.scala b/engine/src/main/scala/cromwell/webservice/routes/WesCromwellRouteSupport.scala deleted file mode 100644 index 349d36a9251..00000000000 --- a/engine/src/main/scala/cromwell/webservice/routes/WesCromwellRouteSupport.scala +++ /dev/null @@ -1,90 +0,0 @@ -package cromwell.webservice.routes - - -import akka.actor.{ActorRef, ActorRefFactory} -import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._ -import akka.http.scaladsl.model.{Multipart, StatusCodes} -import akka.http.scaladsl.server.Directives.onComplete -import akka.http.scaladsl.server.Route -import akka.pattern.{AskTimeoutException, ask} -import akka.stream.ActorMaterializer -import akka.util.Timeout -import cats.data.NonEmptyList -import com.typesafe.config.ConfigFactory -import cromwell.core.{WorkflowId, WorkflowOnHold, WorkflowState, WorkflowSubmitted, path => _} -import cromwell.engine.workflow.workflowstore.{WorkflowStoreActor, WorkflowStoreSubmitActor} -import cromwell.server.CromwellShutdown -import cromwell.webservice.WebServiceUtils.EnhancedThrowable -import cromwell.webservice.WorkflowJsonSupport._ -import cromwell.webservice.{PartialWorkflowSources, WebServiceUtils, WorkflowSubmitResponse} -import net.ceedubs.ficus.Ficus._ - -import scala.concurrent.duration.FiniteDuration -import scala.concurrent.{ExecutionContext, TimeoutException} -import scala.util.{Failure, Success} - -trait WesCromwellRouteSupport extends WebServiceUtils { - - val workflowStoreActor: ActorRef - - implicit val duration = ConfigFactory.load().as[FiniteDuration]("akka.http.server.request-timeout") - implicit val timeout: Timeout = duration - - implicit def actorRefFactory: ActorRefFactory - - implicit val materializer: ActorMaterializer - implicit val ec: ExecutionContext - - def toResponse(workflowId: WorkflowId, workflowState: WorkflowState): WorkflowSubmitResponse = { - WorkflowSubmitResponse(workflowId.toString, workflowState.toString) - } - - def submitRequest(formData: Multipart.FormData, isSingleSubmission: Boolean): Route = { - - def getWorkflowState(workflowOnHold: Boolean): WorkflowState = { - if (workflowOnHold) - WorkflowOnHold - else WorkflowSubmitted - } - - def sendToWorkflowStore(command: WorkflowStoreActor.WorkflowStoreActorSubmitCommand, warnings: Seq[String], workflowState: WorkflowState): Route = { - // NOTE: Do not blindly copy the akka-http -to- ask-actor pattern below without knowing the pros and cons. 
- onComplete(workflowStoreActor.ask(command).mapTo[WorkflowStoreSubmitActor.WorkflowStoreSubmitActorResponse]) { - case Success(w) => - w match { - case WorkflowStoreSubmitActor.WorkflowSubmittedToStore(workflowId, _) => - completeResponse(StatusCodes.Created, toResponse(workflowId, workflowState), warnings) - case WorkflowStoreSubmitActor.WorkflowsBatchSubmittedToStore(workflowIds, _) => - completeResponse(StatusCodes.Created, workflowIds.toList.map(toResponse(_, workflowState)), warnings) - case WorkflowStoreSubmitActor.WorkflowSubmitFailed(throwable) => - throwable.failRequest(StatusCodes.BadRequest, warnings) - } - case Failure(_: AskTimeoutException) if CromwellShutdown.shutdownInProgress() => CromwellApiService.serviceShuttingDownResponse - case Failure(e: TimeoutException) => e.failRequest(StatusCodes.ServiceUnavailable) - case Failure(e) => e.failRequest(StatusCodes.InternalServerError, warnings) - } - } - - onComplete(materializeFormData(formData)) { - case Success(data) => - PartialWorkflowSources.fromSubmitRoute(data, allowNoInputs = isSingleSubmission) match { - case Success(workflowSourceFiles) if isSingleSubmission && workflowSourceFiles.size == 1 => - val warnings = workflowSourceFiles.flatMap(_.warnings) - sendToWorkflowStore(WorkflowStoreActor.SubmitWorkflow(workflowSourceFiles.head), warnings, getWorkflowState(workflowSourceFiles.head.workflowOnHold)) - // Catches the case where someone has gone through the single submission endpoint w/ more than one workflow - case Success(workflowSourceFiles) if isSingleSubmission => - val warnings = workflowSourceFiles.flatMap(_.warnings) - val e = new IllegalArgumentException("To submit more than one workflow at a time, use the batch endpoint.") - e.failRequest(StatusCodes.BadRequest, warnings) - case Success(workflowSourceFiles) => - val warnings = workflowSourceFiles.flatMap(_.warnings) - sendToWorkflowStore( - WorkflowStoreActor.BatchSubmitWorkflows(NonEmptyList.fromListUnsafe(workflowSourceFiles.toList)), - warnings, getWorkflowState(workflowSourceFiles.head.workflowOnHold)) - case Failure(t) => t.failRequest(StatusCodes.BadRequest) - } - case Failure(e: TimeoutException) => e.failRequest(StatusCodes.ServiceUnavailable) - case Failure(e) => e.failRequest(StatusCodes.InternalServerError) - } - } -} diff --git a/engine/src/main/scala/cromwell/webservice/routes/wes/WesRouteSupport.scala b/engine/src/main/scala/cromwell/webservice/routes/wes/WesRouteSupport.scala index 7ec56e3317a..ba6546fb3e3 100644 --- a/engine/src/main/scala/cromwell/webservice/routes/wes/WesRouteSupport.scala +++ b/engine/src/main/scala/cromwell/webservice/routes/wes/WesRouteSupport.scala @@ -1,41 +1,47 @@ package cromwell.webservice.routes.wes import akka.actor.ActorRef -import akka.http.scaladsl.model.{StatusCode, StatusCodes} +import akka.http.scaladsl.model.{Multipart, StatusCode, StatusCodes} import akka.http.scaladsl.server.Directives._ import akka.http.scaladsl.server.directives.RouteDirectives.complete import akka.http.scaladsl.server.{Directive1, Route} import akka.pattern.{AskTimeoutException, ask} +import akka.stream.ActorMaterializer import akka.util.Timeout +import cats.data.NonEmptyList import com.typesafe.config.ConfigFactory -import cromwell.core.WorkflowId import cromwell.core.abort.SuccessfulAbortResponse +import cromwell.core.{WorkflowId, WorkflowOnHold, WorkflowState, WorkflowSubmitted} import cromwell.engine.instrumentation.HttpInstrumentation import cromwell.engine.workflow.WorkflowManagerActor.WorkflowNotFoundException +import 
cromwell.engine.workflow.workflowstore.{WorkflowStoreActor, WorkflowStoreSubmitActor} import cromwell.server.CromwellShutdown import cromwell.services.metadata.MetadataService.{BuildMetadataJsonAction, GetSingleWorkflowMetadataAction, GetStatus, MetadataServiceResponse, StatusLookupFailed} import cromwell.services.{FailedMetadataJsonResponse, SuccessfulMetadataJsonResponse} -import cromwell.webservice.WebServiceUtils.EnhancedThrowable +import cromwell.webservice.PartialWorkflowSources +import cromwell.webservice.WebServiceUtils.{EnhancedThrowable, completeResponse, materializeFormData} +import cromwell.webservice.routes.CromwellApiService import cromwell.webservice.routes.CromwellApiService.{UnrecognizedWorkflowException, validateWorkflowIdInMetadata} import cromwell.webservice.routes.MetadataRouteSupport.{metadataBuilderActorRequest, metadataQueryRequest} import cromwell.webservice.routes.wes.WesResponseJsonSupport._ -import cromwell.webservice.routes.wes.WesRouteSupport._ -import cromwell.webservice.routes.{CromwellApiService, WesCromwellRouteSupport} +import cromwell.webservice.routes.wes.WesRouteSupport.{respondWithWesError, _} import net.ceedubs.ficus.Ficus._ import scala.concurrent.duration.FiniteDuration -import scala.concurrent.{ExecutionContext, Future} +import scala.concurrent.{ExecutionContext, Future, TimeoutException} import scala.util.{Failure, Success} -trait WesRouteSupport extends HttpInstrumentation with WesCromwellRouteSupport { +trait WesRouteSupport extends HttpInstrumentation { val serviceRegistryActor: ActorRef val workflowManagerActor: ActorRef + val workflowStoreActor: ActorRef implicit val ec: ExecutionContext implicit val timeout: Timeout + implicit val materializer: ActorMaterializer /* Defines routes intended to sit alongside the primary Cromwell REST endpoints. For instance, we'll now have: @@ -68,7 +74,7 @@ trait WesRouteSupport extends HttpInstrumentation with WesCromwellRouteSupport { } ~ post { extractSubmission() { submission => - submitRequest(submission.entity, + wesSubmitRequest(submission.entity, isSingleSubmission = true) } } @@ -108,6 +114,62 @@ trait WesRouteSupport extends HttpInstrumentation with WesCromwellRouteSupport { } ) } + + def toWesResponse(workflowId: WorkflowId, workflowState: WorkflowState): WesRunStatus = { + WesRunStatus(workflowId.toString, WesState.fromCromwellStatus(workflowState)) + } + + def toWesResponseId(workflowId: WorkflowId): WesRunId ={ + WesRunId(workflowId.toString) + } + + def wesSubmitRequest(formData: Multipart.FormData, isSingleSubmission: Boolean): Route = { + def getWorkflowState(workflowOnHold: Boolean): WorkflowState = { + if (workflowOnHold) + WorkflowOnHold + else WorkflowSubmitted + } + + def sendToWorkflowStore(command: WorkflowStoreActor.WorkflowStoreActorSubmitCommand, warnings: Seq[String], workflowState: WorkflowState): Route = { + // NOTE: Do not blindly copy the akka-http -to- ask-actor pattern below without knowing the pros and cons. 
+ onComplete(workflowStoreActor.ask(command).mapTo[WorkflowStoreSubmitActor.WorkflowStoreSubmitActorResponse]) { + case Success(w) => + w match { + case WorkflowStoreSubmitActor.WorkflowSubmittedToStore(workflowId, _) => + completeResponse(StatusCodes.Created, toWesResponseId(workflowId), warnings) + case WorkflowStoreSubmitActor.WorkflowsBatchSubmittedToStore(workflowIds, _) => + completeResponse(StatusCodes.Created, workflowIds.toList.map(toWesResponse(_, workflowState)), warnings) + case WorkflowStoreSubmitActor.WorkflowSubmitFailed(throwable) => + respondWithWesError(throwable.getLocalizedMessage, StatusCodes.BadRequest) + } + case Failure(_: AskTimeoutException) if CromwellShutdown.shutdownInProgress() => respondWithWesError("Cromwell service is shutting down", StatusCodes.InternalServerError) + case Failure(e: TimeoutException) => e.failRequest(StatusCodes.ServiceUnavailable) + case Failure(e) => e.failRequest(StatusCodes.InternalServerError, warnings) + } + } + + onComplete(materializeFormData(formData)) { + case Success(data) => + PartialWorkflowSources.fromSubmitRoute(data, allowNoInputs = isSingleSubmission) match { + case Success(workflowSourceFiles) if isSingleSubmission && workflowSourceFiles.size == 1 => + val warnings = workflowSourceFiles.flatMap(_.warnings) + sendToWorkflowStore(WorkflowStoreActor.SubmitWorkflow(workflowSourceFiles.head), warnings, getWorkflowState(workflowSourceFiles.head.workflowOnHold)) + // Catches the case where someone has gone through the single submission endpoint w/ more than one workflow + case Success(workflowSourceFiles) if isSingleSubmission => + val warnings = workflowSourceFiles.flatMap(_.warnings) + val e = new IllegalArgumentException("To submit more than one workflow at a time, use the batch endpoint.") + e.failRequest(StatusCodes.BadRequest, warnings) + case Success(workflowSourceFiles) => + val warnings = workflowSourceFiles.flatMap(_.warnings) + sendToWorkflowStore( + WorkflowStoreActor.BatchSubmitWorkflows(NonEmptyList.fromListUnsafe(workflowSourceFiles.toList)), + warnings, getWorkflowState(workflowSourceFiles.head.workflowOnHold)) + case Failure(t) => t.failRequest(StatusCodes.BadRequest) + } + case Failure(e: TimeoutException) => e.failRequest(StatusCodes.ServiceUnavailable) + case Failure(e) => respondWithWesError(e.getLocalizedMessage, StatusCodes.InternalServerError) + } + } } diff --git a/engine/src/test/scala/cromwell/webservice/routes/wes/WesRouteSupportSpec.scala b/engine/src/test/scala/cromwell/webservice/routes/wes/WesRouteSupportSpec.scala index c8306088ded..6e9a390ad4a 100644 --- a/engine/src/test/scala/cromwell/webservice/routes/wes/WesRouteSupportSpec.scala +++ b/engine/src/test/scala/cromwell/webservice/routes/wes/WesRouteSupportSpec.scala @@ -165,8 +165,7 @@ class WesRouteSupportSpec extends AsyncFlatSpec with ScalatestRouteTest with Mat check { assertResult( s"""{ - | "id": "${CromwellApiServiceSpec.ExistingWorkflowId.toString}", - | "status": "Submitted" + | "run_id": "${CromwellApiServiceSpec.ExistingWorkflowId.toString}" |}""".stripMargin) { responseAs[String].parseJson.prettyPrint } From 4d8ef27265748c1c79b3c9ffbfd68f5342dabc68 Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Fri, 26 Aug 2022 18:27:48 +0100 Subject: [PATCH 074/326] Actors to push metadata to AWS SNS @markjschreiber --- project/Dependencies.scala | 1 + .../HybridSnsMetadataServiceActor.scala.scala | 86 +++++++++++++ .../AwsSnsMetadataServiceActor.scala.scala | 114 ++++++++++++++++++ .../sns/AwsSnsMetadataServiceActorSpec.scala | 60 +++++++++ 4 
files changed, 261 insertions(+) create mode 100644 services/src/main/scala/cromwell/services/metadata/impl/hybridsns/HybridSnsMetadataServiceActor.scala.scala create mode 100644 services/src/main/scala/cromwell/services/metadata/impl/sns/AwsSnsMetadataServiceActor.scala.scala create mode 100644 services/src/test/scala/cromwell/services/metadata/impl/sns/AwsSnsMetadataServiceActorSpec.scala diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 1c2c4460c8e..55f2b316db7 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -312,6 +312,7 @@ object Dependencies { "ecr", "ecrpublic", "secretsmanager", + "sns", ).map(artifactName => "software.amazon.awssdk" % artifactName % awsSdkV) private val googleCloudDependencies = List( diff --git a/services/src/main/scala/cromwell/services/metadata/impl/hybridsns/HybridSnsMetadataServiceActor.scala.scala b/services/src/main/scala/cromwell/services/metadata/impl/hybridsns/HybridSnsMetadataServiceActor.scala.scala new file mode 100644 index 00000000000..a173b1d9855 --- /dev/null +++ b/services/src/main/scala/cromwell/services/metadata/impl/hybridsns/HybridSnsMetadataServiceActor.scala.scala @@ -0,0 +1,86 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +package cromwell.services.metadata.impl.sns + +import akka.actor.{Actor, ActorLogging, ActorRef} +import com.typesafe.config.Config +import cromwell.services.metadata.MetadataService.{PutMetadataAction, PutMetadataActionAndRespond} +import cromwell.services.metadata.impl.MetadataServiceActor + + +/** + * A metadata service implementation which will function as a standard metadata service but also push all metadata + * events to AWS SNS (Simple Notification Service). This class closely follows the pattern established in the + * HybridPubSubMetadataServiceActor + * + * Under the hood it maintains its own MetadataServiceActor and AwsSnsMetadataServiceActor. All messages are routed + * to the MetadataServiceActor. 
PutMetadataActions are also sent to the AwsSnsMetadataServiceActor. PutMetadataActionAndRespond + * messages will be sent to the SnsMetadataServiceActor as a standard PutMetadataAction, i.e. only the standard + * metadata service will be ACKing the request. + * + * To use this actor something similar to the following should be present in the cromwell.conf file: + *
+  * services {
+  *   MetadataService {
+  *     class="cromwell.services.metadata.impl.sns.HybridSnsMetadataServiceActor"
+  *     config {
+  *       aws {
+  *         application-name = "cromwell"
+  *         auths = [{
+  *           name = "default"
+  *           scheme = "default"
+  *         }]
+  *         region = "us-east-1"
+  *         topicArn = "arn:aws:sns:us-east-1:1111111111111:cromwell-metadata"
+  *       }
+  *     }
+  *   }
+  * }
+  * 
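+  * If no region is set in the aws block above, the underlying SNS publisher falls back to us-east-1.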
+ * + * @see cromwell.services.metadata.impl.sns.AwsSnsMetadataServiceActor + */ +class HybridSnsMetadataServiceActor(serviceConfig: Config, globalConfig: Config, serviceRegistryActor: ActorRef) extends Actor with ActorLogging { + val standardMetadataActor: ActorRef = context.actorOf(MetadataServiceActor.props(serviceConfig, globalConfig, serviceRegistryActor)) + val awsSnsMetadataActor: ActorRef = context.actorOf(AwsSnsMetadataServiceActor.props(serviceConfig, globalConfig, serviceRegistryActor)) + + override def receive = { + case action: PutMetadataAction => + standardMetadataActor forward action + awsSnsMetadataActor forward action + case action: PutMetadataActionAndRespond => + standardMetadataActor forward action + awsSnsMetadataActor forward PutMetadataAction(action.events) + case anythingElse => standardMetadataActor forward anythingElse + } +} + diff --git a/services/src/main/scala/cromwell/services/metadata/impl/sns/AwsSnsMetadataServiceActor.scala.scala b/services/src/main/scala/cromwell/services/metadata/impl/sns/AwsSnsMetadataServiceActor.scala.scala new file mode 100644 index 00000000000..10f9caaba35 --- /dev/null +++ b/services/src/main/scala/cromwell/services/metadata/impl/sns/AwsSnsMetadataServiceActor.scala.scala @@ -0,0 +1,114 @@ +/* + * Copyright 2022 Amazon.com, Inc. or its affiliates. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +package cromwell.services.metadata.impl.sns + +import akka.actor.{Actor, ActorLogging, ActorRef, Props} +import com.typesafe.config.Config +import cromwell.cloudsupport.aws.AwsConfiguration +import cromwell.core.Dispatcher.ServiceDispatcher +import cromwell.services.metadata.MetadataEvent +import cromwell.services.metadata.MetadataService.{MetadataWriteFailure, MetadataWriteSuccess, PutMetadataAction, PutMetadataActionAndRespond} +import software.amazon.awssdk.auth.credentials.AwsCredentialsProviderChain +import software.amazon.awssdk.regions.Region +import software.amazon.awssdk.services.sns.SnsClient +import software.amazon.awssdk.services.sns.model.PublishRequest +import spray.json.enrichAny + +import scala.concurrent.{ExecutionContextExecutor, Future} +import scala.util.{Failure, Success} + + +/** + * An actor that publishes metadata events to AWS SNS + * @param serviceConfig the source of service config information + * @param globalConfig the source of global config information + * @param serviceRegistryActor the actor for registering services + * @see cromwell.services.metadata.impl.sns.HybridSnsMetadataServiceActor + */ +class AwsSnsMetadataServiceActor(serviceConfig: Config, globalConfig: Config, serviceRegistryActor: ActorRef) extends Actor with ActorLogging { + implicit val ec: ExecutionContextExecutor = context.dispatcher + + //setup sns client + val topicArn: String = serviceConfig.getString("aws.topicArn") + + val awsConfig: AwsConfiguration = AwsConfiguration(globalConfig) + val credentialsProviderChain: AwsCredentialsProviderChain = + AwsCredentialsProviderChain.of(awsConfig.authsByName.values.map(_.provider()).toSeq :_*) + + lazy val snsClient: SnsClient = SnsClient.builder() + .region(awsConfig.region.getOrElse(Region.US_EAST_1)) + .credentialsProvider(credentialsProviderChain) + .build() + + def publishMessages(events: Iterable[MetadataEvent]): Future[Unit] = { + import AwsSnsMetadataServiceActor.EnhancedMetadataEvents + + val eventsJson = events.toJson + //if there are no events then don't publish anything + if( eventsJson.length < 1) { return Future(())} + log.debug("Publishing to " + topicArn + ": " + eventsJson) + + Future { + snsClient.publish(PublishRequest.builder() + .message("[" + eventsJson.mkString(",") + "]") + .topicArn(topicArn) + .subject("cromwell-metadata-event") + .build()) + () //return unit + } + } + + override def receive: Receive = { + case action: PutMetadataAction => + publishMessages(action.events).failed foreach { e => + log.error(e, "Failed to post metadata: " + action.events) + } + case action: PutMetadataActionAndRespond => + publishMessages(action.events) onComplete { + case Success(_) => action.replyTo ! MetadataWriteSuccess(action.events) + case Failure(e) => action.replyTo ! 
MetadataWriteFailure(e, action.events) + } + } +} + +object AwsSnsMetadataServiceActor { + def props(serviceConfig: Config, globalConfig: Config, serviceRegistryActor: ActorRef): Props = { + Props(new AwsSnsMetadataServiceActor(serviceConfig, globalConfig, serviceRegistryActor)).withDispatcher(ServiceDispatcher) + } + + implicit class EnhancedMetadataEvents(val e: Iterable[MetadataEvent]) extends AnyVal { + import cromwell.services.metadata.MetadataJsonSupport._ + + def toJson: Seq[String] = e.map(_.toJson.toString()).toSeq + } +} + diff --git a/services/src/test/scala/cromwell/services/metadata/impl/sns/AwsSnsMetadataServiceActorSpec.scala b/services/src/test/scala/cromwell/services/metadata/impl/sns/AwsSnsMetadataServiceActorSpec.scala new file mode 100644 index 00000000000..028a1b5fac5 --- /dev/null +++ b/services/src/test/scala/cromwell/services/metadata/impl/sns/AwsSnsMetadataServiceActorSpec.scala @@ -0,0 +1,60 @@ +package cromwell.services.metadata.impl.sns + +import java.time.OffsetDateTime + +import akka.actor.{ActorInitializationException, ActorRef, Props} +import akka.testkit.{EventFilter, TestProbe} +import com.typesafe.config.{Config, ConfigFactory} +import cromwell.core.WorkflowId +import cromwell.services.ServicesSpec +import cromwell.services.metadata.{MetadataEvent, MetadataKey, MetadataValue} + + +class AwsSnsMetadataServiceActorSpec extends ServicesSpec { + import AwsSnsMetadataServiceActorSpec._ + + val registryProbe: ActorRef = TestProbe().ref + + "An AwsSnsMetadataActor with an empty serviceConfig" should { + "fail to build" in { + EventFilter[ActorInitializationException](occurrences = 1) intercept { + system.actorOf(Props(new AwsSnsMetadataServiceActor(emptyConfig, emptyConfig, registryProbe))) + } + } + } + + "An AwsSnsMetadataActor with a topic and configuration" should { + "successfully build" in { + system.actorOf(Props(new AwsSnsMetadataServiceActor(configWithTopic, emptyConfig, registryProbe))) + } + + "process an event" in { + val actor = system.actorOf(Props(new AwsSnsMetadataServiceActor(configWithTopic, emptyConfig, registryProbe))) + actor ! 
event + } + } +} + +object AwsSnsMetadataServiceActorSpec { + + // This doesn't include a topic so should be a failure + val emptyConfig: Config = ConfigFactory.empty() + + val configWithTopic: Config = ConfigFactory.parseString( + """ + |aws { + | application-name = "cromwell" + | auths = [{ + | name = "default" + | scheme = "default" + | }] + | region = "us-east-1" + | topicArn = "arn:aws:sns:us-east-1:1111111111111:cromwell-metadata" + |} + """.stripMargin + ) + + val event: MetadataEvent = MetadataEvent(MetadataKey(WorkflowId.randomId(), None, "key"), + Option(MetadataValue("value")), OffsetDateTime.now) +} + From 3b0856d080349c1b73bd6f8da4993fc3cc1ead4e Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Mon, 29 Aug 2022 17:24:38 +0100 Subject: [PATCH 075/326] - support fifo and non-fifo sns topics - support for eventbridge --- project/Dependencies.scala | 1 + .../aws/EventBridgeMetadataServiceActor.scala | 122 ++++++++++++++++++ .../SnsMetadataServiceActor.scala} | 22 +++- ...ybridEventBridgeMetadataServiceActor.scala | 86 ++++++++++++ .../HybridSnsMetadataServiceActor.scala} | 2 +- .../EventBridgeMetadataServiceActorSpec.scala | 60 +++++++++ .../SnsMetadataServiceActorSpec.scala} | 2 +- 7 files changed, 287 insertions(+), 8 deletions(-) create mode 100644 services/src/main/scala/cromwell/services/metadata/impl/aws/EventBridgeMetadataServiceActor.scala rename services/src/main/scala/cromwell/services/metadata/impl/{sns/AwsSnsMetadataServiceActor.scala.scala => aws/SnsMetadataServiceActor.scala} (90%) create mode 100644 services/src/main/scala/cromwell/services/metadata/impl/hybridaws/HybridEventBridgeMetadataServiceActor.scala rename services/src/main/scala/cromwell/services/metadata/impl/{hybridsns/HybridSnsMetadataServiceActor.scala.scala => hybridaws/HybridSnsMetadataServiceActor.scala} (98%) create mode 100644 services/src/test/scala/cromwell/services/metadata/impl/aws/EventBridgeMetadataServiceActorSpec.scala rename services/src/test/scala/cromwell/services/metadata/impl/{sns/AwsSnsMetadataServiceActorSpec.scala => aws/SnsMetadataServiceActorSpec.scala} (97%) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 55f2b316db7..519c13d4c37 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -313,6 +313,7 @@ object Dependencies { "ecrpublic", "secretsmanager", "sns", + "eventbridge", ).map(artifactName => "software.amazon.awssdk" % artifactName % awsSdkV) private val googleCloudDependencies = List( diff --git a/services/src/main/scala/cromwell/services/metadata/impl/aws/EventBridgeMetadataServiceActor.scala b/services/src/main/scala/cromwell/services/metadata/impl/aws/EventBridgeMetadataServiceActor.scala new file mode 100644 index 00000000000..20c41ca8c28 --- /dev/null +++ b/services/src/main/scala/cromwell/services/metadata/impl/aws/EventBridgeMetadataServiceActor.scala @@ -0,0 +1,122 @@ +/* + * Copyright 2022 Amazon.com, Inc. or its affiliates. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +package cromwell.services.metadata.impl.aws + +import akka.actor.{Actor, ActorLogging, ActorRef, Props} +import com.typesafe.config.Config +import cromwell.cloudsupport.aws.AwsConfiguration +import cromwell.core.Dispatcher.ServiceDispatcher +import cromwell.services.metadata.MetadataEvent +import cromwell.services.metadata.MetadataService.{MetadataWriteFailure, MetadataWriteSuccess, PutMetadataAction, PutMetadataActionAndRespond} +import software.amazon.awssdk.auth.credentials.AwsCredentialsProviderChain +import software.amazon.awssdk.regions.Region +import software.amazon.awssdk.services.eventbridge.EventBridgeClient +import software.amazon.awssdk.services.eventbridge.model.PutEventsRequest +import software.amazon.awssdk.services.eventbridge.model.PutEventsRequestEntry +import spray.json.enrichAny + +import scala.concurrent.{ExecutionContextExecutor, Future} +import scala.util.{Failure, Success} + + +/** + * An actor that publishes metadata events to AWS EventBridge + * @param serviceConfig the source of service config information + * @param globalConfig the source of global config information + * @param serviceRegistryActor the actor for registering services + * @see cromwell.services.metadata.impl.aws.HybridEventBridgeMetadataServiceActor + */ +class AwsEventBridgeMetadataServiceActor(serviceConfig: Config, globalConfig: Config, serviceRegistryActor: ActorRef) extends Actor with ActorLogging { + implicit val ec: ExecutionContextExecutor = context.dispatcher + + //setup EB client + val busName: String = serviceConfig.getString("aws.busName") + + val awsConfig: AwsConfiguration = AwsConfiguration(globalConfig) + val credentialsProviderChain: AwsCredentialsProviderChain = + AwsCredentialsProviderChain.of(awsConfig.authsByName.values.map(_.provider()).toSeq :_*) + + lazy val eventBrClient : EventBridgeClient = EventBridgeClient.builder() + .region(awsConfig.region.getOrElse(Region.US_EAST_1)) + .credentialsProvider(credentialsProviderChain) + .build(); + + def publishMessages(events: Iterable[MetadataEvent]): Future[Unit] = { + import AwsEventBridgeMetadataServiceActor.EnhancedMetadataEvents + + val eventsJson = events.toJson + //if there are no events then don't publish anything + if( eventsJson.length < 1) { return Future(())} + log.debug(f"Publishing to $busName : $eventsJson") + + val reqEntry = PutEventsRequestEntry.builder() + .eventBusName(busName) + .source("cromwell") + .detailType("cromwell-metadata-event") + .detail(eventsJson.mkString(",")) + .build() + + val eventsRequest = 
PutEventsRequest.builder() + .entries(reqEntry) + .build() + + Future { + eventBrClient.putEvents(eventsRequest) + () //return unit + } + } + + override def receive: Receive = { + case action: PutMetadataAction => + publishMessages(action.events).failed foreach { e => + log.error(e, "Failed to post metadata: " + action.events) + } + case action: PutMetadataActionAndRespond => + publishMessages(action.events) onComplete { + case Success(_) => action.replyTo ! MetadataWriteSuccess(action.events) + case Failure(e) => action.replyTo ! MetadataWriteFailure(e, action.events) + } + } +} + +object AwsEventBridgeMetadataServiceActor { + def props(serviceConfig: Config, globalConfig: Config, serviceRegistryActor: ActorRef): Props = { + Props(new AwsEventBridgeMetadataServiceActor(serviceConfig, globalConfig, serviceRegistryActor)).withDispatcher(ServiceDispatcher) + } + + implicit class EnhancedMetadataEvents(val e: Iterable[MetadataEvent]) extends AnyVal { + import cromwell.services.metadata.MetadataJsonSupport._ + + def toJson: Seq[String] = e.map(_.toJson.toString()).toSeq + } +} + diff --git a/services/src/main/scala/cromwell/services/metadata/impl/sns/AwsSnsMetadataServiceActor.scala.scala b/services/src/main/scala/cromwell/services/metadata/impl/aws/SnsMetadataServiceActor.scala similarity index 90% rename from services/src/main/scala/cromwell/services/metadata/impl/sns/AwsSnsMetadataServiceActor.scala.scala rename to services/src/main/scala/cromwell/services/metadata/impl/aws/SnsMetadataServiceActor.scala index 10f9caaba35..da66ae4f7f4 100644 --- a/services/src/main/scala/cromwell/services/metadata/impl/sns/AwsSnsMetadataServiceActor.scala.scala +++ b/services/src/main/scala/cromwell/services/metadata/impl/aws/SnsMetadataServiceActor.scala @@ -29,7 +29,9 @@ * POSSIBILITY OF SUCH DAMAGE. */ -package cromwell.services.metadata.impl.sns +package cromwell.services.metadata.impl.aws + +import java.util.UUID import akka.actor.{Actor, ActorLogging, ActorRef, Props} import com.typesafe.config.Config @@ -75,13 +77,21 @@ class AwsSnsMetadataServiceActor(serviceConfig: Config, globalConfig: Config, se val eventsJson = events.toJson //if there are no events then don't publish anything if( eventsJson.length < 1) { return Future(())} - log.debug("Publishing to " + topicArn + ": " + eventsJson) + log.debug(f"Publishing to $topicArn : $eventsJson") + + val message = PublishRequest.builder() + .message("[" + eventsJson.mkString(",") + "]") + .topicArn(topicArn) + .subject("cromwell-metadata-event") + + if (topicArn.endsWith(".fifo")) { + message + .messageGroupId("cromwell") + .messageDeduplicationId(UUID.randomUUID().toString()) + } Future { - snsClient.publish(PublishRequest.builder() - .message("[" + eventsJson.mkString(",") + "]") - .topicArn(topicArn) - .subject("cromwell-metadata-event") + snsClient.publish(message .build()) () //return unit } diff --git a/services/src/main/scala/cromwell/services/metadata/impl/hybridaws/HybridEventBridgeMetadataServiceActor.scala b/services/src/main/scala/cromwell/services/metadata/impl/hybridaws/HybridEventBridgeMetadataServiceActor.scala new file mode 100644 index 00000000000..d5b011df176 --- /dev/null +++ b/services/src/main/scala/cromwell/services/metadata/impl/hybridaws/HybridEventBridgeMetadataServiceActor.scala @@ -0,0 +1,86 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. 
Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +package cromwell.services.metadata.impl.aws + +import akka.actor.{Actor, ActorLogging, ActorRef} +import com.typesafe.config.Config +import cromwell.services.metadata.MetadataService.{PutMetadataAction, PutMetadataActionAndRespond} +import cromwell.services.metadata.impl.MetadataServiceActor + + +/** + * A metadata service implementation which will function as a standard metadata service but also push all metadata + * events to AWS EventBridge. This class closely follows the pattern established in the + * HybridPubSubMetadataServiceActor + * + * Under the hood it maintains its own MetadataServiceActor and AwsEventBridgeMetadataServiceActor. All messages are routed + * to the MetadataServiceActor. PutMetadataActions are also sent to the AwsEventBridgeMetadataServiceActor. PutMetadataActionAndRespond + * messages will be sent to the EventBridgeMetadataServiceActor as a standard PutMetadataAction, i.e. only the standard + * metadata service will be ACKing the request. + * + * To use this actor something similar to the following should be present in the cromwell.conf file: + *
+  * services {
+  *   MetadataService {
+  *     class="cromwell.services.metadata.impl.aws.HybridEventBridgeMetadataServiceActor"
+  *     config {
+  *       aws {
+  *         application-name = "cromwell"
+  *         auths = [{
+  *           name = "default"
+  *           scheme = "default"
+  *         }]
+  *         region = "us-east-1"
+  *         busName = "cromwell-metadata"
+  *       }
+  *     }
+  *   }
+  * }
+  * 
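+  * If no region is set in the aws block above, the underlying EventBridge publisher falls back to us-east-1.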
+ * + * @see cromwell.services.metadata.impl.aws.AwsEventBridgeMetadataServiceActor + */ +class HybridEventBridgeMetadataServiceActor(serviceConfig: Config, globalConfig: Config, serviceRegistryActor: ActorRef) extends Actor with ActorLogging { + val standardMetadataActor: ActorRef = context.actorOf(MetadataServiceActor.props(serviceConfig, globalConfig, serviceRegistryActor)) + val awsEventBridgeMetadataActor: ActorRef = context.actorOf(AwsEventBridgeMetadataServiceActor.props(serviceConfig, globalConfig, serviceRegistryActor)) + + override def receive = { + case action: PutMetadataAction => + standardMetadataActor forward action + awsEventBridgeMetadataActor forward action + case action: PutMetadataActionAndRespond => + standardMetadataActor forward action + awsEventBridgeMetadataActor forward PutMetadataAction(action.events) + case anythingElse => standardMetadataActor forward anythingElse + } +} + diff --git a/services/src/main/scala/cromwell/services/metadata/impl/hybridsns/HybridSnsMetadataServiceActor.scala.scala b/services/src/main/scala/cromwell/services/metadata/impl/hybridaws/HybridSnsMetadataServiceActor.scala similarity index 98% rename from services/src/main/scala/cromwell/services/metadata/impl/hybridsns/HybridSnsMetadataServiceActor.scala.scala rename to services/src/main/scala/cromwell/services/metadata/impl/hybridaws/HybridSnsMetadataServiceActor.scala index a173b1d9855..26c2e3f70ca 100644 --- a/services/src/main/scala/cromwell/services/metadata/impl/hybridsns/HybridSnsMetadataServiceActor.scala.scala +++ b/services/src/main/scala/cromwell/services/metadata/impl/hybridaws/HybridSnsMetadataServiceActor.scala @@ -29,7 +29,7 @@ * POSSIBILITY OF SUCH DAMAGE. */ -package cromwell.services.metadata.impl.sns +package cromwell.services.metadata.impl.aws import akka.actor.{Actor, ActorLogging, ActorRef} import com.typesafe.config.Config diff --git a/services/src/test/scala/cromwell/services/metadata/impl/aws/EventBridgeMetadataServiceActorSpec.scala b/services/src/test/scala/cromwell/services/metadata/impl/aws/EventBridgeMetadataServiceActorSpec.scala new file mode 100644 index 00000000000..ecfab9dc00a --- /dev/null +++ b/services/src/test/scala/cromwell/services/metadata/impl/aws/EventBridgeMetadataServiceActorSpec.scala @@ -0,0 +1,60 @@ +package cromwell.services.metadata.impl.aws + +import java.time.OffsetDateTime + +import akka.actor.{ActorInitializationException, ActorRef, Props} +import akka.testkit.{EventFilter, TestProbe} +import com.typesafe.config.{Config, ConfigFactory} +import cromwell.core.WorkflowId +import cromwell.services.ServicesSpec +import cromwell.services.metadata.{MetadataEvent, MetadataKey, MetadataValue} + + +class AwsEventBridgeMetadataServiceActorSpec extends ServicesSpec { + import AwsEventBridgeMetadataServiceActorSpec._ + + val registryProbe: ActorRef = TestProbe().ref + + "An AwsEventBridgeMetadataActor with an empty serviceConfig" should { + "fail to build" in { + EventFilter[ActorInitializationException](occurrences = 1) intercept { + system.actorOf(Props(new AwsEventBridgeMetadataServiceActor(emptyConfig, emptyConfig, registryProbe))) + } + } + } + + "An AwsEventBridgeMetadataActor with a bus name and configuration" should { + "successfully build" in { + system.actorOf(Props(new AwsEventBridgeMetadataServiceActor(configWithBus, emptyConfig, registryProbe))) + } + + "process an event" in { + val actor = system.actorOf(Props(new AwsEventBridgeMetadataServiceActor(configWithBus, emptyConfig, registryProbe))) + actor ! 
event + } + } +} + +object AwsSnsMetadataServiceActorSpec { + + // This doesn't include a topic so should be a failure + val emptyConfig: Config = ConfigFactory.empty() + + val configWithBus: Config = ConfigFactory.parseString( + """ + |aws { + | application-name = "cromwell" + | auths = [{ + | name = "default" + | scheme = "default" + | }] + | region = "us-east-1" + | busName = "cromwell-metadata" + |} + """.stripMargin + ) + + val event: MetadataEvent = MetadataEvent(MetadataKey(WorkflowId.randomId(), None, "key"), + Option(MetadataValue("value")), OffsetDateTime.now) +} + diff --git a/services/src/test/scala/cromwell/services/metadata/impl/sns/AwsSnsMetadataServiceActorSpec.scala b/services/src/test/scala/cromwell/services/metadata/impl/aws/SnsMetadataServiceActorSpec.scala similarity index 97% rename from services/src/test/scala/cromwell/services/metadata/impl/sns/AwsSnsMetadataServiceActorSpec.scala rename to services/src/test/scala/cromwell/services/metadata/impl/aws/SnsMetadataServiceActorSpec.scala index 028a1b5fac5..26347136655 100644 --- a/services/src/test/scala/cromwell/services/metadata/impl/sns/AwsSnsMetadataServiceActorSpec.scala +++ b/services/src/test/scala/cromwell/services/metadata/impl/aws/SnsMetadataServiceActorSpec.scala @@ -1,4 +1,4 @@ -package cromwell.services.metadata.impl.sns +package cromwell.services.metadata.impl.aws import java.time.OffsetDateTime From b5ef39aa652c607e5c370f1e8c0e7705a3fa8ff6 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Mon, 29 Aug 2022 19:48:55 -0400 Subject: [PATCH 076/326] BW-1378 Addl CromIAM user enablement checks (#6826) --- CHANGELOG.md | 14 +++++ .../CromIamInstrumentation.scala | 1 + .../main/scala/cromiam/sam/SamClient.scala | 33 ++++++++++++ .../webservice/CromIamApiService.scala | 16 +++--- .../cromiam/webservice/QuerySupport.scala | 2 +- .../cromiam/webservice/RequestSupport.scala | 26 ++++++++- .../webservice/SubmissionSupport.scala | 2 +- .../webservice/WomtoolRouteSupport.scala | 10 ++-- .../webservice/CromIamApiServiceSpec.scala | 38 ++++++++++++- .../cromiam/webservice/MockClients.scala | 9 ++++ .../webservice/WomtoolRouteSupportSpec.scala | 54 ++++++++++++++++++- 11 files changed, 184 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c40f47b651..e1158be3ca4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,19 @@ # Cromwell Change Log +## 84 Release Notes + +### CromIAM enabled user checks + +For Cromwell instances utilizing the optional CromIAM identity and access management component, the following endpoints now verify that the calling user is enabled before forwarding the request. +* `/api/workflows/v1/backends` +* `/api/womtool/v1/describe` + +This change makes the above endpoints consistent with the existing behavior of all the other endpoints in the `/api/` path of CromIAM. + +## 83 Release Notes + +* Changes the type of several primary key columns in call caching tables from int to bigint. The database migration may be lengthy if your database contains a large amount of call caching data. 
+ ## 82 Release Notes * Restored missing example configuration file diff --git a/CromIAM/src/main/scala/cromiam/instrumentation/CromIamInstrumentation.scala b/CromIAM/src/main/scala/cromiam/instrumentation/CromIamInstrumentation.scala index 63f48073146..65b164f00f6 100644 --- a/CromIAM/src/main/scala/cromiam/instrumentation/CromIamInstrumentation.scala +++ b/CromIAM/src/main/scala/cromiam/instrumentation/CromIamInstrumentation.scala @@ -20,6 +20,7 @@ trait CromIamInstrumentation extends CromwellInstrumentation { val samPrefix: NonEmptyList[String] = NonEmptyList.one("sam") val getWhitelistPrefix = NonEmptyList.one("get-whitelist") + val getUserEnabledPrefix = NonEmptyList.one("get-user-enabled") val userCollectionPrefix = NonEmptyList.one("user-collection") val authCollectionPrefix = NonEmptyList.one("auth-collection") val registerCollectionPrefix = NonEmptyList.one("register-collection") diff --git a/CromIAM/src/main/scala/cromiam/sam/SamClient.scala b/CromIAM/src/main/scala/cromiam/sam/SamClient.scala index f289251a2fb..d6a315f8241 100644 --- a/CromIAM/src/main/scala/cromiam/sam/SamClient.scala +++ b/CromIAM/src/main/scala/cromiam/sam/SamClient.scala @@ -18,6 +18,7 @@ import cromiam.sam.SamResourceJsonSupport._ import cromiam.server.status.StatusCheckedSubsystem import cromwell.api.model._ import mouse.boolean._ +import spray.json.RootJsonFormat import scala.concurrent.ExecutionContextExecutor @@ -73,6 +74,33 @@ class SamClient(scheme: String, } yield whitelisted } + def isUserEnabledSam(user: User, cromIamRequest: HttpRequest): FailureResponseOrT[Boolean] = { + val request = HttpRequest( + method = HttpMethods.GET, + uri = samUserStatusUri, + headers = List[HttpHeader](user.authorization) + ) + + for { + response <- instrumentRequest( + () => Http().singleRequest(request).asFailureResponseOrT, + cromIamRequest, + instrumentationPrefixForSam(getUserEnabledPrefix) + ) + userEnabled <- response.status match { + case StatusCodes.OK => + val unmarshal: IO[UserStatusInfo] = IO.fromFuture(IO(Unmarshal(response.entity).to[UserStatusInfo])) + FailureResponseOrT.right[HttpResponse](unmarshal).map { userInfo => + if (!userInfo.enabled) log.info("Access denied for user {}", user.userId) + userInfo.enabled + } + case _ => + log.error("Could not verify access with Sam for user {}, error was {} {}", user.userId, response.status, response.toString().take(100)) + FailureResponseOrT.pure[IO, HttpResponse](false) + } + } yield userEnabled + } + def collectionsForUser(user: User, cromIamRequest: HttpRequest): FailureResponseOrT[List[Collection]] = { val request = HttpRequest(method = HttpMethods.GET, uri = samBaseCollectionUri, headers = List[HttpHeader](user.authorization)) @@ -170,6 +198,7 @@ class SamClient(scheme: String, private lazy val samBaseResourceUri = s"$samBaseUri/api/resource" private lazy val samBaseCollectionUri = s"$samBaseResourceUri/workflow-collection" private lazy val samSubmitWhitelistUri = s"$samBaseResourceUri/caas/submit/action/get_whitelist" + private lazy val samUserStatusUri = s"$samBaseUri/register/user/v2/self/info" } @@ -188,4 +217,8 @@ object SamClient { def SamRegisterCollectionExceptionResp(statusCode: StatusCode) = HttpResponse(status = statusCode, entity = SamRegisterCollectionException(statusCode).getMessage) + case class UserStatusInfo(adminEnabled: Boolean, enabled: Boolean, userEmail: String, userSubjectId: String) + + implicit val UserStatusInfoFormat: RootJsonFormat[UserStatusInfo] = jsonFormat4(UserStatusInfo) + } diff --git 
a/CromIAM/src/main/scala/cromiam/webservice/CromIamApiService.scala b/CromIAM/src/main/scala/cromiam/webservice/CromIamApiService.scala index 63694599476..7a16e5ea797 100644 --- a/CromIAM/src/main/scala/cromiam/webservice/CromIamApiService.scala +++ b/CromIAM/src/main/scala/cromiam/webservice/CromIamApiService.scala @@ -81,7 +81,7 @@ trait CromIamApiService extends RequestSupport def abortRoute: Route = path("api" / "workflows" / Segment / Segment / Abort) { (_, workflowId) => post { - extractUserAndRequest { (user, req) => + extractUserAndStrictRequest { (user, req) => logUserWorkflowAction(user, workflowId, Abort) complete { authorizeAbortThenForwardToCromwell(user, workflowId, req).asHttpResponse @@ -93,7 +93,7 @@ trait CromIamApiService extends RequestSupport //noinspection MutatorLikeMethodIsParameterless def releaseHoldRoute: Route = path("api" / "workflows" / Segment / Segment / ReleaseHold) { (_, workflowId) => post { - extractUserAndRequest { (user, req) => + extractUserAndStrictRequest { (user, req) => logUserWorkflowAction(user, workflowId, ReleaseHold) complete { authorizeUpdateThenForwardToCromwell(user, workflowId, req).asHttpResponse @@ -112,7 +112,7 @@ trait CromIamApiService extends RequestSupport def labelPatchRoute: Route = { path("api" / "workflows" / Segment / Segment / Labels) { (_, workflowId) => patch { - extractUserAndRequest { (user, req) => + extractUserAndStrictRequest { (user, req) => entity(as[String]) { labels => logUserWorkflowAction(user, workflowId, Labels) validateLabels(Option(labels)) { _ => // Not using the labels, just using this to verify they didn't specify labels we don't want them to @@ -130,7 +130,7 @@ trait CromIamApiService extends RequestSupport def callCacheDiffRoute: Route = path("api" / "workflows" / Segment / "callcaching" / "diff") { _ => get { - extractUserAndRequest { (user, req) => + extractUserAndStrictRequest { (user, req) => logUserAction(user, "call caching diff") parameterSeq { parameters => val paramMap = parameters.toMap @@ -150,11 +150,9 @@ trait CromIamApiService extends RequestSupport */ private def workflowGetRoute(urlSuffix: String): Route = path("api" / "workflows" / Segment / urlSuffix) { _ => get { - extractUserAndRequest { (user, req) => + extractUserAndStrictRequest { (user, req) => logUserAction(user, urlSuffix) - complete { - cromwellClient.forwardToCromwell(req).asHttpResponse - } + forwardIfUserEnabled(user, req, cromwellClient, samClient) } } } @@ -166,7 +164,7 @@ trait CromIamApiService extends RequestSupport private def workflowRoute(urlSuffix: String, method: Directive0): Route = path("api" / "workflows" / Segment / Segment / urlSuffix) { (_, workflowId) => method { - extractUserAndRequest { (user, req) => + extractUserAndStrictRequest { (user, req) => logUserWorkflowAction(user, workflowId, urlSuffix) complete { authorizeReadThenForwardToCromwell(user, List(workflowId), req).asHttpResponse diff --git a/CromIAM/src/main/scala/cromiam/webservice/QuerySupport.scala b/CromIAM/src/main/scala/cromiam/webservice/QuerySupport.scala index cddabe74a57..e9397605c6a 100644 --- a/CromIAM/src/main/scala/cromiam/webservice/QuerySupport.scala +++ b/CromIAM/src/main/scala/cromiam/webservice/QuerySupport.scala @@ -55,7 +55,7 @@ trait QuerySupport extends RequestSupport { * directive */ private def preprocessQuery: Directive[(User, List[Collection], HttpRequest)] = { - extractUserAndRequest tflatMap { case (user, cromIamRequest) => + extractUserAndStrictRequest tflatMap { case (user, cromIamRequest) => log.info("Received query " 
+ cromIamRequest.method.value + " request for user " + user.userId) onComplete(samClient.collectionsForUser(user, cromIamRequest).value.unsafeToFuture()) flatMap { diff --git a/CromIAM/src/main/scala/cromiam/webservice/RequestSupport.scala b/CromIAM/src/main/scala/cromiam/webservice/RequestSupport.scala index 12b231485f7..c9b6a196368 100644 --- a/CromIAM/src/main/scala/cromiam/webservice/RequestSupport.scala +++ b/CromIAM/src/main/scala/cromiam/webservice/RequestSupport.scala @@ -6,6 +6,13 @@ import akka.http.scaladsl.server.Directives._ import akka.http.scaladsl.server._ import cromiam.auth.User import org.broadinstitute.dsde.workbench.model.WorkbenchUserId +import akka.http.scaladsl.model.HttpResponse +import akka.http.scaladsl.server.Directives.{authorize, complete, onComplete} +import akka.http.scaladsl.server.Route +import cromiam.cromwell.CromwellClient +import cromiam.sam.SamClient + +import scala.util.{Failure, Success} trait RequestSupport { def extractStrictRequest: Directive1[HttpRequest] = { @@ -26,10 +33,27 @@ trait RequestSupport { } } - def extractUserAndRequest: Directive[(User, HttpRequest)] = { + def extractUserAndStrictRequest: Directive[(User, HttpRequest)] = { for { user <- extractUser request <- extractStrictRequest } yield (user, request) } + + def forwardIfUserEnabled(user: User, req: HttpRequest, cromwellClient: CromwellClient, samClient: SamClient): Route = { + import cromwell.api.model.EnhancedFailureResponseOrHttpResponseT + + onComplete(samClient.isUserEnabledSam(user, req).value.unsafeToFuture()) { + case Success(Left(httpResponse: HttpResponse)) => complete(httpResponse) + case Success(Right(isEnabled: Boolean)) => + authorize(isEnabled) { + complete { + cromwellClient.forwardToCromwell(req).asHttpResponse + } + } + case Failure(e) => + val message = s"Unable to look up enablement status for user ${user.userId}: ${e.getMessage}. Please try again later." 
+ throw new RuntimeException(message, e) + } + } } diff --git a/CromIAM/src/main/scala/cromiam/webservice/SubmissionSupport.scala b/CromIAM/src/main/scala/cromiam/webservice/SubmissionSupport.scala index c1f5477475b..52a05d1cdc7 100644 --- a/CromIAM/src/main/scala/cromiam/webservice/SubmissionSupport.scala +++ b/CromIAM/src/main/scala/cromiam/webservice/SubmissionSupport.scala @@ -31,7 +31,7 @@ trait SubmissionSupport extends RequestSupport { // FIXME - getting pathPrefix to shrink this keeps hosing up, there's gotta be some way to do this def submitRoute: Route = (path("api" / "workflows" / Segment) | path("api" / "workflows" / Segment / "batch")) { _ => post { - extractUserAndRequest { (user, request) => + extractUserAndStrictRequest { (user, request) => log.info("Received submission request from user " + user.userId) onComplete(samClient.isSubmitWhitelisted(user, request).value.unsafeToFuture()) { case Success(Left(httpResponse)) => complete(httpResponse) diff --git a/CromIAM/src/main/scala/cromiam/webservice/WomtoolRouteSupport.scala b/CromIAM/src/main/scala/cromiam/webservice/WomtoolRouteSupport.scala index 671d4a76543..a6098b1fae0 100644 --- a/CromIAM/src/main/scala/cromiam/webservice/WomtoolRouteSupport.scala +++ b/CromIAM/src/main/scala/cromiam/webservice/WomtoolRouteSupport.scala @@ -2,20 +2,18 @@ package cromiam.webservice import akka.http.scaladsl.server.Directives._ import cromiam.cromwell.CromwellClient -import cromwell.api.model._ +import cromiam.sam.SamClient trait WomtoolRouteSupport extends RequestSupport { // When this trait is mixed into `CromIamApiService` the value of `cromwellClient` is the reader (non-abort) address val cromwellClient: CromwellClient + val samClient: SamClient val womtoolRoutes = path("api" / "womtool" / Segment / "describe") { _ => post { - extractStrictRequest { req => - complete { - // This endpoint requires authn which it gets for free from the proxy, does not care about authz - cromwellClient.forwardToCromwell(req).asHttpResponse - } + extractUserAndStrictRequest { (user, req) => + forwardIfUserEnabled(user, req, cromwellClient, samClient) } } } diff --git a/CromIAM/src/test/scala/cromiam/webservice/CromIamApiServiceSpec.scala b/CromIAM/src/test/scala/cromiam/webservice/CromIamApiServiceSpec.scala index 5ad6a976204..89945c5bfcb 100644 --- a/CromIAM/src/test/scala/cromiam/webservice/CromIamApiServiceSpec.scala +++ b/CromIAM/src/test/scala/cromiam/webservice/CromIamApiServiceSpec.scala @@ -4,7 +4,8 @@ import akka.event.NoLogging import akka.http.scaladsl.model.StatusCodes._ import akka.http.scaladsl.model.headers.{Authorization, OAuth2BearerToken, RawHeader} import akka.http.scaladsl.model.{ContentTypes, HttpEntity, HttpHeader} -import akka.http.scaladsl.server.MissingHeaderRejection +import akka.http.scaladsl.server.Route.seal +import akka.http.scaladsl.server.{AuthorizationFailedRejection, MissingHeaderRejection} import akka.http.scaladsl.testkit.ScalatestRouteTest import com.typesafe.config.Config import common.assertion.CromwellTimeoutSpec @@ -303,12 +304,45 @@ class CromIamApiServiceSpec extends AnyFlatSpec with CromwellTimeoutSpec with Ma } } - it should "reject request if it doesn't contain OIDC_CLAIM_user_id in header" in { + it should "reject request if it doesn't contain OIDC_CLAIM_user_id or token" in { Get(s"/api/workflows/$version/backends") ~> allRoutes ~> check { rejection shouldEqual MissingHeaderRejection("OIDC_CLAIM_user_id") } } + it should "return 403 when we request with a disabled user" in { + Get( + 
s"/api/workflows/$version/backends" + ).withHeaders( + List(Authorization(OAuth2BearerToken("my-token")), RawHeader("OIDC_CLAIM_user_id", "disabled@example.com")) + ) ~> allRoutes ~> check { + rejection shouldEqual AuthorizationFailedRejection + } + } + + it should "reject request if it contains a token and no OIDC_CLAIM_user_id in header" in { + Get( + s"/api/workflows/$version/backends" + ).withHeaders( + List(Authorization(OAuth2BearerToken("my-token"))) + ) ~> allRoutes ~> check { + rejection shouldEqual MissingHeaderRejection("OIDC_CLAIM_user_id") + } + } + + it should "return 404 when no auth token provided" in { + Get( + s"/api/workflows/$version/backends" + ).withHeaders( + List(RawHeader("OIDC_CLAIM_user_id", "enabled@example.com")) + // "[An] explicit call on the Route.seal method is needed in test code, but in your application code it is not necessary." + // https://doc.akka.io/docs/akka-http/current/routing-dsl/testkit.html#testing-sealed-routes + // https://doc.akka.io/docs/akka-http/current/routing-dsl/routes.html#sealing-a-route + ) ~> seal(allRoutes) ~> check { + responseAs[String] shouldEqual "The requested resource could not be found." + status shouldBe NotFound + } + } behavior of "ReleaseHold endpoint" it should "return 200 for authorized user who has collection associated with root workflow" in { diff --git a/CromIAM/src/test/scala/cromiam/webservice/MockClients.scala b/CromIAM/src/test/scala/cromiam/webservice/MockClients.scala index 3fb9689e5cc..59e75347159 100644 --- a/CromIAM/src/test/scala/cromiam/webservice/MockClients.scala +++ b/CromIAM/src/test/scala/cromiam/webservice/MockClients.scala @@ -141,6 +141,15 @@ class MockSamClient(checkSubmitWhitelist: Boolean = true) FailureResponseOrT.pure(!user.userId.value.equalsIgnoreCase(NotWhitelistedUser)) } + override def isUserEnabledSam(user: User, cromIamRequest: HttpRequest): FailureResponseOrT[Boolean] = { + if (user.userId.value == "enabled@example.com" || user.userId.value == MockSamClient.AuthorizedUserCollectionStr) + FailureResponseOrT.pure(true) + else if (user.userId.value == "disabled@example.com") + FailureResponseOrT.pure(false) + else + throw new Exception("Misconfigured test") + } + override def requestAuth(authorizationRequest: CollectionAuthorizationRequest, cromIamRequest: HttpRequest): FailureResponseOrT[Unit] = { authorizationRequest.user.userId.value match { diff --git a/CromIAM/src/test/scala/cromiam/webservice/WomtoolRouteSupportSpec.scala b/CromIAM/src/test/scala/cromiam/webservice/WomtoolRouteSupportSpec.scala index 7ba1c495f23..785c887c374 100644 --- a/CromIAM/src/test/scala/cromiam/webservice/WomtoolRouteSupportSpec.scala +++ b/CromIAM/src/test/scala/cromiam/webservice/WomtoolRouteSupportSpec.scala @@ -2,6 +2,9 @@ package cromiam.webservice import akka.http.scaladsl.model.ContentTypes import akka.http.scaladsl.model.StatusCodes._ +import akka.http.scaladsl.model.headers.{Authorization, OAuth2BearerToken, RawHeader} +import akka.http.scaladsl.server.Route.seal +import akka.http.scaladsl.server.{AuthorizationFailedRejection, MissingHeaderRejection} import akka.http.scaladsl.testkit.ScalatestRouteTest import common.assertion.CromwellTimeoutSpec import org.scalatest.flatspec.AnyFlatSpec @@ -11,15 +14,64 @@ import org.scalatest.matchers.should.Matchers class WomtoolRouteSupportSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matchers with WomtoolRouteSupport with ScalatestRouteTest { override lazy val cromwellClient = new MockCromwellClient() + override lazy val samClient = new 
MockSamClient() behavior of "Womtool endpoint routes" it should "return 200 when we request to the right path" in { - Post(s"/api/womtool/v1/describe") ~> womtoolRoutes ~> check { + Post( + s"/api/womtool/v1/describe" + ).withHeaders( + List(Authorization(OAuth2BearerToken("my-token")), RawHeader("OIDC_CLAIM_user_id", "enabled@example.com")) + ) ~> womtoolRoutes ~> check { status shouldBe OK responseAs[String] shouldBe "Hey there, workflow describer" contentType should be(ContentTypes.`text/plain(UTF-8)`) } } + it should "return 403 when we request with a disabled user" in { + Post( + s"/api/womtool/v1/describe" + ).withHeaders( + List(Authorization(OAuth2BearerToken("my-token")), RawHeader("OIDC_CLAIM_user_id", "disabled@example.com")) + ) ~> womtoolRoutes ~> check { + rejection shouldEqual AuthorizationFailedRejection + } + } + + it should "bail out with no user ID" in { + Post( + s"/api/womtool/v1/describe" + ).withHeaders( + List(Authorization(OAuth2BearerToken("my-token"))) + ) ~> womtoolRoutes ~> check { + rejection shouldEqual MissingHeaderRejection("OIDC_CLAIM_user_id") + } + } + + it should "return 404 when no auth token provided" in { + Post( + s"/api/womtool/v1/describe" + ).withHeaders( + List(RawHeader("OIDC_CLAIM_user_id", "enabled@example.com")) + // "[An] explicit call on the Route.seal method is needed in test code, but in your application code it is not necessary." + // https://doc.akka.io/docs/akka-http/current/routing-dsl/testkit.html#testing-sealed-routes + // https://doc.akka.io/docs/akka-http/current/routing-dsl/routes.html#sealing-a-route + ) ~> seal(womtoolRoutes) ~> check { + responseAs[String] shouldEqual "The requested resource could not be found." + status shouldBe NotFound + } + } + + it should "bail out with no headers" in { + Post( + s"/api/womtool/v1/describe" + ).withHeaders( + List.empty + ) ~> womtoolRoutes ~> check { + rejection shouldEqual MissingHeaderRejection("OIDC_CLAIM_user_id") + } + } + } From 8abf84ceeed1c0e5131ed4883357f90b0ad33c82 Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Tue, 30 Aug 2022 16:34:33 +0100 Subject: [PATCH 077/326] - remove "licensing" - add docs --- .../aws/EventBridgeMetadataServiceActor.scala | 31 ----------- .../impl/aws/SnsMetadataServiceActor.scala | 31 ----------- ...ybridEventBridgeMetadataServiceActor.scala | 31 ----------- .../HybridSnsMetadataServiceActor.scala | 31 ----------- .../scala/cromwell/backend/impl/aws/README.md | 54 +++++++++++++++++++ 5 files changed, 54 insertions(+), 124 deletions(-) diff --git a/services/src/main/scala/cromwell/services/metadata/impl/aws/EventBridgeMetadataServiceActor.scala b/services/src/main/scala/cromwell/services/metadata/impl/aws/EventBridgeMetadataServiceActor.scala index 20c41ca8c28..b22fccb5170 100644 --- a/services/src/main/scala/cromwell/services/metadata/impl/aws/EventBridgeMetadataServiceActor.scala +++ b/services/src/main/scala/cromwell/services/metadata/impl/aws/EventBridgeMetadataServiceActor.scala @@ -1,34 +1,3 @@ -/* - * Copyright 2022 Amazon.com, Inc. or its affiliates. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, - * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, - * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING - * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - package cromwell.services.metadata.impl.aws import akka.actor.{Actor, ActorLogging, ActorRef, Props} diff --git a/services/src/main/scala/cromwell/services/metadata/impl/aws/SnsMetadataServiceActor.scala b/services/src/main/scala/cromwell/services/metadata/impl/aws/SnsMetadataServiceActor.scala index da66ae4f7f4..724f0af6c03 100644 --- a/services/src/main/scala/cromwell/services/metadata/impl/aws/SnsMetadataServiceActor.scala +++ b/services/src/main/scala/cromwell/services/metadata/impl/aws/SnsMetadataServiceActor.scala @@ -1,34 +1,3 @@ -/* - * Copyright 2022 Amazon.com, Inc. or its affiliates. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, - * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, - * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING - * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - package cromwell.services.metadata.impl.aws import java.util.UUID diff --git a/services/src/main/scala/cromwell/services/metadata/impl/hybridaws/HybridEventBridgeMetadataServiceActor.scala b/services/src/main/scala/cromwell/services/metadata/impl/hybridaws/HybridEventBridgeMetadataServiceActor.scala index d5b011df176..fc76207ba9d 100644 --- a/services/src/main/scala/cromwell/services/metadata/impl/hybridaws/HybridEventBridgeMetadataServiceActor.scala +++ b/services/src/main/scala/cromwell/services/metadata/impl/hybridaws/HybridEventBridgeMetadataServiceActor.scala @@ -1,34 +1,3 @@ -/* - * Copyright 2020 Amazon.com, Inc. or its affiliates. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, - * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, - * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING - * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - package cromwell.services.metadata.impl.aws import akka.actor.{Actor, ActorLogging, ActorRef} diff --git a/services/src/main/scala/cromwell/services/metadata/impl/hybridaws/HybridSnsMetadataServiceActor.scala b/services/src/main/scala/cromwell/services/metadata/impl/hybridaws/HybridSnsMetadataServiceActor.scala index 26c2e3f70ca..d1a05e46924 100644 --- a/services/src/main/scala/cromwell/services/metadata/impl/hybridaws/HybridSnsMetadataServiceActor.scala +++ b/services/src/main/scala/cromwell/services/metadata/impl/hybridaws/HybridSnsMetadataServiceActor.scala @@ -1,34 +1,3 @@ -/* - * Copyright 2020 Amazon.com, Inc. or its affiliates. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * 3. 
Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, - * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, - * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING - * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - package cromwell.services.metadata.impl.aws import akka.actor.{Actor, ActorLogging, ActorRef} diff --git a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md index 2003e0f9ef3..61bb351c7e9 100644 --- a/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md +++ b/supportedBackends/aws/src/main/scala/cromwell/backend/impl/aws/README.md @@ -197,6 +197,60 @@ engine { } ``` +### Metadata notifications + +Cromwell has a feature that allows it to send metadata notifications to you. These notifications are mostly state transitions (task start, task end, workflow succeeded, workflow failed, etc) but also task descriptions. + +In the AWS backend those notifications can be send to **SNS Topic** or **EventBridge Bus** and you can use them to trigger some post run jobs. Below you can find information on how to setup it. + +#### AWS SNS + +1. Create an SNS topic, add the following to your `cromwell.conf` file and replace `topicArn` with the topic's ARN you just created: + +``` +services { + MetadataService { + class="cromwell.services.metadata.impl.aws.HybridSnsMetadataServiceActor" + config { + aws { + application-name = "cromwell" + auths = [{ + name = "default" + scheme = "default" + }] + region = "us-east-1" + topicArn = "" + } + } + } +} +``` +2. Add `sns:Publish` IAM policy to your Cromwell server IAM role. + +#### AWS EventBridge + +1. Create an EventBridge bus, add the following to your `cromwell.conf` file and replace `busName` with the name of the bus you just created: + +``` +services { + MetadataService { + class="cromwell.services.metadata.impl.aws.HybridEventBridgeMetadataServiceActor" + config { + aws { + application-name = "cromwell" + auths = [{ + name = "default" + scheme = "default" + }] + region = "us-east-1" + busName = "" + } + } + } +} +``` +2. Add `events:PutEvents` IAM policy to your Cromwell server IAM role. 
+ AWS Batch --------- From 6afc7fc5ecc74e3b19c011f6a40e7a5967af48e7 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Tue, 30 Aug 2022 17:06:14 +0000 Subject: [PATCH 078/326] Update cromwell version from 84 to 85 --- project/Version.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/Version.scala b/project/Version.scala index 1d21894dfb0..c0bdb0b3453 100644 --- a/project/Version.scala +++ b/project/Version.scala @@ -5,7 +5,7 @@ import sbt._ object Version { // Upcoming release, or current if we're on a master / hotfix branch - val cromwellVersion = "84" + val cromwellVersion = "85" /** * Returns true if this project should be considered a snapshot. From 16ea7b3f6a3bf0e363872028aeac74f2ec1a6d5c Mon Sep 17 00:00:00 2001 From: Henrique Silva Date: Wed, 31 Aug 2022 10:17:57 +0100 Subject: [PATCH 079/326] - add licensing - fix tests --- .../aws/EventBridgeMetadataServiceActor.scala | 31 +++++++++++++++++ .../impl/aws/SnsMetadataServiceActor.scala | 31 +++++++++++++++++ ...ybridEventBridgeMetadataServiceActor.scala | 31 +++++++++++++++++ .../HybridSnsMetadataServiceActor.scala | 31 +++++++++++++++++ .../EventBridgeMetadataServiceActorSpec.scala | 33 ++++++++++++++++++- .../aws/SnsMetadataServiceActorSpec.scala | 31 +++++++++++++++++ 6 files changed, 187 insertions(+), 1 deletion(-) diff --git a/services/src/main/scala/cromwell/services/metadata/impl/aws/EventBridgeMetadataServiceActor.scala b/services/src/main/scala/cromwell/services/metadata/impl/aws/EventBridgeMetadataServiceActor.scala index b22fccb5170..20c41ca8c28 100644 --- a/services/src/main/scala/cromwell/services/metadata/impl/aws/EventBridgeMetadataServiceActor.scala +++ b/services/src/main/scala/cromwell/services/metadata/impl/aws/EventBridgeMetadataServiceActor.scala @@ -1,3 +1,34 @@ +/* + * Copyright 2022 Amazon.com, Inc. or its affiliates. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + package cromwell.services.metadata.impl.aws import akka.actor.{Actor, ActorLogging, ActorRef, Props} diff --git a/services/src/main/scala/cromwell/services/metadata/impl/aws/SnsMetadataServiceActor.scala b/services/src/main/scala/cromwell/services/metadata/impl/aws/SnsMetadataServiceActor.scala index 724f0af6c03..da66ae4f7f4 100644 --- a/services/src/main/scala/cromwell/services/metadata/impl/aws/SnsMetadataServiceActor.scala +++ b/services/src/main/scala/cromwell/services/metadata/impl/aws/SnsMetadataServiceActor.scala @@ -1,3 +1,34 @@ +/* + * Copyright 2022 Amazon.com, Inc. or its affiliates. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + package cromwell.services.metadata.impl.aws import java.util.UUID diff --git a/services/src/main/scala/cromwell/services/metadata/impl/hybridaws/HybridEventBridgeMetadataServiceActor.scala b/services/src/main/scala/cromwell/services/metadata/impl/hybridaws/HybridEventBridgeMetadataServiceActor.scala index fc76207ba9d..1cde14f7212 100644 --- a/services/src/main/scala/cromwell/services/metadata/impl/hybridaws/HybridEventBridgeMetadataServiceActor.scala +++ b/services/src/main/scala/cromwell/services/metadata/impl/hybridaws/HybridEventBridgeMetadataServiceActor.scala @@ -1,3 +1,34 @@ +/* + * Copyright 2022 Amazon.com, Inc. or its affiliates. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + package cromwell.services.metadata.impl.aws import akka.actor.{Actor, ActorLogging, ActorRef} diff --git a/services/src/main/scala/cromwell/services/metadata/impl/hybridaws/HybridSnsMetadataServiceActor.scala b/services/src/main/scala/cromwell/services/metadata/impl/hybridaws/HybridSnsMetadataServiceActor.scala index d1a05e46924..f5a7b486327 100644 --- a/services/src/main/scala/cromwell/services/metadata/impl/hybridaws/HybridSnsMetadataServiceActor.scala +++ b/services/src/main/scala/cromwell/services/metadata/impl/hybridaws/HybridSnsMetadataServiceActor.scala @@ -1,3 +1,34 @@ +/* + * Copyright 2022 Amazon.com, Inc. or its affiliates. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + package cromwell.services.metadata.impl.aws import akka.actor.{Actor, ActorLogging, ActorRef} diff --git a/services/src/test/scala/cromwell/services/metadata/impl/aws/EventBridgeMetadataServiceActorSpec.scala b/services/src/test/scala/cromwell/services/metadata/impl/aws/EventBridgeMetadataServiceActorSpec.scala index ecfab9dc00a..ab85ad525fa 100644 --- a/services/src/test/scala/cromwell/services/metadata/impl/aws/EventBridgeMetadataServiceActorSpec.scala +++ b/services/src/test/scala/cromwell/services/metadata/impl/aws/EventBridgeMetadataServiceActorSpec.scala @@ -1,3 +1,34 @@ +/* + * Copyright 2022 Amazon.com, Inc. or its affiliates. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + package cromwell.services.metadata.impl.aws import java.time.OffsetDateTime @@ -35,7 +66,7 @@ class AwsEventBridgeMetadataServiceActorSpec extends ServicesSpec { } } -object AwsSnsMetadataServiceActorSpec { +object AwsEventBridgeMetadataServiceActorSpec { // This doesn't include a topic so should be a failure val emptyConfig: Config = ConfigFactory.empty() diff --git a/services/src/test/scala/cromwell/services/metadata/impl/aws/SnsMetadataServiceActorSpec.scala b/services/src/test/scala/cromwell/services/metadata/impl/aws/SnsMetadataServiceActorSpec.scala index 26347136655..6c6803b39cb 100644 --- a/services/src/test/scala/cromwell/services/metadata/impl/aws/SnsMetadataServiceActorSpec.scala +++ b/services/src/test/scala/cromwell/services/metadata/impl/aws/SnsMetadataServiceActorSpec.scala @@ -1,3 +1,34 @@ +/* + * Copyright 2022 Amazon.com, Inc. or its affiliates. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + package cromwell.services.metadata.impl.aws import java.time.OffsetDateTime From 54fed3e172e2138cd956c0b9663c05a8a5d34dbc Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Wed, 31 Aug 2022 18:45:43 -0400 Subject: [PATCH 080/326] BW-1393 Release doc updates (#6839) --- processes/release_processes/README.MD | 3 ++- publish/publish_inputs.json | 10 +++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/processes/release_processes/README.MD b/processes/release_processes/README.MD index d16f97fddef..d23d601b747 100644 --- a/processes/release_processes/README.MD +++ b/processes/release_processes/README.MD @@ -45,7 +45,8 @@ The release WDL uses a github token to perform actions on your behalf. Make or copy the following files into a temporary `release/` directory outside the Cromwell repository. This removes any chance of committing your token. * A copy of the workflow file to run (https://github.com/broadinstitute/cromwell/blob/develop/publish/publish_workflow.wdl) -* An inputs json like this: +* An inputs json like this one. + * `publishHomebrew` is `false` due to Homebrew taking on this part themselves. Homebrew remains an active distribution channel. 
```json { diff --git a/publish/publish_inputs.json b/publish/publish_inputs.json index e9c54b9f3d6..ea877b9c34e 100644 --- a/publish/publish_inputs.json +++ b/publish/publish_inputs.json @@ -1,7 +1,7 @@ { - "publish_workflow.githubToken": "<>", - "publish_workflow.organization": "<>", - "publish_workflow.publishDocker": <>, - "publish_workflow.majorRelease": <>, - "publish_workflow.publishHomebrew": <> + "publish_workflow.githubToken": "<>", + "publish_workflow.majorRelease": true, + "publish_workflow.publishHomebrew": false, + "publish_workflow.publishDocker": "broadinstitute/cromwell-publish:latest", + "publish_workflow.organization": "broadinstitute" } From f289382eb20181c8631bd90bf1fb219f46553478 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Fri, 9 Sep 2022 09:13:58 -0400 Subject: [PATCH 081/326] BT-732 Checksum validation for blobs read by engine (#6838) * Draft support for optional FileHash * Draft getMd5 for BlobPath * Resolve non-parallel IO to fix tests * Checksum validation for BlobPath * Nicer error message * Test for missing Blob hash * Break attr acquisition into separate method * Cleanup, comments * In-progress tests of blob hash command * Remove test * Remove unused import --- .../cromwell/engine/io/nio/NioFlow.scala | 47 ++++++++++++++----- .../cromwell/engine/io/nio/NioFlowSpec.scala | 39 ++++++++++++++- .../filesystems/blob/BlobPathBuilder.scala | 19 +++++++- 3 files changed, 91 insertions(+), 14 deletions(-) diff --git a/engine/src/main/scala/cromwell/engine/io/nio/NioFlow.scala b/engine/src/main/scala/cromwell/engine/io/nio/NioFlow.scala index 0598e154f2d..012e771b5a0 100644 --- a/engine/src/main/scala/cromwell/engine/io/nio/NioFlow.scala +++ b/engine/src/main/scala/cromwell/engine/io/nio/NioFlow.scala @@ -12,6 +12,7 @@ import cromwell.core.path.Path import cromwell.engine.io.IoActor._ import cromwell.engine.io.RetryableRequestSupport.{isInfinitelyRetryable, isRetryable} import cromwell.engine.io.{IoAttempts, IoCommandContext, IoCommandStalenessBackpressuring} +import cromwell.filesystems.blob.BlobPath import cromwell.filesystems.drs.DrsPath import cromwell.filesystems.gcs.GcsPath import cromwell.filesystems.s3.S3Path @@ -128,21 +129,33 @@ class NioFlow(parallelism: Int, def readFileAndChecksum: IO[String] = { for { - fileHash <- getHash(command.file) + fileHash <- getStoredHash(command.file) uncheckedValue <- readFile - checksumResult <- checkHash(uncheckedValue, fileHash) + checksumResult <- fileHash match { + case Some(hash) => checkHash(uncheckedValue, hash) + // If there is no stored checksum, don't attempt to validate. + // If the missing checksum is itself an error condition, that + // should be detected by the code that gets the FileHash. + case None => IO.pure(ChecksumSkipped()) + } verifiedValue <- checksumResult match { case _: ChecksumSkipped => IO.pure(uncheckedValue) case _: ChecksumSuccess => IO.pure(uncheckedValue) case failure: ChecksumFailure => IO.raiseError( ChecksumFailedException( - s"Failed checksum for '${command.file}'. Expected '${fileHash.hashType}' hash of '${fileHash.hash}'. Calculated hash '${failure.calculatedHash}'")) + fileHash match { + case Some(hash) => s"Failed checksum for '${command.file}'. Expected '${hash.hashType}' hash of '${hash.hash}'. Calculated hash '${failure.calculatedHash}'" + case None => s"Failed checksum for '${command.file}'. Couldn't find stored file hash." 
// This should never happen + } + ) + ) } } yield verifiedValue } val fileContentIo = command.file match { - case _: DrsPath => readFileAndChecksum + case _: DrsPath => readFileAndChecksum + case _: BlobPath => readFileAndChecksum case _ => readFile } fileContentIo.map(_.replaceAll("\\r\\n", "\\\n")) @@ -153,19 +166,27 @@ class NioFlow(parallelism: Int, } private def hash(hash: IoHashCommand): IO[String] = { - getHash(hash.file).map(_.hash) + // If there is no hash accessible from the file storage system, + // we'll read the file and generate the hash ourselves. + getStoredHash(hash.file).flatMap { + case Some(storedHash) => IO.pure(storedHash) + case None => generateMd5FileHashForPath(hash.file) + }.map(_.hash) } - private def getHash(file: Path): IO[FileHash] = { + private def getStoredHash(file: Path): IO[Option[FileHash]] = { file match { - case gcsPath: GcsPath => getFileHashForGcsPath(gcsPath) + case gcsPath: GcsPath => getFileHashForGcsPath(gcsPath).map(Option(_)) + case blobPath: BlobPath => getFileHashForBlobPath(blobPath) case drsPath: DrsPath => IO { + // We assume all DRS files have a stored hash; this will throw + // if the file does not. drsPath.getFileHash - } + }.map(Option(_)) case s3Path: S3Path => IO { - FileHash(HashType.S3Etag, s3Path.eTag) + Option(FileHash(HashType.S3Etag, s3Path.eTag)) } - case path => getMd5FileHashForPath(path) + case _ => IO.pure(None) } } @@ -201,7 +222,11 @@ class NioFlow(parallelism: Int, gcsPath.objectBlobId.map(id => FileHash(HashType.GcsCrc32c, gcsPath.cloudStorage.get(id).getCrc32c)) } - private def getMd5FileHashForPath(path: Path): IO[FileHash] = delayedIoFromTry { + private def getFileHashForBlobPath(blobPath: BlobPath): IO[Option[FileHash]] = delayedIoFromTry { + blobPath.md5HexString.map(md5 => md5.map(FileHash(HashType.Md5, _))) + } + + private def generateMd5FileHashForPath(path: Path): IO[FileHash] = delayedIoFromTry { tryWithResource(() => path.newInputStream) { inputStream => FileHash(HashType.Md5, org.apache.commons.codec.digest.DigestUtils.md5Hex(inputStream)) } diff --git a/engine/src/test/scala/cromwell/engine/io/nio/NioFlowSpec.scala b/engine/src/test/scala/cromwell/engine/io/nio/NioFlowSpec.scala index 4b3461ae7d3..b01d52eece0 100644 --- a/engine/src/test/scala/cromwell/engine/io/nio/NioFlowSpec.scala +++ b/engine/src/test/scala/cromwell/engine/io/nio/NioFlowSpec.scala @@ -20,13 +20,14 @@ import org.mockito.Mockito.{times, verify, when} import org.scalatest.flatspec.AsyncFlatSpecLike import org.scalatest.matchers.should.Matchers import common.mock.MockSugar +import cromwell.filesystems.blob.BlobPath import java.nio.file.NoSuchFileException import java.util.UUID import scala.concurrent.ExecutionContext import scala.concurrent.duration._ import scala.language.postfixOps -import scala.util.Failure +import scala.util.{Failure, Success, Try} import scala.util.control.NoStackTrace class NioFlowSpec extends TestKitSuite with AsyncFlatSpecLike with Matchers with MockSugar { @@ -127,6 +128,23 @@ class NioFlowSpec extends TestKitSuite with AsyncFlatSpecLike with Matchers with } } + it should "get hash from a BlobPath when stored hash exists" in { + val testPath = mock[BlobPath] + val hashString = "2d01d5d9c24034d54fe4fba0ede5182d" // echo "hello there" | md5sum + testPath.md5HexString returns Try(Option(hashString)) + + val context = DefaultCommandContext(hashCommand(testPath).get, replyTo) + val testSource = Source.single(context) + + val stream = testSource.via(flow).toMat(readSink)(Keep.right) + + stream.run() map { + case 
(success: IoSuccess[_], _) => assert(success.result.asInstanceOf[String] == hashString) + case (ack, _) => + fail(s"read returned an unexpected message:\n$ack\n\n") + } + } + it should "fail if DrsPath hash doesn't match checksum" in { val testPath = mock[DrsPath] when(testPath.limitFileContent(any[Option[Int]], any[Boolean])(any[ExecutionContext])).thenReturn("hello".getBytes) @@ -171,6 +189,25 @@ class NioFlowSpec extends TestKitSuite with AsyncFlatSpecLike with Matchers with } } + it should "succeed if a BlobPath is missing a stored hash" in { + val testPath = mock[BlobPath] + when(testPath.limitFileContent(any[Option[Int]], any[Boolean])(any[ExecutionContext])) + .thenReturn("hello there".getBytes) + when(testPath.md5HexString) + .thenReturn(Success(None)) + + val context = DefaultCommandContext(contentAsStringCommand(testPath, Option(100), failOnOverflow = true).get, replyTo) + val testSource = Source.single(context) + + val stream = testSource.via(flow).toMat(readSink)(Keep.right) + + stream.run() map { + case (success: IoSuccess[_], _) => assert(success.result.asInstanceOf[String] == "hello there") + case (ack, _) => + fail(s"read returned an unexpected message:\n$ack\n\n") + } + } + it should "copy Nio paths" in { val testPath = DefaultPathBuilder.createTempFile() val testCopyPath = testPath.sibling(UUID.randomUUID().toString) diff --git a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala index 69a21c90eda..b89a335b9de 100644 --- a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala +++ b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala @@ -1,13 +1,13 @@ package cromwell.filesystems.blob import com.azure.core.credential.AzureSasCredential -import com.azure.storage.blob.nio.AzureFileSystem +import com.azure.storage.blob.nio.{AzureBlobFileAttributes, AzureFileSystem} import com.google.common.net.UrlEscapers import cromwell.core.path.{NioPath, Path, PathBuilder} import cromwell.filesystems.blob.BlobPathBuilder._ import java.net.{MalformedURLException, URI} -import java.nio.file.{FileSystem, FileSystemNotFoundException, FileSystems} +import java.nio.file.{FileSystem, FileSystemNotFoundException, FileSystems, Files} import scala.jdk.CollectionConverters._ import scala.language.postfixOps import scala.util.{Failure, Try} @@ -90,4 +90,19 @@ case class BlobPath private[blob](nioPath: NioPath, endpoint: String, container: override def pathAsString: String = List(endpoint, container, nioPath.toString()).mkString("/") override def pathWithoutScheme: String = parseURI(endpoint).getHost + "/" + container + "/" + nioPath.toString() + + def blobFileAttributes: Try[AzureBlobFileAttributes] = + Try(Files.readAttributes(nioPath, classOf[AzureBlobFileAttributes])) + + def md5HexString: Try[Option[String]] = { + blobFileAttributes.map(h => + Option(h.blobHttpHeaders().getContentMd5) match { + case None => None + case Some(arr) if arr.isEmpty => None + // Convert the bytes to a hex-encoded string. Note that this value + // is rendered in base64 in the Azure web portal. 
+ case Some(bytes) => Option(bytes.map("%02x".format(_)).mkString) + } + ) + } } From 01d63af33fec14ff00076753c96075ca931c0c77 Mon Sep 17 00:00:00 2001 From: Christian Freitas Date: Mon, 12 Sep 2022 16:18:51 -0400 Subject: [PATCH 082/326] BT-711 Refresh SAS token for filesystem on expiry (#6831) * BT-711 Refresh SAS token for filesystem on expiry * Rough cut of token refresh using exceptions * Ignore tests, and minor cleanup * Remove stray line * Draft of manager class for handling expiring file systems * Style fixes * Refactor of blobfilesystemManager and tests covering its functionality * Refined tests to validate close filesystem as separate unit * Ignore connected tests * Clean up of some things * Refactor BlobFileSystemManager to separate file, and some other cleanup * Some additional scala-ifying * Small cleanup * Correcting imports * trigger tests * trigger tests --- .../blob/BlobFileSystemManager.scala | 140 ++++++++++++++ .../filesystems/blob/BlobPathBuilder.scala | 79 ++++---- .../blob/BlobPathBuilderFactory.scala | 111 ++--------- .../blob/BlobPathBuilderFactorySpec.scala | 179 +++++++++++++++++- .../blob/BlobPathBuilderSpec.scala | 68 ++++--- 5 files changed, 406 insertions(+), 171 deletions(-) create mode 100644 filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobFileSystemManager.scala diff --git a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobFileSystemManager.scala b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobFileSystemManager.scala new file mode 100644 index 00000000000..3b8f5149055 --- /dev/null +++ b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobFileSystemManager.scala @@ -0,0 +1,140 @@ +package cromwell.filesystems.blob + +import com.azure.core.credential.AzureSasCredential +import com.azure.core.management.AzureEnvironment +import com.azure.core.management.profile.AzureProfile +import com.azure.identity.DefaultAzureCredentialBuilder +import com.azure.resourcemanager.AzureResourceManager +import com.azure.storage.blob.nio.AzureFileSystem +import com.azure.storage.blob.sas.{BlobContainerSasPermission, BlobServiceSasSignatureValues} +import com.azure.storage.blob.{BlobContainerClient, BlobContainerClientBuilder} +import com.azure.storage.common.StorageSharedKeyCredential + +import java.net.URI +import java.nio.file.{FileSystem, FileSystemNotFoundException, FileSystems} +import java.time.temporal.ChronoUnit +import java.time.{Duration, Instant, OffsetDateTime} +import scala.jdk.CollectionConverters._ +import scala.util.{Failure, Success, Try} +import com.azure.resourcemanager.storage.models.StorageAccountKey + +case class FileSystemAPI() { + def getFileSystem(uri: URI): Try[FileSystem] = Try(FileSystems.getFileSystem(uri)) + def newFileSystem(uri: URI, config: Map[String, Object]): FileSystem = FileSystems.newFileSystem(uri, config.asJava) + def closeFileSystem(uri: URI): Option[Unit] = getFileSystem(uri).toOption.map(_.close) +} + +object BlobFileSystemManager { + def parseTokenExpiry(token: AzureSasCredential): Option[Instant] = for { + expiryString <- token.getSignature.split("&").find(_.startsWith("se")).map(_.replaceFirst("se=","")).map(_.replace("%3A", ":")) + instant = Instant.parse(expiryString) + } yield instant + + def buildConfigMap(credential: AzureSasCredential, container: BlobContainerName): Map[String, Object] = { + Map((AzureFileSystem.AZURE_STORAGE_SAS_TOKEN_CREDENTIAL, credential), + (AzureFileSystem.AZURE_STORAGE_FILE_STORES, container.value), + 
(AzureFileSystem.AZURE_STORAGE_SKIP_INITIAL_CONTAINER_CHECK, java.lang.Boolean.TRUE)) + } + def hasTokenExpired(tokenExpiry: Instant, buffer: Duration): Boolean = Instant.now.plus(buffer).isAfter(tokenExpiry) + def uri(endpoint: EndpointURL) = new URI("azb://?endpoint=" + endpoint) +} +case class BlobFileSystemManager( + container: BlobContainerName, + endpoint: EndpointURL, + expiryBufferMinutes: Long, + blobTokenGenerator: BlobTokenGenerator, + fileSystemAPI: FileSystemAPI = FileSystemAPI(), + private val initialExpiration: Option[Instant] = None) { + private var expiry: Option[Instant] = initialExpiration + val buffer: Duration = Duration.of(expiryBufferMinutes, ChronoUnit.MINUTES) + + def getExpiry: Option[Instant] = expiry + def uri: URI = BlobFileSystemManager.uri(endpoint) + def isTokenExpired: Boolean = expiry.exists(BlobFileSystemManager.hasTokenExpired(_, buffer)) + def shouldReopenFilesystem: Boolean = isTokenExpired || expiry.isEmpty + def retrieveFilesystem(): Try[FileSystem] = { + synchronized { + shouldReopenFilesystem match { + case false => fileSystemAPI.getFileSystem(uri).recoverWith { + // If no filesystem already exists, this will create a new connection, with the provided configs + case _: FileSystemNotFoundException => blobTokenGenerator.generateAccessToken.flatMap(generateFilesystem(uri, container, _)) + } + // If the token has expired, OR there is no token record, try to close the FS and regenerate + case true => + fileSystemAPI.closeFileSystem(uri) + blobTokenGenerator.generateAccessToken.flatMap(generateFilesystem(uri, container, _)) + } + } + } + + private def generateFilesystem(uri: URI, container: BlobContainerName, token: AzureSasCredential): Try[FileSystem] = { + expiry = BlobFileSystemManager.parseTokenExpiry(token) + if (expiry.isEmpty) return Failure(new Exception("Could not reopen filesystem, no expiration found")) + Try(fileSystemAPI.newFileSystem(uri, BlobFileSystemManager.buildConfigMap(token, container))) + } + +} + +sealed trait BlobTokenGenerator {def generateAccessToken: Try[AzureSasCredential]} +object BlobTokenGenerator { + def createBlobTokenGenerator(container: BlobContainerName, endpoint: EndpointURL, subscription: Option[String]): BlobTokenGenerator = { + createBlobTokenGenerator(container, endpoint, None, None, subscription) + } + def createBlobTokenGenerator(container: BlobContainerName, endpoint: EndpointURL, workspaceId: Option[WorkspaceId], workspaceManagerURL: Option[WorkspaceManagerURL], subscription: Option[String]): BlobTokenGenerator = { + (container: BlobContainerName, endpoint: EndpointURL, workspaceId, workspaceManagerURL) match { + case (container, endpoint, None, None) => + NativeBlobTokenGenerator(container, endpoint, subscription) + case (container, endpoint, Some(workspaceId), Some(workspaceManagerURL)) => + WSMBlobTokenGenerator(container, endpoint, workspaceId, workspaceManagerURL) + case _ => + throw new Exception("Arguments provided do not match any available BlobTokenGenerator implementation.") + } + } + def createBlobTokenGenerator(container: BlobContainerName, endpoint: EndpointURL): BlobTokenGenerator = createBlobTokenGenerator(container, endpoint, None) + def createBlobTokenGenerator(container: BlobContainerName, endpoint: EndpointURL, workspaceId: Option[WorkspaceId], workspaceManagerURL: Option[WorkspaceManagerURL]): BlobTokenGenerator = + createBlobTokenGenerator(container, endpoint, workspaceId, workspaceManagerURL, None) + +} + +case class WSMBlobTokenGenerator(container: BlobContainerName, endpoint: 
EndpointURL, workspaceId: WorkspaceId, workspaceManagerURL: WorkspaceManagerURL) extends BlobTokenGenerator { + def generateAccessToken: Try[AzureSasCredential] = Failure(new NotImplementedError) +} + +case class NativeBlobTokenGenerator(container: BlobContainerName, endpoint: EndpointURL, subscription: Option[String] = None) extends BlobTokenGenerator { + + private val azureProfile = new AzureProfile(AzureEnvironment.AZURE) + private def azureCredentialBuilder = new DefaultAzureCredentialBuilder() + .authorityHost(azureProfile.getEnvironment.getActiveDirectoryEndpoint) + .build + private def authenticateWithSubscription(sub: String) = AzureResourceManager.authenticate(azureCredentialBuilder, azureProfile).withSubscription(sub) + private def authenticateWithDefaultSubscription = AzureResourceManager.authenticate(azureCredentialBuilder, azureProfile).withDefaultSubscription() + private def azure = subscription.map(authenticateWithSubscription(_)).getOrElse(authenticateWithDefaultSubscription) + + private def findAzureStorageAccount(name: StorageAccountName) = azure.storageAccounts.list.asScala.find(_.name.equals(name.value)) + .map(Success(_)).getOrElse(Failure(new Exception("Azure Storage Account not found"))) + private def buildBlobContainerClient(credential: StorageSharedKeyCredential, endpoint: EndpointURL, container: BlobContainerName): BlobContainerClient = { + new BlobContainerClientBuilder() + .credential(credential) + .endpoint(endpoint.value) + .containerName(container.value) + .buildClient() + } + private val bcsp = new BlobContainerSasPermission() + .setReadPermission(true) + .setCreatePermission(true) + .setListPermission(true) + + + def generateAccessToken: Try[AzureSasCredential] = for { + uri <- BlobPathBuilder.parseURI(endpoint.value) + configuredAccount <- BlobPathBuilder.parseStorageAccount(uri) + azureAccount <- findAzureStorageAccount(configuredAccount) + keys = azureAccount.getKeys.asScala + key <- keys.headOption.fold[Try[StorageAccountKey]](Failure(new Exception("Storage account has no keys")))(Success(_)) + first = key.value + sskc = new StorageSharedKeyCredential(configuredAccount.value, first) + bcc = buildBlobContainerClient(sskc, endpoint, container) + bsssv = new BlobServiceSasSignatureValues(OffsetDateTime.now.plusDays(1), bcsp) + asc = new AzureSasCredential(bcc.generateSas(bsssv)) + } yield asc +} diff --git a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala index b89a335b9de..3e69ce2a7bd 100644 --- a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala +++ b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala @@ -1,16 +1,14 @@ package cromwell.filesystems.blob -import com.azure.core.credential.AzureSasCredential -import com.azure.storage.blob.nio.{AzureBlobFileAttributes, AzureFileSystem} +import com.azure.storage.blob.nio.{AzureBlobFileAttributes} import com.google.common.net.UrlEscapers import cromwell.core.path.{NioPath, Path, PathBuilder} import cromwell.filesystems.blob.BlobPathBuilder._ import java.net.{MalformedURLException, URI} -import java.nio.file.{FileSystem, FileSystemNotFoundException, FileSystems, Files} -import scala.jdk.CollectionConverters._ import scala.language.postfixOps -import scala.util.{Failure, Try} +import scala.util.{Failure, Success, Try} +import java.nio.file.Files object BlobPathBuilder { @@ -18,9 +16,10 @@ object BlobPathBuilder { case class ValidBlobPath(path: 
String) extends BlobPathValidation case class UnparsableBlobPath(errorMessage: Throwable) extends BlobPathValidation - def invalidBlobPathMessage(container: String, endpoint: String) = s"Malformed Blob URL for this builder. Expecting a URL for a container $container and endpoint $endpoint" - def parseURI(string: String) = URI.create(UrlEscapers.urlFragmentEscaper().escape(string)) - def parseStorageAccount(uri: URI) = uri.getHost().split("\\.").filter(!_.isEmpty()).headOption + def invalidBlobPathMessage(container: BlobContainerName, endpoint: EndpointURL) = s"Malformed Blob URL for this builder. Expecting a URL for a container $container and endpoint $endpoint" + def parseURI(string: String): Try[URI] = Try(URI.create(UrlEscapers.urlFragmentEscaper().escape(string))) + def parseStorageAccount(uri: URI): Try[StorageAccountName] = uri.getHost.split("\\.").find(_.nonEmpty).map(StorageAccountName(_)) + .map(Success(_)).getOrElse(Failure(new Exception("Could not parse storage account"))) /** * Validates a that a path from a string is a valid BlobPath of the format: @@ -40,56 +39,50 @@ object BlobPathBuilder { * * If the configured container and storage account do not match, the string is considered unparsable */ - def validateBlobPath(string: String, container: String, endpoint: String): BlobPathValidation = { - Try { - val uri = parseURI(string) - val storageAccount = parseStorageAccount(parseURI(endpoint)) - val hasContainer = uri.getPath().split("/").filter(!_.isEmpty()).headOption.contains(container) - def hasEndpoint = parseStorageAccount(uri).contains(storageAccount.get) - if (hasContainer && !storageAccount.isEmpty && hasEndpoint) { - ValidBlobPath(uri.getPath.replaceFirst("/" + container, "")) - } else { - UnparsableBlobPath(new MalformedURLException(invalidBlobPathMessage(container, endpoint))) + def validateBlobPath(string: String, container: BlobContainerName, endpoint: EndpointURL): BlobPathValidation = { + val blobValidation = for { + testUri <- parseURI(string) + endpointUri <- parseURI(endpoint.value) + testStorageAccount <- parseStorageAccount(testUri) + endpointStorageAccount <- parseStorageAccount(endpointUri) + hasContainer = testUri.getPath.split("/").find(_.nonEmpty).contains(container.value) + hasEndpoint = testStorageAccount.equals(endpointStorageAccount) + blobPathValidation = (hasContainer && hasEndpoint) match { + case true => ValidBlobPath(testUri.getPath.replaceFirst("/" + container, "")) + case false => UnparsableBlobPath(new MalformedURLException(invalidBlobPathMessage(container, endpoint))) } - } recover { case t => UnparsableBlobPath(t) } get + } yield blobPathValidation + blobValidation recover { case t => UnparsableBlobPath(t) } get } } -class BlobPathBuilder(blobTokenGenerator: BlobTokenGenerator, container: String, endpoint: String) extends PathBuilder { - - val credential: AzureSasCredential = new AzureSasCredential(blobTokenGenerator.getAccessToken) - val fileSystemConfig: Map[String, Object] = Map((AzureFileSystem.AZURE_STORAGE_SAS_TOKEN_CREDENTIAL, credential), - (AzureFileSystem.AZURE_STORAGE_FILE_STORES, container), - (AzureFileSystem.AZURE_STORAGE_SKIP_INITIAL_CONTAINER_CHECK, java.lang.Boolean.TRUE)) - - def retrieveFilesystem(uri: URI): Try[FileSystem] = { - Try(FileSystems.getFileSystem(uri)) recover { - // If no filesystem already exists, this will create a new connection, with the provided configs - case _: FileSystemNotFoundException => FileSystems.newFileSystem(uri, fileSystemConfig.asJava) - } - } +class BlobPathBuilder(container: 
BlobContainerName, endpoint: EndpointURL)(private val fsm: BlobFileSystemManager) extends PathBuilder { def build(string: String): Try[BlobPath] = { validateBlobPath(string, container, endpoint) match { - case ValidBlobPath(path) => for { - fileSystem <- retrieveFilesystem(new URI("azb://?endpoint=" + endpoint)) - nioPath <- Try(fileSystem.getPath(path)) - blobPath = BlobPath(nioPath, endpoint, container) - } yield blobPath + case ValidBlobPath(path) => Try(BlobPath(path, endpoint, container)(fsm)) case UnparsableBlobPath(errorMessage: Throwable) => Failure(errorMessage) } } - override def name: String = "Azure Blob Storage" } -// Add args for container, storage account name -case class BlobPath private[blob](nioPath: NioPath, endpoint: String, container: String) extends Path { - override protected def newPath(nioPath: NioPath): Path = BlobPath(nioPath, endpoint, container) +case class BlobPath private[blob](pathString: String, endpoint: EndpointURL, container: BlobContainerName)(private val fsm: BlobFileSystemManager) extends Path { + override def nioPath: NioPath = findNioPath(pathString) + + override protected def newPath(nioPath: NioPath): Path = BlobPath(nioPath.toString, endpoint, container)(fsm) + + override def pathAsString: String = List(endpoint, container, nioPath.toString).mkString("/") - override def pathAsString: String = List(endpoint, container, nioPath.toString()).mkString("/") + //This is purposefully an unprotected get because if the endpoint cannot be parsed this should fail loudly rather than quietly + override def pathWithoutScheme: String = parseURI(endpoint.value).map(_.getHost + "/" + container + "/" + nioPath.toString).get - override def pathWithoutScheme: String = parseURI(endpoint).getHost + "/" + container + "/" + nioPath.toString() + private def findNioPath(path: String): NioPath = (for { + fileSystem <- fsm.retrieveFilesystem() + nioPath = fileSystem.getPath(path) + // This is purposefully an unprotected get because the NIO API needing an unwrapped path object. 
+ // If an error occurs the api expects a thrown exception + } yield nioPath).get def blobFileAttributes: Try[AzureBlobFileAttributes] = Try(Files.readAttributes(nioPath, classOf[AzureBlobFileAttributes])) diff --git a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilderFactory.scala b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilderFactory.scala index b12abf5cc34..c93f751b706 100644 --- a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilderFactory.scala +++ b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilderFactory.scala @@ -1,114 +1,35 @@ package cromwell.filesystems.blob import akka.actor.ActorSystem -import com.azure.core.management.AzureEnvironment -import com.azure.core.management.profile.AzureProfile -import com.azure.identity.DefaultAzureCredentialBuilder -import com.azure.resourcemanager.AzureResourceManager -import com.azure.storage.blob.BlobContainerClientBuilder -import com.azure.storage.blob.sas.{BlobContainerSasPermission, BlobServiceSasSignatureValues} -import com.azure.storage.common.StorageSharedKeyCredential import com.typesafe.config.Config import cromwell.core.WorkflowOptions import cromwell.core.path.PathBuilderFactory import net.ceedubs.ficus.Ficus._ -import java.time.OffsetDateTime -import scala.concurrent.ExecutionContext -import scala.concurrent.Future -import scala.jdk.CollectionConverters._ +import scala.concurrent.{ExecutionContext, Future} final case class BlobFileSystemConfig(config: Config) + +final case class BlobContainerName(value: String) {override def toString: String = value} +final case class StorageAccountName(value: String) {override def toString: String = value} +final case class EndpointURL(value: String) {override def toString: String = value} +final case class WorkspaceId(value: String) {override def toString: String = value} +final case class WorkspaceManagerURL(value: String) {override def toString: String = value} final case class BlobPathBuilderFactory(globalConfig: Config, instanceConfig: Config, singletonConfig: BlobFileSystemConfig) extends PathBuilderFactory { - val container: String = instanceConfig.as[String]("store") - val endpoint: String = instanceConfig.as[String]("endpoint") - val workspaceId: Option[String] = instanceConfig.as[Option[String]]("workspace-id") - val workspaceManagerURL: Option[String] = singletonConfig.config.as[Option[String]]("workspace-manager-url") + val subscription: Option[String] = instanceConfig.as[Option[String]]("subscription") + val container: BlobContainerName = BlobContainerName(instanceConfig.as[String]("container")) + val endpoint: EndpointURL = EndpointURL(instanceConfig.as[String]("endpoint")) + val workspaceId: Option[WorkspaceId] = instanceConfig.as[Option[String]]("workspace-id").map(WorkspaceId(_)) + val expiryBufferMinutes: Long = instanceConfig.as[Option[Long]]("expiry-buffer-minutes").getOrElse(10) + val workspaceManagerURL: Option[WorkspaceManagerURL] = singletonConfig.config.as[Option[String]]("workspace-manager-url").map(WorkspaceManagerURL(_)) val blobTokenGenerator: BlobTokenGenerator = BlobTokenGenerator.createBlobTokenGenerator( - container, endpoint, workspaceId, workspaceManagerURL) + container, endpoint, workspaceId, workspaceManagerURL, subscription) + val fsm: BlobFileSystemManager = BlobFileSystemManager(container, endpoint, expiryBufferMinutes, blobTokenGenerator) override def withOptions(options: WorkflowOptions)(implicit as: ActorSystem, ec: ExecutionContext): Future[BlobPathBuilder] = { 
Future { - new BlobPathBuilder(blobTokenGenerator, container, endpoint) + new BlobPathBuilder(container, endpoint)(fsm) } } } - -sealed trait BlobTokenGenerator { - def getAccessToken: String -} - -object BlobTokenGenerator { - def createBlobTokenGenerator(container: String, endpoint: String): BlobTokenGenerator = { - createBlobTokenGenerator(container, endpoint, None, None) - } - def createBlobTokenGenerator(container: String, endpoint: String, workspaceId: Option[String], workspaceManagerURL: Option[String]): BlobTokenGenerator = { - (container: String, endpoint: String, workspaceId, workspaceManagerURL) match { - case (container, endpoint, None, None) => - NativeBlobTokenGenerator(container, endpoint) - case (container, endpoint, Some(workspaceId), Some(workspaceManagerURL)) => - WSMBlobTokenGenerator(container, endpoint, workspaceId, workspaceManagerURL) - case _ => - throw new Exception("Arguments provided do not match any available BlobTokenGenerator implementation.") - } - } -} - -case class WSMBlobTokenGenerator(container: String, endpoint: String, workspaceId: String, workspaceManagerURL: String) extends BlobTokenGenerator { - def getAccessToken: String = { - throw new NotImplementedError - } -} - -case class NativeBlobTokenGenerator(container: String, endpoint: String) extends BlobTokenGenerator { - def getAccessToken: String = { - val storageAccountName = BlobPathBuilder.parseStorageAccount(BlobPathBuilder.parseURI(endpoint)) match { - case Some(storageAccountName) => storageAccountName - case _ => throw new Exception("Storage account could not be parsed from endpoint") - } - - val profile = new AzureProfile(AzureEnvironment.AZURE) - val azureCredential = new DefaultAzureCredentialBuilder() - .authorityHost(profile.getEnvironment.getActiveDirectoryEndpoint) - .build - val azure = AzureResourceManager.authenticate(azureCredential, profile).withDefaultSubscription - - val storageAccounts = azure.storageAccounts() - val storageAccount = storageAccounts - .list() - .asScala - .find(_.name == storageAccountName) - - val storageAccountKeys = storageAccount match { - case Some(value) => value.getKeys.asScala.map(_.value()) - case _ => throw new Exception("Storage Account not found") - } - - val storageAccountKey = storageAccountKeys.headOption match { - case Some(value) => value - case _ => throw new Exception("Storage Account has no keys") - } - - val keyCredential = new StorageSharedKeyCredential( - storageAccountName, - storageAccountKey - ) - val blobContainerClient = new BlobContainerClientBuilder() - .credential(keyCredential) - .endpoint(endpoint) - .containerName(container) - .buildClient() - - val blobContainerSasPermission = new BlobContainerSasPermission() - .setReadPermission(true) - .setCreatePermission(true) - .setListPermission(true) - val blobServiceSasSignatureValues = new BlobServiceSasSignatureValues( - OffsetDateTime.now.plusDays(1), - blobContainerSasPermission - ) - - blobContainerClient.generateSas(blobServiceSasSignatureValues) - } -} diff --git a/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderFactorySpec.scala b/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderFactorySpec.scala index 08efd534056..f5b71a06ead 100644 --- a/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderFactorySpec.scala +++ b/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderFactorySpec.scala @@ -1,28 +1,189 @@ package cromwell.filesystems.blob +import com.azure.core.credential.AzureSasCredential 
import com.typesafe.config.ConfigFactory +import common.mock.MockSugar +import org.mockito.Mockito._ import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers -class BlobPathBuilderFactorySpec extends AnyFlatSpec with Matchers { +import java.nio.file.{FileSystem, FileSystemNotFoundException} +import java.time.format.DateTimeFormatter +import java.time.temporal.ChronoUnit +import java.time.{Duration, Instant, ZoneId} +import scala.util.{Failure, Try} + +object BlobPathBuilderFactorySpec { + def buildExampleSasToken(expiry: Instant): AzureSasCredential = { + val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd").withZone(ZoneId.systemDefault()) + val sv = formatter.format(expiry) + val se = expiry.toString().replace(":","%3A") + new AzureSasCredential(s"sv=$sv&se=$se&sr=c&sp=rcl") + } +} +class BlobPathBuilderFactorySpec extends AnyFlatSpec with Matchers with MockSugar { + def generateTokenExpiration(minutes: Long) = Instant.now.plus(minutes, ChronoUnit.MINUTES) it should "parse configs for a functioning factory" in { - val endpoint = BlobPathBuilderSpec.buildEndpoint("coaexternalstorage") - val store = "inputs" - val workspaceId = "mockWorkspaceId" - val workspaceManagerURL = "https://test.ws.org" + val endpoint = BlobPathBuilderSpec.buildEndpoint("storageAccount") + val container = BlobContainerName("storageContainer") + val workspaceId = WorkspaceId("mockWorkspaceId") + val workspaceManagerURL = WorkspaceManagerURL("https://test.ws.org") val instanceConfig = ConfigFactory.parseString( s""" - |store = "$store" + |container = "$container" + |endpoint = "$endpoint" + |expiry-buffer-minutes = "10" |workspace-id = "$workspaceId" """.stripMargin) val singletonConfig = ConfigFactory.parseString(s"""workspace-manager-url = "$workspaceManagerURL" """) val globalConfig = ConfigFactory.parseString("""""") val factory = BlobPathBuilderFactory(globalConfig, instanceConfig, new BlobFileSystemConfig(singletonConfig)) - factory.container should equal(store) + factory.container should equal(container) factory.endpoint should equal(endpoint) - factory.workspaceId should equal(Some(workspaceId)) - factory.workspaceManagerURL should equal(Some(workspaceManagerURL)) + factory.expiryBufferMinutes should equal(10L) + factory.workspaceId should contain(workspaceId) + factory.workspaceManagerURL should contain(workspaceManagerURL) + } + + it should "build an example sas token of the correct format" in { + val testToken = BlobPathBuilderFactorySpec.buildExampleSasToken(Instant.ofEpochMilli(1603794041000L)) + val sourceToken = "sv=2020-10-27&se=2020-10-27T10%3A20%3A41Z&sr=c&sp=rcl" + testToken.getSignature should equal(sourceToken) + } + + it should "parse an expiration time from a sas token" in { + val expiryTime = generateTokenExpiration(20L) + val sasToken = BlobPathBuilderFactorySpec.buildExampleSasToken(expiryTime) + val expiry = BlobFileSystemManager.parseTokenExpiry(sasToken) + expiry should contain(expiryTime) + } + + it should "verify an unexpired token will be processed as unexpired" in { + val expiryTime = generateTokenExpiration(11L) + val expired = BlobFileSystemManager.hasTokenExpired(expiryTime, Duration.ofMinutes(10L)) + expired shouldBe false + } + + it should "test an expired token will be processed as expired" in { + val expiryTime = generateTokenExpiration(9L) + val expired = BlobFileSystemManager.hasTokenExpired(expiryTime, Duration.ofMinutes(10L)) + expired shouldBe true + } + + it should "test that a filesystem gets closed correctly" in { + val endpoint = 
BlobPathBuilderSpec.buildEndpoint("storageAccount") + val azureUri = BlobFileSystemManager.uri(endpoint) + val fileSystems = mock[FileSystemAPI] + val fileSystem = mock[FileSystem] + when(fileSystems.getFileSystem(azureUri)).thenReturn(Try(fileSystem)) + when(fileSystems.closeFileSystem(azureUri)).thenCallRealMethod() + + fileSystems.closeFileSystem(azureUri) + verify(fileSystem, times(1)).close() + } + + it should "test retrieveFileSystem with expired filesystem" in { + val endpoint = BlobPathBuilderSpec.buildEndpoint("storageAccount") + val expiredToken = generateTokenExpiration(9L) + val refreshedToken = generateTokenExpiration(69L) + val sasToken = BlobPathBuilderFactorySpec.buildExampleSasToken(refreshedToken) + val container = BlobContainerName("storageContainer") + val configMap = BlobFileSystemManager.buildConfigMap(sasToken, container) + val azureUri = BlobFileSystemManager.uri(endpoint) + + val fileSystems = mock[FileSystemAPI] + val blobTokenGenerator = mock[BlobTokenGenerator] + when(blobTokenGenerator.generateAccessToken).thenReturn(Try(sasToken)) + + val fsm = BlobFileSystemManager(container, endpoint, 10L, blobTokenGenerator, fileSystems, Some(expiredToken)) + fsm.getExpiry should contain(expiredToken) + fsm.isTokenExpired shouldBe true + fsm.retrieveFilesystem() + + fsm.getExpiry should contain(refreshedToken) + fsm.isTokenExpired shouldBe false + verify(fileSystems, never()).getFileSystem(azureUri) + verify(fileSystems, times(1)).newFileSystem(azureUri, configMap) + verify(fileSystems, times(1)).closeFileSystem(azureUri) + } + + it should "test retrieveFileSystem with an unexpired fileSystem" in { + val endpoint = BlobPathBuilderSpec.buildEndpoint("storageAccount") + val initialToken = generateTokenExpiration(11L) + val refreshedToken = generateTokenExpiration(71L) + val sasToken = BlobPathBuilderFactorySpec.buildExampleSasToken(refreshedToken) + val container = BlobContainerName("storageContainer") + val configMap = BlobFileSystemManager.buildConfigMap(sasToken, container) + val azureUri = BlobFileSystemManager.uri(endpoint) + // Need a fake filesystem to supply the getFileSystem simulated try + val dummyFileSystem = mock[FileSystem] + + val fileSystems = mock[FileSystemAPI] + when(fileSystems.getFileSystem(azureUri)).thenReturn(Try(dummyFileSystem)) + + val blobTokenGenerator = mock[BlobTokenGenerator] + when(blobTokenGenerator.generateAccessToken).thenReturn(Try(sasToken)) + + val fsm = BlobFileSystemManager(container, endpoint, 10L, blobTokenGenerator, fileSystems, Some(initialToken)) + fsm.getExpiry should contain(initialToken) + fsm.isTokenExpired shouldBe false + fsm.retrieveFilesystem() + + fsm.getExpiry should contain(initialToken) + fsm.isTokenExpired shouldBe false + verify(fileSystems, times(1)).getFileSystem(azureUri) + verify(fileSystems, never()).newFileSystem(azureUri, configMap) + verify(fileSystems, never()).closeFileSystem(azureUri) + } + + it should "test retrieveFileSystem with an uninitialized filesystem" in { + val endpoint = BlobPathBuilderSpec.buildEndpoint("storageAccount") + val refreshedToken = generateTokenExpiration(71L) + val sasToken = BlobPathBuilderFactorySpec.buildExampleSasToken(refreshedToken) + val container = BlobContainerName("storageContainer") + val configMap = BlobFileSystemManager.buildConfigMap(sasToken, container) + val azureUri = BlobFileSystemManager.uri(endpoint) + + val fileSystems = mock[FileSystemAPI] + when(fileSystems.getFileSystem(azureUri)).thenReturn(Failure(new FileSystemNotFoundException)) + val blobTokenGenerator 
= mock[BlobTokenGenerator] + when(blobTokenGenerator.generateAccessToken).thenReturn(Try(sasToken)) + + val fsm = BlobFileSystemManager(container, endpoint, 10L, blobTokenGenerator, fileSystems, Some(refreshedToken)) + fsm.getExpiry.isDefined shouldBe true + fsm.isTokenExpired shouldBe false + fsm.retrieveFilesystem() + + fsm.getExpiry should contain(refreshedToken) + fsm.isTokenExpired shouldBe false + verify(fileSystems, times(1)).getFileSystem(azureUri) + verify(fileSystems, times(1)).newFileSystem(azureUri, configMap) + verify(fileSystems, never()).closeFileSystem(azureUri) + } + + it should "test retrieveFileSystem with an unknown filesystem" in { + val endpoint = BlobPathBuilderSpec.buildEndpoint("storageAccount") + val refreshedToken = generateTokenExpiration(71L) + val sasToken = BlobPathBuilderFactorySpec.buildExampleSasToken(refreshedToken) + val container = BlobContainerName("storageContainer") + val configMap = BlobFileSystemManager.buildConfigMap(sasToken, container) + val azureUri = BlobFileSystemManager.uri(endpoint) + + val fileSystems = mock[FileSystemAPI] + val blobTokenGenerator = mock[BlobTokenGenerator] + when(blobTokenGenerator.generateAccessToken).thenReturn(Try(sasToken)) + + val fsm = BlobFileSystemManager(container, endpoint, 10L, blobTokenGenerator, fileSystems) + fsm.getExpiry.isDefined shouldBe false + fsm.isTokenExpired shouldBe false + fsm.retrieveFilesystem() + + fsm.getExpiry should contain(refreshedToken) + fsm.isTokenExpired shouldBe false + verify(fileSystems, never()).getFileSystem(azureUri) + verify(fileSystems, times(1)).newFileSystem(azureUri, configMap) + verify(fileSystems, times(1)).closeFileSystem(azureUri) } } diff --git a/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala b/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala index 69cec235aff..9975065a3e2 100644 --- a/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala +++ b/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala @@ -1,19 +1,22 @@ package cromwell.filesystems.blob +import common.mock.MockSugar +import org.mockito.Mockito.when import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers -import java.nio.file.Files + +import scala.util.{Failure, Try} object BlobPathBuilderSpec { - def buildEndpoint(storageAccount: String) = s"https://$storageAccount.blob.core.windows.net" + def buildEndpoint(storageAccount: String) = EndpointURL(s"https://$storageAccount.blob.core.windows.net") } -class BlobPathBuilderSpec extends AnyFlatSpec with Matchers{ - +class BlobPathBuilderSpec extends AnyFlatSpec with Matchers with MockSugar { + // ValidateBlobPath it should "parse a URI into a path" in { val endpoint = BlobPathBuilderSpec.buildEndpoint("storageAccount") - val container = "container" + val container = BlobContainerName("container") val evalPath = "/path/to/file" - val testString = endpoint + "/" + container + evalPath + val testString = endpoint.value + "/" + container + evalPath BlobPathBuilder.validateBlobPath(testString, container, endpoint) match { case BlobPathBuilder.ValidBlobPath(path) => path should equal(evalPath) case BlobPathBuilder.UnparsableBlobPath(errorMessage) => fail(errorMessage) @@ -22,53 +25,70 @@ class BlobPathBuilderSpec extends AnyFlatSpec with Matchers{ it should "bad storage account fails causes URI to fail parse into a path" in { val endpoint = BlobPathBuilderSpec.buildEndpoint("storageAccount") - val container = 
"container" + val container = BlobContainerName("container") val evalPath = "/path/to/file" - val testString = BlobPathBuilderSpec.buildEndpoint("badStorageAccount") + container + evalPath + val testString = BlobPathBuilderSpec.buildEndpoint("badStorageAccount").value + container.value + evalPath BlobPathBuilder.validateBlobPath(testString, container, endpoint) match { case BlobPathBuilder.ValidBlobPath(path) => fail(s"Valid path: $path found when verifying mismatched storage account") - case BlobPathBuilder.UnparsableBlobPath(errorMessage) => errorMessage.getMessage() should equal(BlobPathBuilder.invalidBlobPathMessage(container, endpoint)) + case BlobPathBuilder.UnparsableBlobPath(errorMessage) => errorMessage.getMessage should equal(BlobPathBuilder.invalidBlobPathMessage(container, endpoint)) } } it should "bad container fails causes URI to fail parse into a path" in { val endpoint = BlobPathBuilderSpec.buildEndpoint("storageAccount") - val container = "container" + val container = BlobContainerName("container") val evalPath = "/path/to/file" - val testString = endpoint + "badContainer" + evalPath + val testString = endpoint.value + "badContainer" + evalPath BlobPathBuilder.validateBlobPath(testString, container, endpoint) match { case BlobPathBuilder.ValidBlobPath(path) => fail(s"Valid path: $path found when verifying mismatched container") - case BlobPathBuilder.UnparsableBlobPath(errorMessage) => errorMessage.getMessage() should equal(BlobPathBuilder.invalidBlobPathMessage(container, endpoint)) + case BlobPathBuilder.UnparsableBlobPath(errorMessage) => errorMessage.getMessage should equal(BlobPathBuilder.invalidBlobPathMessage(container, endpoint)) } } + it should "provide a readable error when getting an illegal nioPath" in { + val endpoint = BlobPathBuilderSpec.buildEndpoint("storageAccount") + val container = BlobContainerName("container") + val evalPath = "/path/to/file" + val exception = new Exception("Failed to do the thing") + val fsm = mock[BlobFileSystemManager] + when(fsm.retrieveFilesystem()).thenReturn(Failure(exception)) + val path = BlobPath(evalPath, endpoint, container)(fsm) + val testException = Try(path.nioPath).failed.toOption + testException should contain(exception) + } + ignore should "build a blob path from a test string and read a file" in { val endpoint = BlobPathBuilderSpec.buildEndpoint("coaexternalstorage") - val endpointHost = BlobPathBuilder.parseURI(endpoint).getHost - val store = "inputs" + val endpointHost = BlobPathBuilder.parseURI(endpoint.value).map(_.getHost).getOrElse(fail("Could not parse URI")) + val store = BlobContainerName("inputs") val evalPath = "/test/inputFile.txt" - val blobTokenGenerator: BlobTokenGenerator = BlobTokenGenerator.createBlobTokenGenerator(store, endpoint) - val testString = endpoint + "/" + store + evalPath - val blobPath: BlobPath = new BlobPathBuilder(blobTokenGenerator, store, endpoint) build testString getOrElse fail() + val blobTokenGenerator = NativeBlobTokenGenerator(store, endpoint) + val fsm: BlobFileSystemManager = BlobFileSystemManager(store, endpoint, 10L, blobTokenGenerator) + val testString = endpoint.value + "/" + store + evalPath + val blobPath: BlobPath = new BlobPathBuilder(store, endpoint)(fsm) build testString getOrElse fail() blobPath.container should equal(store) blobPath.endpoint should equal(endpoint) blobPath.pathAsString should equal(testString) blobPath.pathWithoutScheme should equal(endpointHost + "/" + store + evalPath) - - val is = Files.newInputStream(blobPath.nioPath) + val is = 
blobPath.newInputStream() val fileText = (is.readAllBytes.map(_.toChar)).mkString fileText should include ("This is my test file!!!! Did it work?") } ignore should "build duplicate blob paths in the same filesystem" in { val endpoint = BlobPathBuilderSpec.buildEndpoint("coaexternalstorage") - val store = "inputs" + val store = BlobContainerName("inputs") val evalPath = "/test/inputFile.txt" - val blobTokenGenerator: BlobTokenGenerator = BlobTokenGenerator.createBlobTokenGenerator(store, endpoint) - val testString = endpoint + "/" + store + evalPath - val blobPath1: BlobPath = new BlobPathBuilder(blobTokenGenerator, store, endpoint) build testString getOrElse fail() - val blobPath2: BlobPath = new BlobPathBuilder(blobTokenGenerator, store, endpoint) build testString getOrElse fail() + val blobTokenGenerator = NativeBlobTokenGenerator(store, endpoint) + val fsm: BlobFileSystemManager = BlobFileSystemManager(store, endpoint, 10, blobTokenGenerator) + val testString = endpoint.value + "/" + store + evalPath + val blobPath1: BlobPath = new BlobPathBuilder(store, endpoint)(fsm) build testString getOrElse fail() + blobPath1.nioPath.getFileSystem.close() + val blobPath2: BlobPath = new BlobPathBuilder(store, endpoint)(fsm) build testString getOrElse fail() blobPath1 should equal(blobPath2) + val is = blobPath1.newInputStream() + val fileText = (is.readAllBytes.map(_.toChar)).mkString + fileText should include ("This is my test file!!!! Did it work?") } } From 664de52e74a893b43169525baa5bcd5b0a9fd632 Mon Sep 17 00:00:00 2001 From: Christian Freitas Date: Mon, 12 Sep 2022 16:19:11 -0400 Subject: [PATCH 083/326] Batch 1 of scala steward updates (#6903) * Batch 1 of scala steward updates * Rollback snakeYAML * Attempt 3, with only the passing dependancies * Revert google API and Big Query udpates * Winding back other google deps * rollback remaining google updates * trigger tests * trigger tests --- project/Dependencies.scala | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index a6cc50eb351..fadd662d2f3 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -10,9 +10,9 @@ object Dependencies { // We would like to use the BOM to manage Azure SDK versions, but SBT doesn't support it. // https://github.com/Azure/azure-sdk-for-java/tree/main/sdk/boms/azure-sdk-bom // https://github.com/sbt/sbt/issues/4531 - private val azureStorageBlobNioV = "12.0.0-beta.18" - private val azureIdentitySdkV = "1.4.2" - private val azureKeyVaultSdkV = "4.3.7" + private val azureStorageBlobNioV = "12.0.0-beta.19" + private val azureIdentitySdkV = "1.4.6" + private val azureKeyVaultSdkV = "4.3.8" private val betterFilesV = "3.9.1" /* cats-effect, fs2, http4s, and sttp (also to v3) should all be upgraded at the same time to use cats-effect 3.x. 
@@ -34,7 +34,7 @@ object Dependencies { private val configsV = "0.6.1" private val delightRhinoSandboxV = "0.0.15" private val diffsonSprayJsonV = "4.1.1" - private val ficusV = "1.5.1" + private val ficusV = "1.5.2" private val fs2V = "2.5.9" // scala-steward:off (CROM-6564) private val googleApiClientV = "1.33.2" private val googleCloudBigQueryV = "2.10.0" @@ -75,8 +75,8 @@ object Dependencies { private val metrics4ScalaV = "4.2.8" private val metrics3StatsdV = "4.2.0" private val mockFtpServerV = "3.0.0" - private val mockitoV = "3.11.2" - private val mockserverNettyV = "5.11.2" + private val mockitoV = "3.12.4" + private val mockserverNettyV = "5.14.0" private val mouseV = "1.0.10" /* Newer version 8.0.29 fails `Control characters should work with metadata` Centaur tests, has charset changes mentioned in release notes @@ -88,7 +88,7 @@ object Dependencies { private val postgresV = "42.3.3" private val pprintV = "0.7.1" private val rdf4jV = "3.7.1" - private val refinedV = "0.9.28" + private val refinedV = "0.9.29" private val rhinoV = "1.7.13" private val scalaCollectionCompatV = "2.5.0" private val scalaGraphV = "1.13.1" @@ -98,9 +98,9 @@ object Dependencies { private val scalameterV = "0.19" private val scalatestV = "3.2.10" private val scalatestScalacheckV = scalatestV + ".0" - private val scoptV = "4.0.1" + private val scoptV = "4.1.0" private val sentryLogbackV = "5.2.4" - private val shapelessV = "2.3.7" + private val shapelessV = "2.3.9" private val simulacrumV = "1.0.1" private val slf4jV = "1.7.32" private val slickCatsV = "0.10.4" @@ -191,8 +191,8 @@ object Dependencies { "com.azure" % "azure-security-keyvault-secrets" % azureKeyVaultSdkV exclude("jakarta.xml.bind", "jakarta.xml.bind-api") exclude("jakarta.activation", "jakarta.activation-api"), - "com.azure" % "azure-core-management" % "1.7.0", - "com.azure.resourcemanager" % "azure-resourcemanager" % "2.17.0" + "com.azure" % "azure-core-management" % "1.7.1", + "com.azure.resourcemanager" % "azure-resourcemanager" % "2.18.0" ) val implFtpDependencies = List( From 4884f735a7f3bd8863acf763bdac15b76fac9b2a Mon Sep 17 00:00:00 2001 From: Saloni Shah Date: Fri, 16 Sep 2022 15:07:59 -0400 Subject: [PATCH 084/326] [BW-1398] Migrate PKs to BIGINT (#6907) --- CHANGELOG.md | 10 +++ .../src/main/resources/changelog.xml | 3 + .../enlarge_docker_hash_store_entry_id.xml | 41 ++++++++++ .../enlarge_sub_workflow_store_entry_id.xml | 41 ++++++++++ .../enlarge_workflow_store_entry_id.xml | 82 +++++++++++++++++++ .../DockerHashStoreEntryComponent.scala | 2 +- .../SubWorkflowStoreEntryComponent.scala | 6 +- .../tables/WorkflowStoreEntryComponent.scala | 2 +- .../sql/tables/DockerHashStoreEntry.scala | 2 +- .../sql/tables/SubWorkflowStoreEntry.scala | 4 +- .../sql/tables/WorkflowStoreEntry.scala | 2 +- 11 files changed, 186 insertions(+), 9 deletions(-) create mode 100644 database/migration/src/main/resources/changesets/enlarge_docker_hash_store_entry_id.xml create mode 100644 database/migration/src/main/resources/changesets/enlarge_sub_workflow_store_entry_id.xml create mode 100644 database/migration/src/main/resources/changesets/enlarge_workflow_store_entry_id.xml diff --git a/CHANGELOG.md b/CHANGELOG.md index e1158be3ca4..4dcab0dfa3f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,15 @@ # Cromwell Change Log +## 85 Release Notes + +### Migration of PKs to BIGINT + +The PK of below tables will be migrated from INT to BIGINT. 
Also, since `ROOT_WORKFLOW_ID` in `SUB_WORKFLOW_STORE_ENTRY` is a FK to `WORKFLOW_STORE_ENTRY_ID` in `WORKFLOW_STORE_ENTRY` +it is also being migrated from INT to BIGINT. +* DOCKER_HASH_STORE_ENTRY +* WORKFLOW_STORE_ENTRY +* SUB_WORKFLOW_STORE_ENTRY + ## 84 Release Notes ### CromIAM enabled user checks diff --git a/database/migration/src/main/resources/changelog.xml b/database/migration/src/main/resources/changelog.xml index 5d711061c31..49d568766b3 100644 --- a/database/migration/src/main/resources/changelog.xml +++ b/database/migration/src/main/resources/changelog.xml @@ -87,6 +87,9 @@ + + + diff --git a/database/migration/src/main/resources/changesets/enlarge_docker_hash_store_entry_id.xml b/database/migration/src/main/resources/changesets/enlarge_docker_hash_store_entry_id.xml new file mode 100644 index 00000000000..fc7992268e6 --- /dev/null +++ b/database/migration/src/main/resources/changesets/enlarge_docker_hash_store_entry_id.xml @@ -0,0 +1,41 @@ + + + + + + + + + + + + + + + + + + SELECT count(*) + FROM information_schema.sequences + WHERE sequence_name = 'DOCKER_HASH_STORE_ENTRY_DOCKER_HASH_STORE_ENTRY_ID_seq' + AND data_type = 'bigint'; + + + alter sequence "DOCKER_HASH_STORE_ENTRY_DOCKER_HASH_STORE_ENTRY_ID_seq" as bigint; + + + + diff --git a/database/migration/src/main/resources/changesets/enlarge_sub_workflow_store_entry_id.xml b/database/migration/src/main/resources/changesets/enlarge_sub_workflow_store_entry_id.xml new file mode 100644 index 00000000000..57894db824b --- /dev/null +++ b/database/migration/src/main/resources/changesets/enlarge_sub_workflow_store_entry_id.xml @@ -0,0 +1,41 @@ + + + + + + + + + + + + + + + + + + SELECT count(*) + FROM information_schema.sequences + WHERE sequence_name = 'SUB_WORKFLOW_STORE_ENTRY_SUB_WORKFLOW_STORE_ENTRY_ID_seq' + AND data_type = 'bigint'; + + + alter sequence "SUB_WORKFLOW_STORE_ENTRY_SUB_WORKFLOW_STORE_ENTRY_ID_seq" as bigint; + + + + diff --git a/database/migration/src/main/resources/changesets/enlarge_workflow_store_entry_id.xml b/database/migration/src/main/resources/changesets/enlarge_workflow_store_entry_id.xml new file mode 100644 index 00000000000..ea2b498fd51 --- /dev/null +++ b/database/migration/src/main/resources/changesets/enlarge_workflow_store_entry_id.xml @@ -0,0 +1,82 @@ + + + + + + + + + + + + + + + + + + + + + + + SELECT count(*) + FROM information_schema.sequences + WHERE sequence_name = 'WORKFLOW_STORE_ENTRY_WORKFLOW_STORE_ENTRY_ID_seq' + AND data_type = 'bigint'; + + + alter sequence "WORKFLOW_STORE_ENTRY_WORKFLOW_STORE_ENTRY_ID_seq" as bigint; + + + + + + + + + + + + + + + + + + + + + + diff --git a/database/sql/src/main/scala/cromwell/database/slick/tables/DockerHashStoreEntryComponent.scala b/database/sql/src/main/scala/cromwell/database/slick/tables/DockerHashStoreEntryComponent.scala index e258b40f3b7..5929b2e5a6a 100644 --- a/database/sql/src/main/scala/cromwell/database/slick/tables/DockerHashStoreEntryComponent.scala +++ b/database/sql/src/main/scala/cromwell/database/slick/tables/DockerHashStoreEntryComponent.scala @@ -9,7 +9,7 @@ trait DockerHashStoreEntryComponent { import driver.api._ class DockerHashStoreEntries(tag: Tag) extends Table[DockerHashStoreEntry](tag, "DOCKER_HASH_STORE_ENTRY") { - def dockerHashStoreEntryId = column[Int]("DOCKER_HASH_STORE_ENTRY_ID", O.PrimaryKey, O.AutoInc) + def dockerHashStoreEntryId = column[Long]("DOCKER_HASH_STORE_ENTRY_ID", O.PrimaryKey, O.AutoInc) def workflowExecutionUuid = column[String]("WORKFLOW_EXECUTION_UUID", O.Length(255)) diff --git 
a/database/sql/src/main/scala/cromwell/database/slick/tables/SubWorkflowStoreEntryComponent.scala b/database/sql/src/main/scala/cromwell/database/slick/tables/SubWorkflowStoreEntryComponent.scala index 2ea7b36782c..11fa2191cb2 100644 --- a/database/sql/src/main/scala/cromwell/database/slick/tables/SubWorkflowStoreEntryComponent.scala +++ b/database/sql/src/main/scala/cromwell/database/slick/tables/SubWorkflowStoreEntryComponent.scala @@ -10,9 +10,9 @@ trait SubWorkflowStoreEntryComponent { import driver.api._ class SubWorkflowStoreEntries(tag: Tag) extends Table[SubWorkflowStoreEntry](tag, "SUB_WORKFLOW_STORE_ENTRY") { - def subWorkflowStoreEntryId = column[Int]("SUB_WORKFLOW_STORE_ENTRY_ID", O.PrimaryKey, O.AutoInc) + def subWorkflowStoreEntryId = column[Long]("SUB_WORKFLOW_STORE_ENTRY_ID", O.PrimaryKey, O.AutoInc) - def rootWorkflowId = column[Int]("ROOT_WORKFLOW_ID") + def rootWorkflowId = column[Long]("ROOT_WORKFLOW_ID") def parentWorkflowExecutionUuid = column[String]("PARENT_WORKFLOW_EXECUTION_UUID", O.Length(255)) @@ -40,7 +40,7 @@ trait SubWorkflowStoreEntryComponent { val subWorkflowStoreEntryIdsAutoInc = subWorkflowStoreEntries returning subWorkflowStoreEntries.map(_.subWorkflowStoreEntryId) val subWorkflowStoreEntriesForRootWorkflowId = Compiled( - (rootWorkflowId: Rep[Int]) => for { + (rootWorkflowId: Rep[Long]) => for { subWorkflowStoreEntry <- subWorkflowStoreEntries if subWorkflowStoreEntry.rootWorkflowId === rootWorkflowId } yield subWorkflowStoreEntry diff --git a/database/sql/src/main/scala/cromwell/database/slick/tables/WorkflowStoreEntryComponent.scala b/database/sql/src/main/scala/cromwell/database/slick/tables/WorkflowStoreEntryComponent.scala index 0e29cee651f..847fa862405 100644 --- a/database/sql/src/main/scala/cromwell/database/slick/tables/WorkflowStoreEntryComponent.scala +++ b/database/sql/src/main/scala/cromwell/database/slick/tables/WorkflowStoreEntryComponent.scala @@ -11,7 +11,7 @@ trait WorkflowStoreEntryComponent { import driver.api._ class WorkflowStoreEntries(tag: Tag) extends Table[WorkflowStoreEntry](tag, "WORKFLOW_STORE_ENTRY") { - def workflowStoreEntryId = column[Int]("WORKFLOW_STORE_ENTRY_ID", O.PrimaryKey, O.AutoInc) + def workflowStoreEntryId = column[Long]("WORKFLOW_STORE_ENTRY_ID", O.PrimaryKey, O.AutoInc) def workflowExecutionUuid = column[String]("WORKFLOW_EXECUTION_UUID", O.Length(255)) diff --git a/database/sql/src/main/scala/cromwell/database/sql/tables/DockerHashStoreEntry.scala b/database/sql/src/main/scala/cromwell/database/sql/tables/DockerHashStoreEntry.scala index a4a5b1247ee..542558324f5 100644 --- a/database/sql/src/main/scala/cromwell/database/sql/tables/DockerHashStoreEntry.scala +++ b/database/sql/src/main/scala/cromwell/database/sql/tables/DockerHashStoreEntry.scala @@ -6,5 +6,5 @@ case class DockerHashStoreEntry dockerTag: String, dockerHash: String, dockerSize: Option[Long], - dockerHashStoreEntryId: Option[Int] = None + dockerHashStoreEntryId: Option[Long] = None ) diff --git a/database/sql/src/main/scala/cromwell/database/sql/tables/SubWorkflowStoreEntry.scala b/database/sql/src/main/scala/cromwell/database/sql/tables/SubWorkflowStoreEntry.scala index 96ac39aa672..4cf89381ad0 100644 --- a/database/sql/src/main/scala/cromwell/database/sql/tables/SubWorkflowStoreEntry.scala +++ b/database/sql/src/main/scala/cromwell/database/sql/tables/SubWorkflowStoreEntry.scala @@ -2,11 +2,11 @@ package cromwell.database.sql.tables case class SubWorkflowStoreEntry ( - rootWorkflowId: Option[Int], + rootWorkflowId: Option[Long], 
parentWorkflowExecutionUuid: String, callFullyQualifiedName: String, callIndex: Int, callAttempt: Int, subWorkflowExecutionUuid: String, - subWorkflowStoreEntryId: Option[Int] = None + subWorkflowStoreEntryId: Option[Long] = None ) diff --git a/database/sql/src/main/scala/cromwell/database/sql/tables/WorkflowStoreEntry.scala b/database/sql/src/main/scala/cromwell/database/sql/tables/WorkflowStoreEntry.scala index 6c03db87014..efb594444d4 100644 --- a/database/sql/src/main/scala/cromwell/database/sql/tables/WorkflowStoreEntry.scala +++ b/database/sql/src/main/scala/cromwell/database/sql/tables/WorkflowStoreEntry.scala @@ -21,5 +21,5 @@ case class WorkflowStoreEntry cromwellId: Option[String], heartbeatTimestamp: Option[Timestamp], hogGroup: Option[String], - workflowStoreEntryId: Option[Int] = None + workflowStoreEntryId: Option[Long] = None ) From e3f1ad0335b763056e598d97c64cc7f0186c68f8 Mon Sep 17 00:00:00 2001 From: Christian Freitas Date: Tue, 20 Sep 2022 08:19:10 -0400 Subject: [PATCH 085/326] BT-745 Batch 2 of scala steward updates (#6906) * Update SBT to 2.0.0 * Fix sbt-git import * Update mouse to 1.0.11 * Update rhino 1.7.14 --- project/Dependencies.scala | 6 ++++-- project/Version.scala | 3 ++- project/plugins.sbt | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index fadd662d2f3..21c50b77e10 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -77,7 +77,8 @@ object Dependencies { private val mockFtpServerV = "3.0.0" private val mockitoV = "3.12.4" private val mockserverNettyV = "5.14.0" - private val mouseV = "1.0.10" + private val mouseV = "1.0.11" + /* Newer version 8.0.29 fails `Control characters should work with metadata` Centaur tests, has charset changes mentioned in release notes https://dev.mysql.com/doc/relnotes/mysql/8.0/en/news-8-0-29.html#mysqld-8-0-29-charset @@ -89,7 +90,8 @@ object Dependencies { private val pprintV = "0.7.1" private val rdf4jV = "3.7.1" private val refinedV = "0.9.29" - private val rhinoV = "1.7.13" + private val rhinoV = "1.7.14" + private val scalaCollectionCompatV = "2.5.0" private val scalaGraphV = "1.13.1" private val scalaLoggingV = "3.9.4" diff --git a/project/Version.scala b/project/Version.scala index c0bdb0b3453..dbe795e6f0c 100644 --- a/project/Version.scala +++ b/project/Version.scala @@ -1,7 +1,8 @@ import Dependencies._ -import com.typesafe.sbt.SbtGit._ +import com.github.sbt.git.SbtGit._ import sbt.Keys._ import sbt._ +import com.github.sbt.git.SbtGit object Version { // Upcoming release, or current if we're on a master / hotfix branch diff --git a/project/plugins.sbt b/project/plugins.sbt index 51289a45e53..cef75cd05b3 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1,5 +1,5 @@ addSbtPlugin("se.marcuslonnberg" % "sbt-docker" % "1.9.0") addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "1.1.0") -addSbtPlugin("com.typesafe.sbt" % "sbt-git" % "1.0.2") +addSbtPlugin("com.github.sbt" % "sbt-git" % "2.0.0") addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.9.3") addDependencyTreePlugin From 68949364c7ffab8450116baa8e2a4e276bb16d70 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Fri, 23 Sep 2022 11:08:10 -0400 Subject: [PATCH 086/326] SUP-692 Retry with more memory after RC 137 (#6912) * Reorder execution result checks so 137 can retry with more memory * Test for memory retry after 137 RC * Fix test expectations * Make memory retry checks consistent * Revert changes to existing test * Rename retryWithMoreMemory to 
outOfMemoryDetected --- .../StandardAsyncExecutionActor.scala | 23 +++++++++++-------- .../retry_with_more_memory_after_137.wdl | 22 ++++++++++++++++++ .../retry_with_more_memory_after_137.test | 21 +++++++++++++++++ 3 files changed, 57 insertions(+), 9 deletions(-) create mode 100644 centaur/src/main/resources/standardTestCases/retry_with_more_memory/retry_with_more_memory_after_137.wdl create mode 100644 centaur/src/main/resources/standardTestCases/retry_with_more_memory_after_137.test diff --git a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala index 0f1e2c931a8..c98e429c63d 100644 --- a/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala +++ b/backend/src/main/scala/cromwell/backend/standard/StandardAsyncExecutionActor.scala @@ -244,6 +244,8 @@ trait StandardAsyncExecutionActor } } + lazy val memoryRetryRequested: Boolean = memoryRetryFactor.nonEmpty + /** * Returns the shell scripting for finding all files listed within a directory. * @@ -1259,6 +1261,7 @@ trait StandardAsyncExecutionActor def handleExecutionResult(status: StandardAsyncRunState, oldHandle: StandardAsyncPendingExecutionHandle): Future[ExecutionHandle] = { + // Returns true if the task has written an RC file that indicates OOM, false otherwise def memoryRetryRC: Future[Boolean] = { def returnCodeAsBoolean(codeAsOption: Option[String]): Boolean = { codeAsOption match { @@ -1299,11 +1302,11 @@ trait StandardAsyncExecutionActor // Only check stderr size if we need to, otherwise this results in a lot of unnecessary I/O that // may fail due to race conditions on quickly-executing jobs. stderrSize <- if (failOnStdErr) asyncIo.sizeAsync(stderr) else Future.successful(0L) - retryWithMoreMemory <- memoryRetryRC - } yield (stderrSize, returnCodeAsString, retryWithMoreMemory) + outOfMemoryDetected <- memoryRetryRC + } yield (stderrSize, returnCodeAsString, outOfMemoryDetected) stderrSizeAndReturnCodeAndMemoryRetry flatMap { - case (stderrSize, returnCodeAsString, retryWithMoreMemory) => + case (stderrSize, returnCodeAsString, outOfMemoryDetected) => val tryReturnCodeAsInt = Try(returnCodeAsString.trim.toInt) if (isDone(status)) { @@ -1311,13 +1314,15 @@ trait StandardAsyncExecutionActor case Success(returnCodeAsInt) if failOnStdErr && stderrSize.intValue > 0 => val executionHandle = Future.successful(FailedNonRetryableExecutionHandle(StderrNonEmpty(jobDescriptor.key.tag, stderrSize, stderrAsOption), Option(returnCodeAsInt), None)) retryElseFail(executionHandle) - case Success(returnCodeAsInt) if isAbort(returnCodeAsInt) => - Future.successful(AbortedExecutionHandle) case Success(returnCodeAsInt) if continueOnReturnCode.continueFor(returnCodeAsInt) => handleExecutionSuccess(status, oldHandle, returnCodeAsInt) - case Success(returnCodeAsInt) if retryWithMoreMemory => + // It's important that we check retryWithMoreMemory case before isAbort. RC could be 137 in either case; + // if it was caused by OOM killer, want to handle as OOM and not job abort. 
+ case Success(returnCodeAsInt) if outOfMemoryDetected && memoryRetryRequested => val executionHandle = Future.successful(FailedNonRetryableExecutionHandle(RetryWithMoreMemory(jobDescriptor.key.tag, stderrAsOption, memoryRetryErrorKeys, log), Option(returnCodeAsInt), None)) - retryElseFail(executionHandle, retryWithMoreMemory) + retryElseFail(executionHandle, outOfMemoryDetected) + case Success(returnCodeAsInt) if isAbort(returnCodeAsInt) => + Future.successful(AbortedExecutionHandle) case Success(returnCodeAsInt) => val executionHandle = Future.successful(FailedNonRetryableExecutionHandle(WrongReturnCode(jobDescriptor.key.tag, returnCodeAsInt, stderrAsOption), Option(returnCodeAsInt), None)) retryElseFail(executionHandle) @@ -1326,9 +1331,9 @@ trait StandardAsyncExecutionActor } } else { tryReturnCodeAsInt match { - case Success(returnCodeAsInt) if retryWithMoreMemory && !continueOnReturnCode.continueFor(returnCodeAsInt) => + case Success(returnCodeAsInt) if outOfMemoryDetected && memoryRetryRequested && !continueOnReturnCode.continueFor(returnCodeAsInt) => val executionHandle = Future.successful(FailedNonRetryableExecutionHandle(RetryWithMoreMemory(jobDescriptor.key.tag, stderrAsOption, memoryRetryErrorKeys, log), Option(returnCodeAsInt), None)) - retryElseFail(executionHandle, retryWithMoreMemory) + retryElseFail(executionHandle, outOfMemoryDetected) case _ => val failureStatus = handleExecutionFailure(status, tryReturnCodeAsInt.toOption) retryElseFail(failureStatus) diff --git a/centaur/src/main/resources/standardTestCases/retry_with_more_memory/retry_with_more_memory_after_137.wdl b/centaur/src/main/resources/standardTestCases/retry_with_more_memory/retry_with_more_memory_after_137.wdl new file mode 100644 index 00000000000..2fe434475c6 --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/retry_with_more_memory/retry_with_more_memory_after_137.wdl @@ -0,0 +1,22 @@ +version 1.0 + +task imitate_oom_error { + command { + printf "Exception in thread "main" java.lang.OutOfMemoryError: testing\n\tat Test.main(Test.java:1)\n" >&2 + touch foo + exit 137 + } + output { + File foo = "foo" + } + runtime { + docker: "python:latest" + memory: "1 GB" + maxRetries: 2 + backend: "Papiv2" + } +} + +workflow retry_with_more_memory_after_137 { + call imitate_oom_error +} diff --git a/centaur/src/main/resources/standardTestCases/retry_with_more_memory_after_137.test b/centaur/src/main/resources/standardTestCases/retry_with_more_memory_after_137.test new file mode 100644 index 00000000000..a69290ca511 --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/retry_with_more_memory_after_137.test @@ -0,0 +1,21 @@ +name: retry_with_more_memory_after_137 +testFormat: workflowfailure +backends: [Papiv2] + +files { + workflow: retry_with_more_memory/retry_with_more_memory_after_137.wdl + options: retry_with_more_memory/retry_with_more_memory.options +} + +metadata { + workflowName: retry_with_more_memory_after_137 + status: Failed + "failures.0.message": "Workflow failed" + "failures.0.causedBy.0.message": "stderr for job `retry_with_more_memory_after_137.imitate_oom_error:NA:3` contained one of the `memory-retry-error-keys: [OutOfMemory,Killed]` specified in the Cromwell config. Job might have run out of memory." 
+ "retry_with_more_memory_after_137.imitate_oom_error.-1.1.executionStatus": "RetryableFailure" + "retry_with_more_memory_after_137.imitate_oom_error.-1.1.runtimeAttributes.memory": "1 GB" + "retry_with_more_memory_after_137.imitate_oom_error.-1.2.executionStatus": "RetryableFailure" + "retry_with_more_memory_after_137.imitate_oom_error.-1.2.runtimeAttributes.memory": "1.1 GB" + "retry_with_more_memory_after_137.imitate_oom_error.-1.3.executionStatus": "Failed" + "retry_with_more_memory_after_137.imitate_oom_error.-1.3.runtimeAttributes.memory": "1.2100000000000002 GB" +} From 6227be973ea81759381066f44b5193752ac81c3c Mon Sep 17 00:00:00 2001 From: Christian Freitas Date: Tue, 27 Sep 2022 15:02:04 -0400 Subject: [PATCH 087/326] Scala steward updates batch 3 (#6913) * Scala steward updates batch 3 --- project/Dependencies.scala | 8 ++++---- project/GenerateRestApiDocs.scala | 9 ++++----- project/swagger2markup.sbt | 2 +- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 21c50b77e10..7a8f34daf51 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -65,7 +65,7 @@ object Dependencies { private val kindProjectorV = "0.13.2" private val kittensV = "2.3.2" private val liquibaseV = "4.8.0" - private val logbackV = "1.2.10" + private val logbackV = "1.2.11" private val lz4JavaV = "1.8.0" private val mariadbV = "2.7.4" /* @@ -87,14 +87,14 @@ object Dependencies { private val nettyV = "4.1.72.Final" private val owlApiV = "5.1.19" private val postgresV = "42.3.3" - private val pprintV = "0.7.1" + private val pprintV = "0.7.3" private val rdf4jV = "3.7.1" private val refinedV = "0.9.29" private val rhinoV = "1.7.14" private val scalaCollectionCompatV = "2.5.0" private val scalaGraphV = "1.13.1" - private val scalaLoggingV = "3.9.4" + private val scalaLoggingV = "3.9.5" private val scalaPoolV = "0.4.3" private val scalacticV = "3.2.10" private val scalameterV = "0.19" @@ -123,7 +123,7 @@ object Dependencies { private val sttpV = "1.7.2" private val swaggerParserV = "1.0.56" private val swaggerUiV = "4.5.0" - private val testContainersScalaV = "0.40.2" + private val testContainersScalaV = "0.40.10" private val tikaV = "2.3.0" private val typesafeConfigV = "1.4.1" private val workbenchGoogleV = "0.21-5c9c4f6" // via: https://github.com/broadinstitute/workbench-libs/blob/develop/google/CHANGELOG.md diff --git a/project/GenerateRestApiDocs.scala b/project/GenerateRestApiDocs.scala index afb1168fa5c..9173a25c3c5 100644 --- a/project/GenerateRestApiDocs.scala +++ b/project/GenerateRestApiDocs.scala @@ -1,13 +1,12 @@ -import java.time.OffsetDateTime -import java.time.format.DateTimeFormatter - -import io.github.swagger2markup.Swagger2MarkupConverter import io.github.swagger2markup.builder.Swagger2MarkupConfigBuilder -import io.github.swagger2markup.markup.builder.MarkupLanguage +import io.github.swagger2markup.{MarkupLanguage, Swagger2MarkupConverter} import org.apache.commons.lang3.{ClassUtils, StringUtils} import sbt.Keys._ import sbt._ +import java.time.OffsetDateTime +import java.time.format.DateTimeFormatter + /** * Provides a task to generate the REST API markdown from the Swagger YAML. 
* diff --git a/project/swagger2markup.sbt b/project/swagger2markup.sbt index 96f836bcc72..1fa882328f1 100644 --- a/project/swagger2markup.sbt +++ b/project/swagger2markup.sbt @@ -14,7 +14,7 @@ val jacksonV = "2.8.4" val javaslangV = "2.0.5" val plexusUtilsV = "3.0.22" val slf4jV = "1.7.21" -val swagger2markupV = "1.3.3" +val swagger2markupV = "1.3.4" resolvers ++= List( "JCenter" at "https://jcenter.bintray.com" From f052c788ebe7adcef9d44677953957602777b514 Mon Sep 17 00:00:00 2001 From: Christian Freitas Date: Fri, 30 Sep 2022 09:31:06 -0400 Subject: [PATCH 088/326] WX-745 Batch 4 scala steward updates (#6916) --- project/Dependencies.scala | 18 +++++++++--------- project/plugins.sbt | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 7a8f34daf51..e85188a2ff1 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -6,7 +6,7 @@ object Dependencies { private val akkaV = "2.5.32" // scala-steward:off (CROM-6637) private val ammoniteOpsV = "2.4.1" private val apacheHttpClientV = "4.5.13" - private val awsSdkV = "2.17.194" + private val awsSdkV = "2.17.265" // We would like to use the BOM to manage Azure SDK versions, but SBT doesn't support it. // https://github.com/Azure/azure-sdk-for-java/tree/main/sdk/boms/azure-sdk-bom // https://github.com/sbt/sbt/issues/4531 @@ -43,7 +43,7 @@ object Dependencies { private val googleCloudMonitoringV = "3.2.5" private val googleCloudNioV = "0.124.8" private val googleCloudStorageV = "2.9.2" - private val googleGaxGrpcV = "2.12.2" + private val googleGaxGrpcV = "2.19.0" // latest date via: https://mvnrepository.com/artifact/com.google.apis/google-api-services-genomics private val googleGenomicsServicesV2Alpha1ApiV = "v2alpha1-rev20210811-1.32.1" private val googleHttpClientApacheV = "2.1.2" @@ -53,13 +53,13 @@ object Dependencies { private val googleOauth2V = "1.5.3" private val googleOauthClientV = "1.33.1" private val googleCloudResourceManagerV = "1.2.5" - private val grpcV = "1.45.0" + private val grpcV = "1.45.1" private val guavaV = "31.0.1-jre" private val heterodonV = "1.0.0-beta3" private val hsqldbV = "2.6.1" private val http4sV = "0.21.31" // this release is EOL. We need to upgrade further for cats3. 
https://http4s.org/versions/ private val jacksonV = "2.13.3" - private val janinoV = "3.1.6" + private val janinoV = "3.1.7" private val jsr305V = "3.0.2" private val junitV = "4.13.2" private val kindProjectorV = "0.13.2" @@ -96,12 +96,12 @@ object Dependencies { private val scalaGraphV = "1.13.1" private val scalaLoggingV = "3.9.5" private val scalaPoolV = "0.4.3" - private val scalacticV = "3.2.10" - private val scalameterV = "0.19" + private val scalacticV = "3.2.13" + private val scalameterV = "0.21" private val scalatestV = "3.2.10" private val scalatestScalacheckV = scalatestV + ".0" private val scoptV = "4.1.0" - private val sentryLogbackV = "5.2.4" + private val sentryLogbackV = "5.7.4" private val shapelessV = "2.3.9" private val simulacrumV = "1.0.1" private val slf4jV = "1.7.32" @@ -122,10 +122,10 @@ object Dependencies { private val sprayJsonV = "1.3.6" private val sttpV = "1.7.2" private val swaggerParserV = "1.0.56" - private val swaggerUiV = "4.5.0" + private val swaggerUiV = "4.5.2" private val testContainersScalaV = "0.40.10" private val tikaV = "2.3.0" - private val typesafeConfigV = "1.4.1" + private val typesafeConfigV = "1.4.2" private val workbenchGoogleV = "0.21-5c9c4f6" // via: https://github.com/broadinstitute/workbench-libs/blob/develop/google/CHANGELOG.md private val workbenchModelV = "0.15-f9f0d4c" // via: https://github.com/broadinstitute/workbench-libs/blob/develop/model/CHANGELOG.md private val workbenchUtilV = "0.6-65bba14" // via: https://github.com/broadinstitute/workbench-libs/blob/develop/util/CHANGELOG.md diff --git a/project/plugins.sbt b/project/plugins.sbt index cef75cd05b3..dd6a98e1b9b 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1,5 +1,5 @@ addSbtPlugin("se.marcuslonnberg" % "sbt-docker" % "1.9.0") -addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "1.1.0") +addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "1.1.1") addSbtPlugin("com.github.sbt" % "sbt-git" % "2.0.0") addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.9.3") addDependencyTreePlugin From 443a6fc265ed5cae3d697eac0bec79d31eadb7be Mon Sep 17 00:00:00 2001 From: Brian Reilly Date: Fri, 30 Sep 2022 14:36:50 -0400 Subject: [PATCH 089/326] WX-746 Localize all DRS inputs in a single Action (#6914) Co-authored-by: Janet Gainer-Dewar --- CHANGELOG.md | 22 +++++ .../drs/localizer/CommandLineParser.scala | 39 ++++++-- .../drs/localizer/DrsLocalizerMain.scala | 25 ++++- .../AzureB2CAccessTokenStrategy.scala | 2 +- .../drs/localizer/CommandLineParserSpec.scala | 71 +++++++++++++- project/Dependencies.scala | 1 + ...inesApiAsyncBackendJobExecutionActor.scala | 4 + .../common/PipelinesApiJobPaths.scala | 1 + ...inesApiAsyncBackendJobExecutionActor.scala | 28 +++++- .../PipelinesParameterConversions.scala | 23 +---- .../pipelines/v2beta/api/Localization.scala | 53 +++++++++- ...ApiAsyncBackendJobExecutionActorSpec.scala | 49 ++++++++++ .../v2beta/PipelinesConversionsSpec.scala | 96 ------------------- .../v2beta/api/LocalizationSpec.scala | 67 +++++++++++++ 14 files changed, 351 insertions(+), 130 deletions(-) delete mode 100644 supportedBackends/google/pipelines/v2beta/src/test/scala/cromwell/backend/google/pipelines/v2beta/PipelinesConversionsSpec.scala create mode 100644 supportedBackends/google/pipelines/v2beta/src/test/scala/cromwell/backend/google/pipelines/v2beta/api/LocalizationSpec.scala diff --git a/CHANGELOG.md b/CHANGELOG.md index 4dcab0dfa3f..351adccf651 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,28 @@ it is also being migrated from INT to BIGINT. 
* WORKFLOW_STORE_ENTRY * SUB_WORKFLOW_STORE_ENTRY +### Improvement to "retry with more memory" behavior + +Cromwell will now retry a task with more memory after it fails with return code 137, provided all +the other requirements for retrying with more memory are met. + +### Support for invoking `CromwellDRSLocalizer` with manifest file + +`CromwellDRSLocalizer` can now handle multiple file localizations in a single invocation. Users can provide a +manifest file containing multiple (DRS id, local container path) pairs in CSV format, and they will be localized in +sequence, with the program exiting if any fail. +``` +java -jar /path/to/localizer.jar [options] -m /local/path/to/manifest/file.txt +``` + +The previous method of passing in a single DRS file and container destination using positional arguments is still +supported. + +### Improvement to DRS localization in GCP papiv2beta backend + +All DRS inputs to a task are now localized in a single PAPI action, which should improve speed and resolve +failures observed when attempting to localize a large number of DRS files. + ## 84 Release Notes ### CromIAM enabled user checks diff --git a/cromwell-drs-localizer/src/main/scala/drs/localizer/CommandLineParser.scala b/cromwell-drs-localizer/src/main/scala/drs/localizer/CommandLineParser.scala index df0f7c2eba6..b200d4b18de 100644 --- a/cromwell-drs-localizer/src/main/scala/drs/localizer/CommandLineParser.scala +++ b/cromwell-drs-localizer/src/main/scala/drs/localizer/CommandLineParser.scala @@ -14,15 +14,25 @@ class CommandLineParser extends scopt.OptionParser[CommandLineArguments](Usage) head("cromwell-drs-localizer", localizerVersion) - arg[String]("drs-object-id").text("DRS object ID").required(). + arg[String]("drs-object-id").text("DRS object ID").optional(). action((s, c) => c.copy(drsObject = Option(s))) - arg[String]("container-path").text("Container path").required(). + arg[String]("container-path").text("Container path").optional(). action((s, c) => c.copy(containerPath = Option(s))) - arg[String]("requester-pays-project").text("Requester pays project").optional(). + arg[String]("requester-pays-project").text(s"Requester pays project (only valid with '$Google' auth strategy)").optional(). action((s, c) => c.copy(googleRequesterPaysProject = Option(s))) + opt[String]('m', "manifest-path").text("File path of manifest containing multiple files to localize"). + action((s, c) => + c.copy(manifestPath = Option(s))) + opt[String]('r', "requester-pays-project").text(s"Requester pays project (only valid with '$Google' auth strategy)").optional(). + action((s, c) => { + c.copy( + googleRequesterPaysProject = Option(s), + googleRequesterPaysProjectConflict = c.googleRequesterPaysProject.exists(_ != s) + ) + }) opt[String]('t', "access-token-strategy").text(s"Access token strategy, must be one of '$Azure' or '$Google' (default '$Google')"). action((s, c) => c.copy(accessTokenStrategy = Option(s.toLowerCase()))) @@ -35,6 +45,11 @@ class CommandLineParser extends scopt.OptionParser[CommandLineArguments](Usage) opt[String]('i', "identity-client-id").text("Azure identity client id"). 
action((s, c) => c.copy(azureIdentityClientId = Option(s))) + checkConfig(c => + if (c.googleRequesterPaysProjectConflict) + failure("Requester pays project differs between positional argument and option flag") + else success + ) checkConfig(c => c.accessTokenStrategy match { case Some(Azure) if c.googleRequesterPaysProject.nonEmpty => @@ -48,6 +63,13 @@ class CommandLineParser extends scopt.OptionParser[CommandLineArguments](Usage) case None => Left("Programmer error, access token strategy should not be None") } ) + checkConfig(c => + (c.drsObject, c.containerPath, c.manifestPath) match { + case (Some(_), Some(_), None) => Right(()) + case (None, None, Some(_)) => Right(()) + case _ => Left("Must provide either DRS path and container path, OR manifest file (-m).") + } + ) } object CommandLineParser { @@ -64,9 +86,12 @@ object CommandLineParser { val Usage = s""" Usage: - java -jar /path/to/localizer.jar [options] drs://provider/object /local/path/to/file.txt [requester pays project] - Note that the optional argument is only valid with access token strategy 'Google'. + Can be run to localize a single file with DRS id and local container path provided in args: + java -jar /path/to/localizer.jar [options] drs://provider/object /local/path/to/file.txt + + Can also be used to localize multiple files in one invocation with manifest file provided in args: + java -jar /path/to/localizer.jar [options] -m /local/path/to/manifest/file """ } @@ -77,4 +102,6 @@ case class CommandLineArguments(accessTokenStrategy: Option[String] = Option(Goo googleRequesterPaysProject: Option[String] = None, azureVaultName: Option[String] = None, azureSecretName: Option[String] = None, - azureIdentityClientId: Option[String] = None) + azureIdentityClientId: Option[String] = None, + manifestPath: Option[String] = None, + googleRequesterPaysProjectConflict: Boolean = false) diff --git a/cromwell-drs-localizer/src/main/scala/drs/localizer/DrsLocalizerMain.scala b/cromwell-drs-localizer/src/main/scala/drs/localizer/DrsLocalizerMain.scala index c7ecdd3f3e4..05ecf0ce48c 100644 --- a/cromwell-drs-localizer/src/main/scala/drs/localizer/DrsLocalizerMain.scala +++ b/cromwell-drs-localizer/src/main/scala/drs/localizer/DrsLocalizerMain.scala @@ -2,6 +2,7 @@ package drs.localizer import cats.data.NonEmptyList import cats.effect.{ExitCode, IO, IOApp} +import cats.implicits._ import cloud.nio.impl.drs.DrsPathResolver.{FatalRetryDisposition, RegularRetryDisposition} import cloud.nio.impl.drs.{AccessUrl, DrsConfig, DrsPathResolver, MarthaField} import cloud.nio.spi.{CloudNioBackoff, CloudNioSimpleExponentialBackoff} @@ -10,8 +11,12 @@ import drs.localizer.CommandLineParser.AccessTokenStrategy.{Azure, Google} import drs.localizer.accesstokens.{AccessTokenStrategy, AzureB2CAccessTokenStrategy, GoogleAccessTokenStrategy} import drs.localizer.downloaders.AccessUrlDownloader.Hashes import drs.localizer.downloaders._ +import org.apache.commons.csv.{CSVFormat, CSVParser} +import java.io.File +import java.nio.charset.Charset import scala.concurrent.duration._ +import scala.jdk.CollectionConverters._ import scala.language.postfixOps object DrsLocalizerMain extends IOApp with StrictLogging { @@ -51,8 +56,24 @@ object DrsLocalizerMain extends IOApp with StrictLogging { } def runLocalizer(commandLineArguments: CommandLineArguments, accessTokenStrategy: AccessTokenStrategy): IO[ExitCode] = { - val drsObject = commandLineArguments.drsObject.get - val containerPath = commandLineArguments.containerPath.get + commandLineArguments.manifestPath match { + 
case Some(manifestPath) => + val manifestFile = new File(manifestPath) + val csvParser = CSVParser.parse(manifestFile, Charset.defaultCharset(), CSVFormat.DEFAULT) + val exitCodes: IO[List[ExitCode]] = csvParser.asScala.map(record => { + val drsObject = record.get(0) + val containerPath = record.get(1) + localizeFile(commandLineArguments, accessTokenStrategy, drsObject, containerPath) + }).toList.sequence + exitCodes.map(_.find(_ != ExitCode.Success).getOrElse(ExitCode.Success)) + case None => + val drsObject = commandLineArguments.drsObject.get + val containerPath = commandLineArguments.containerPath.get + localizeFile(commandLineArguments, accessTokenStrategy, drsObject, containerPath) + } + } + + private def localizeFile(commandLineArguments: CommandLineArguments, accessTokenStrategy: AccessTokenStrategy, drsObject: String, containerPath: String) = { new DrsLocalizerMain(drsObject, containerPath, accessTokenStrategy, commandLineArguments.googleRequesterPaysProject). resolveAndDownloadWithRetries(downloadRetries = 3, checksumRetries = 1, defaultDownloaderFactory, Option(defaultBackoff)).map(_.exitCode) } diff --git a/cromwell-drs-localizer/src/main/scala/drs/localizer/accesstokens/AzureB2CAccessTokenStrategy.scala b/cromwell-drs-localizer/src/main/scala/drs/localizer/accesstokens/AzureB2CAccessTokenStrategy.scala index cf83171adf0..1634815d1f4 100644 --- a/cromwell-drs-localizer/src/main/scala/drs/localizer/accesstokens/AzureB2CAccessTokenStrategy.scala +++ b/cromwell-drs-localizer/src/main/scala/drs/localizer/accesstokens/AzureB2CAccessTokenStrategy.scala @@ -10,7 +10,7 @@ import drs.localizer.CommandLineArguments case class AzureB2CAccessTokenStrategy(commandLineArguments: CommandLineArguments) extends AccessTokenStrategy { override def getAccessToken(): ErrorOr[String] = { commandLineArguments match { - case CommandLineArguments(_, _, _, _, Some(vault), Some(secret), clientId) => + case CommandLineArguments(_, _, _, _, Some(vault), Some(secret), clientId, _, _) => AzureKeyVaultClient(vault, clientId) flatMap { _.getSecret(secret) } case invalid => s"Invalid command line arguments: $invalid".invalidNel } diff --git a/cromwell-drs-localizer/src/test/scala/drs/localizer/CommandLineParserSpec.scala b/cromwell-drs-localizer/src/test/scala/drs/localizer/CommandLineParserSpec.scala index 3f7b96f5edc..349b2d6887b 100644 --- a/cromwell-drs-localizer/src/test/scala/drs/localizer/CommandLineParserSpec.scala +++ b/cromwell-drs-localizer/src/test/scala/drs/localizer/CommandLineParserSpec.scala @@ -16,6 +16,7 @@ class CommandLineParserSpec extends AnyFlatSpec with CromwellTimeoutSpec with Ma private val azureVaultName = "Kwikset" private val azureSecretName = "shhh" private val azureIdentityClientId = "itme@azure.com" + private val manifestPath = "/my/manifest.txt" behavior of "DRS Localizer command line parser" @@ -41,10 +42,36 @@ class CommandLineParserSpec extends AnyFlatSpec with CromwellTimeoutSpec with Ma args.azureVaultName shouldBe empty args.azureSecretName shouldBe empty args.azureIdentityClientId shouldBe empty + args.manifestPath shouldBe empty } it should "successfully parse with three arguments" in { val args = parser.parse(Array(drsObject, containerPath, requesterPaysProject), CommandLineArguments()).get + args.drsObject.get shouldBe drsObject + args.containerPath.get shouldBe containerPath + args.accessTokenStrategy.get shouldBe AccessTokenStrategy.Google + args.googleRequesterPaysProject.get shouldBe requesterPaysProject + args.azureVaultName shouldBe empty + args.azureSecretName 
shouldBe empty + args.azureIdentityClientId shouldBe empty + args.manifestPath shouldBe empty + } + + it should "successfully parse requester pays project" in { + val args = parser.parse(Array(drsObject, containerPath, "-r", requesterPaysProject), CommandLineArguments()).get + + args.drsObject.get shouldBe drsObject + args.containerPath.get shouldBe containerPath + args.accessTokenStrategy.get shouldBe AccessTokenStrategy.Google + args.googleRequesterPaysProject.get shouldBe requesterPaysProject + args.azureVaultName shouldBe empty + args.azureSecretName shouldBe empty + args.azureIdentityClientId shouldBe empty + args.manifestPath shouldBe empty + } + + it should "successfully parse with three arguments and requester pays project" in { + val args = parser.parse(Array(drsObject, containerPath, requesterPaysProject, "-r", requesterPaysProject), CommandLineArguments()).get args.drsObject.get shouldBe drsObject args.containerPath.get shouldBe containerPath @@ -53,10 +80,43 @@ class CommandLineParserSpec extends AnyFlatSpec with CromwellTimeoutSpec with Ma args.azureVaultName shouldBe empty args.azureSecretName shouldBe empty args.azureIdentityClientId shouldBe empty + args.manifestPath shouldBe empty + } + + it should "fail if requester pays argument and flag specify different projects" in { + parser.parse(Array(drsObject, containerPath, requesterPaysProject, "-r", "boom!"), CommandLineArguments()) shouldBe None + } + + it should "successfully parse args with a manifest file" in { + val args = parser.parse(Array("-m", manifestPath), CommandLineArguments()).get + + args.drsObject shouldBe empty + args.containerPath shouldBe empty + args.accessTokenStrategy.get shouldBe AccessTokenStrategy.Google + args.googleRequesterPaysProject shouldBe empty + args.azureVaultName shouldBe empty + args.azureSecretName shouldBe empty + args.azureIdentityClientId shouldBe empty + args.manifestPath.get shouldBe manifestPath + } + + it should "fail to parse with a manifest file and one single-file arg" in { + val args = parser.parse(Array(drsObject, "--manifest-path", manifestPath), CommandLineArguments()) + args shouldBe None } - it should "successfully parse an explicit Google access token strategy invocation" in { - val args = parser.parse(Array("--access-token-strategy", "google", drsObject, containerPath, requesterPaysProject), CommandLineArguments()).get + it should "fail to parse with a manifest file and two single-file args" in { + val args = parser.parse(Array(drsObject, containerPath, "--manifest-path", manifestPath), CommandLineArguments()) + args shouldBe None + } + + it should "successfully parse an explicit Google access token stregy invocation" in { + val args = parser.parse(Array( + "--access-token-strategy", "google", + drsObject, + containerPath, + "--requester-pays-project", requesterPaysProject + ), CommandLineArguments()).get args.drsObject.get shouldBe drsObject args.containerPath.get shouldBe containerPath @@ -65,6 +125,7 @@ class CommandLineParserSpec extends AnyFlatSpec with CromwellTimeoutSpec with Ma args.azureVaultName shouldBe empty args.azureSecretName shouldBe empty args.azureIdentityClientId shouldBe empty + args.manifestPath shouldBe empty } it should "fail to parse an Azure invocation missing vault name and secret name" in { @@ -98,7 +159,9 @@ class CommandLineParserSpec extends AnyFlatSpec with CromwellTimeoutSpec with Ma "--access-token-strategy", AccessTokenStrategy.Azure, "--secret-name", azureSecretName, "--vault-name", azureVaultName, - drsObject, containerPath, 
requesterPaysProject), CommandLineArguments()) + drsObject, + containerPath, + "--requester-pays-project", requesterPaysProject), CommandLineArguments()) args shouldBe None } @@ -117,6 +180,7 @@ class CommandLineParserSpec extends AnyFlatSpec with CromwellTimeoutSpec with Ma args.azureVaultName.get shouldBe azureVaultName args.azureSecretName.get shouldBe azureSecretName args.azureIdentityClientId shouldBe empty + args.manifestPath shouldBe empty } it should "successfully parse an Azure invocation with all the trimmings" in { @@ -134,6 +198,7 @@ class CommandLineParserSpec extends AnyFlatSpec with CromwellTimeoutSpec with Ma args.azureVaultName.get shouldBe azureVaultName args.azureSecretName.get shouldBe azureSecretName args.azureIdentityClientId.get shouldBe azureIdentityClientId + args.manifestPath shouldBe empty } it should "fail to parse with an unrecognized access token strategy" in { diff --git a/project/Dependencies.scala b/project/Dependencies.scala index e85188a2ff1..50c76893e80 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -564,6 +564,7 @@ object Dependencies { "com.iheart" %% "ficus" % ficusV, "com.softwaremill.sttp" %% "circe" % sttpV, "com.github.scopt" %% "scopt" % scoptV, + "org.apache.commons" % "commons-csv" % commonsCsvV, ) ++ circeDependencies ++ catsDependencies ++ slf4jBindingDependencies ++ languageFactoryDependencies ++ azureDependencies val allProjectDependencies: List[ModuleID] = diff --git a/supportedBackends/google/pipelines/common/src/main/scala/cromwell/backend/google/pipelines/common/PipelinesApiAsyncBackendJobExecutionActor.scala b/supportedBackends/google/pipelines/common/src/main/scala/cromwell/backend/google/pipelines/common/PipelinesApiAsyncBackendJobExecutionActor.scala index e79ef867a54..c1c68b7dd39 100644 --- a/supportedBackends/google/pipelines/common/src/main/scala/cromwell/backend/google/pipelines/common/PipelinesApiAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/google/pipelines/common/src/main/scala/cromwell/backend/google/pipelines/common/PipelinesApiAsyncBackendJobExecutionActor.scala @@ -542,6 +542,8 @@ class PipelinesApiAsyncBackendJobExecutionActor(override val standardParams: Sta Future.successful(PendingExecutionHandle(jobDescriptor, jobForResumption, Option(Run(jobForResumption)), previousState = None)) } + protected def uploadDrsLocalizationManifest(createPipelineParameters: CreatePipelineParameters, cloudPath: Path): Future[Unit] = Future.successful(()) + protected def uploadGcsTransferLibrary(createPipelineParameters: CreatePipelineParameters, cloudPath: Path, gcsTransferConfiguration: GcsTransferConfiguration): Future[Unit] = Future.successful(()) protected def uploadGcsLocalizationScript(createPipelineParameters: CreatePipelineParameters, @@ -639,6 +641,8 @@ class PipelinesApiAsyncBackendJobExecutionActor(override val standardParams: Sta customLabels <- Future.fromTry(GoogleLabels.fromWorkflowOptions(workflowDescriptor.workflowOptions)) jesParameters <- generateInputOutputParameters createParameters = createPipelineParameters(jesParameters, customLabels) + drsLocalizationManifestCloudPath = jobPaths.callExecutionRoot / PipelinesApiJobPaths.DrsLocalizationManifestName + _ <- uploadDrsLocalizationManifest(createParameters, drsLocalizationManifestCloudPath) gcsTransferConfiguration = initializationData.papiConfiguration.papiAttributes.gcsTransferConfiguration gcsTransferLibraryCloudPath = jobPaths.callExecutionRoot / PipelinesApiJobPaths.GcsTransferLibraryName transferLibraryContainerPath = 
createParameters.commandScriptContainerPath.sibling(GcsTransferLibraryName) diff --git a/supportedBackends/google/pipelines/common/src/main/scala/cromwell/backend/google/pipelines/common/PipelinesApiJobPaths.scala b/supportedBackends/google/pipelines/common/src/main/scala/cromwell/backend/google/pipelines/common/PipelinesApiJobPaths.scala index 0fc9a8ff49d..ae047187825 100644 --- a/supportedBackends/google/pipelines/common/src/main/scala/cromwell/backend/google/pipelines/common/PipelinesApiJobPaths.scala +++ b/supportedBackends/google/pipelines/common/src/main/scala/cromwell/backend/google/pipelines/common/PipelinesApiJobPaths.scala @@ -13,6 +13,7 @@ object PipelinesApiJobPaths { val GcsTransferLibraryName = "gcs_transfer.sh" val GcsLocalizationScriptName = "gcs_localization.sh" val GcsDelocalizationScriptName = "gcs_delocalization.sh" + val DrsLocalizationManifestName = "drs_manifest" } // Non-`final` as this is mocked for testing since using a real instance proved too difficult. diff --git a/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/PipelinesApiAsyncBackendJobExecutionActor.scala b/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/PipelinesApiAsyncBackendJobExecutionActor.scala index 77370cabc92..aa49c385ba6 100644 --- a/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/PipelinesApiAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/PipelinesApiAsyncBackendJobExecutionActor.scala @@ -12,14 +12,19 @@ import cromwell.backend.google.pipelines.common.io.PipelinesApiWorkingDisk import cromwell.backend.google.pipelines.v2beta.PipelinesApiAsyncBackendJobExecutionActor._ import cromwell.backend.standard.StandardAsyncExecutionActorParams import cromwell.core.path.{DefaultPathBuilder, Path} +import cromwell.filesystems.drs.DrsPath import cromwell.filesystems.gcs.GcsPathBuilder.ValidFullGcsPath import cromwell.filesystems.gcs.{GcsPath, GcsPathBuilder} import org.apache.commons.codec.digest.DigestUtils +import org.apache.commons.csv.{CSVFormat, CSVPrinter} +import org.apache.commons.io.output.ByteArrayOutputStream import wom.core.FullyQualifiedName import wom.expression.FileEvaluation import wom.values.{GlobFunctions, WomFile, WomGlobFile, WomMaybeListedDirectory, WomMaybePopulatedFile, WomSingleFile, WomUnlistedDirectory} -import java.io.FileNotFoundException +import java.nio.charset.Charset + +import java.io.{FileNotFoundException, OutputStreamWriter} import scala.concurrent.Future import scala.io.Source import scala.language.postfixOps @@ -174,6 +179,14 @@ class PipelinesApiAsyncBackendJobExecutionActor(standardParams: StandardAsyncExe import mouse.all._ + override def uploadDrsLocalizationManifest(createPipelineParameters: CreatePipelineParameters, cloudPath: Path): Future[Unit] = { + val content = generateDrsLocalizerManifest(createPipelineParameters.inputOutputParameters.fileInputParameters) + if (content.nonEmpty) + asyncIo.writeAsync(cloudPath, content, Seq(CloudStorageOptions.withMimeType("text/plain"))) + else + Future.unit + } + private def generateGcsLocalizationScript(inputs: List[PipelinesApiInput], referenceInputsToMountedPathsOpt: Option[Map[PipelinesApiInput, String]]) (implicit gcsTransferConfiguration: GcsTransferConfiguration): String = { @@ -396,4 +409,17 @@ object PipelinesApiAsyncBackendJobExecutionActor { } } combineAll } + + private 
[v2beta] def generateDrsLocalizerManifest(inputs: List[PipelinesApiInput]): String = { + val outputStream = new ByteArrayOutputStream() + val csvPrinter = new CSVPrinter(new OutputStreamWriter(outputStream), CSVFormat.DEFAULT) + val drsFileInputs = inputs collect { + case drsInput@PipelinesApiFileInput(_, drsPath: DrsPath, _, _) => (drsInput, drsPath) + } + drsFileInputs foreach { case (drsInput, drsPath) => + csvPrinter.printRecord(drsPath.pathAsString, drsInput.containerPath.pathAsString) + } + csvPrinter.close(true) + outputStream.toString(Charset.defaultCharset()) + } } diff --git a/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/PipelinesParameterConversions.scala b/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/PipelinesParameterConversions.scala index 505c23edb9c..64365fe404f 100644 --- a/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/PipelinesParameterConversions.scala +++ b/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/PipelinesParameterConversions.scala @@ -1,6 +1,5 @@ package cromwell.backend.google.pipelines.v2beta -import cloud.nio.impl.drs.{DrsCloudNioFileSystemProvider, DrsConfig} import com.google.api.services.lifesciences.v2beta.model.{Action, Mount} import com.typesafe.config.ConfigFactory import cromwell.backend.google.pipelines.common.action.ActionCommands._ @@ -27,25 +26,6 @@ trait PipelinesParameterConversions { val labels = ActionBuilder.parameterLabels(fileInput) fileInput.cloudPath match { - case drsPath: DrsPath => - - import scala.jdk.CollectionConverters._ - - val drsFileSystemProvider = drsPath.drsPath.getFileSystem.provider.asInstanceOf[DrsCloudNioFileSystemProvider] - - val drsDockerImage = config.getString("drs.localization.docker-image") - // Note: Don't ShellPath.escape the paths as we are directly invoking the localizer and NOT launching a shell. - val drsCommand = - List(fileInput.cloudPath.pathAsString, fileInput.containerPath.pathAsString) ++ - drsPath.requesterPaysProjectIdOption.toList - val marthaEnv = DrsConfig.toEnv(drsFileSystemProvider.drsConfig) - val localizationAction = ActionBuilder - .withImage(drsDockerImage) - .withCommand(drsCommand: _*) - .withMounts(mounts) - .setEnvironment(marthaEnv.asJava) - .withLabels(labels) - List(ActionBuilder.describeParameter(fileInput, labels), localizationAction) case sraPath: SraPath => val sraConfig = config.getConfig("filesystems.sra") @@ -78,6 +58,9 @@ trait PipelinesParameterConversions { case _: GcsPath => // GCS paths will be localized with a separate localization script. 
Nil + case _: DrsPath => + // DRS paths will be localized with a single call to cromwell-drs-localizer with a manifest + Nil } } } diff --git a/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/api/Localization.scala b/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/api/Localization.scala index 4aa52f89801..8e8065fe679 100644 --- a/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/api/Localization.scala +++ b/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/api/Localization.scala @@ -1,14 +1,21 @@ package cromwell.backend.google.pipelines.v2beta.api +import cloud.nio.impl.drs.DrsConfig import com.google.api.services.lifesciences.v2beta.model.{Action, Mount} +import com.typesafe.config.ConfigFactory import cromwell.backend.google.pipelines.common.action.ActionCommands.localizeFile import cromwell.backend.google.pipelines.common.action.ActionLabels._ import cromwell.backend.google.pipelines.common.PipelinesApiConfigurationAttributes.GcsTransferConfiguration +import cromwell.backend.google.pipelines.common.PipelinesApiFileInput import cromwell.backend.google.pipelines.common.PipelinesApiJobPaths._ import cromwell.backend.google.pipelines.common.api.PipelinesApiRequestFactory.CreatePipelineParameters import cromwell.backend.google.pipelines.v2beta.PipelinesConversions._ import cromwell.backend.google.pipelines.v2beta.ToParameter.ops._ -import cromwell.backend.google.pipelines.v2beta.api.ActionBuilder.cloudSdkShellAction +import cromwell.backend.google.pipelines.v2beta.api.ActionBuilder.{EnhancedAction, cloudSdkShellAction} +import cromwell.core.path.Path +import cromwell.filesystems.drs.DrsPath + +import scala.jdk.CollectionConverters._ trait Localization { @@ -34,15 +41,59 @@ trait Localization { val runGcsLocalizationScript = cloudSdkShellAction( s"/bin/bash $gcsLocalizationContainerPath")(mounts = mounts, labels = localizationLabel) + val drsInputs: List[DrsPath] = createPipelineParameters.inputOutputParameters.fileInputParameters.collect { + case PipelinesApiFileInput(_, drsPath: DrsPath, _, _) => drsPath + } + + val drsLocalizationActions = if (drsInputs.nonEmpty) { + val drsLocalizationManifestContainerPath = createPipelineParameters.commandScriptContainerPath.sibling(DrsLocalizationManifestName) + val localizeDrsLocalizationManifest = cloudSdkShellAction(localizeFile( + cloudPath = createPipelineParameters.cloudCallRoot / DrsLocalizationManifestName, + containerPath = drsLocalizationManifestContainerPath))(mounts = mounts, labels = localizationLabel) + + // Requester pays project id is stored on each DrsPath, but will be the same for all DRS inputs to a + // particular workflow because it's determined by the Google project set in workflow options. + val requesterPaysProjectId: Option[String] = drsInputs.flatMap(_.requesterPaysProjectIdOption).headOption + val runDrsLocalization = Localization.drsAction(drsLocalizationManifestContainerPath, mounts, localizationLabel, requesterPaysProjectId) + List(localizeDrsLocalizationManifest, runDrsLocalization) + } else List[Action]() + // Any "classic" PAPI v2 one-at-a-time localizations for non-GCS inputs. 
val singletonLocalizations = createPipelineParameters.inputOutputParameters.fileInputParameters.flatMap(_.toActions(mounts).toList) val localizations = localizeGcsTransferLibrary :: localizeGcsLocalizationScript :: runGcsLocalizationScript :: + drsLocalizationActions ::: localizeGcsDelocalizationScript :: singletonLocalizations ActionBuilder.annotateTimestampedActions("localization", Value.Localization)(localizations) } } + +object Localization { + + def drsAction(manifestPath: Path, + mounts: List[Mount], + labels: Map[String, String], + requesterPaysProjectId: Option[String] + ): Action = { + val config = ConfigFactory.load + val marthaConfig = config.getConfig("filesystems.drs.global.config.martha") + val drsConfig = DrsConfig.fromConfig(marthaConfig) + val drsDockerImage = config.getString("drs.localization.docker-image") + + val manifestArg = List("-m", manifestPath.pathAsString) + val requesterPaysArg = requesterPaysProjectId.map(r => List("-r", r)).getOrElse(List.empty) + val drsCommand = manifestArg ++ requesterPaysArg + + val marthaEnv = DrsConfig.toEnv(drsConfig) + ActionBuilder + .withImage(drsDockerImage) + .withCommand(drsCommand: _*) + .withMounts(mounts) + .setEnvironment(marthaEnv.asJava) + .withLabels(labels) + } +} diff --git a/supportedBackends/google/pipelines/v2beta/src/test/scala/cromwell/backend/google/pipelines/v2beta/PipelinesApiAsyncBackendJobExecutionActorSpec.scala b/supportedBackends/google/pipelines/v2beta/src/test/scala/cromwell/backend/google/pipelines/v2beta/PipelinesApiAsyncBackendJobExecutionActorSpec.scala index 9b93a976e22..093ecd3fa27 100644 --- a/supportedBackends/google/pipelines/v2beta/src/test/scala/cromwell/backend/google/pipelines/v2beta/PipelinesApiAsyncBackendJobExecutionActorSpec.scala +++ b/supportedBackends/google/pipelines/v2beta/src/test/scala/cromwell/backend/google/pipelines/v2beta/PipelinesApiAsyncBackendJobExecutionActorSpec.scala @@ -2,14 +2,22 @@ package cromwell.backend.google.pipelines.v2beta import java.nio.file.Paths import cats.data.NonEmptyList +import cloud.nio.impl.drs.DrsCloudNioFileProvider.DrsReadInterpreter +import cloud.nio.impl.drs.{DrsCloudNioFileSystemProvider, GoogleDrsCredentials} +import com.google.cloud.NoCredentials +import com.typesafe.config.{Config, ConfigFactory} import common.assertion.CromwellTimeoutSpec import common.mock.MockSugar import cromwell.backend.google.pipelines.common.PipelinesApiFileInput +import cromwell.backend.google.pipelines.common.io.{DiskType, PipelinesApiWorkingDisk} import cromwell.core.path.DefaultPathBuilder +import cromwell.filesystems.drs.DrsPathBuilder import org.mockito.Mockito._ import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers +import scala.concurrent.duration.DurationInt + class PipelinesApiAsyncBackendJobExecutionActorSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matchers with MockSugar { behavior of "PipelinesParameterConversions" @@ -44,4 +52,45 @@ class PipelinesApiAsyncBackendJobExecutionActorSpec extends AnyFlatSpec with Cro PipelinesApiAsyncBackendJobExecutionActor.groupParametersByGcsBucket(inputs) shouldEqual expected } + + it should "generate a CSV manifest for DRS inputs, ignoring non-DRS inputs" in { + def makeDrsPathBuilder: DrsPathBuilder = { + val marthaConfig: Config = ConfigFactory.parseString( + """martha { + | url = "http://martha-url" + |} + |""".stripMargin + ) + + val fakeCredentials = NoCredentials.getInstance + + val drsReadInterpreter: DrsReadInterpreter = (_, _) => + throw new 
UnsupportedOperationException("PipelinesApiAsyncBackendJobExecutionActorSpec doesn't need to use drs read interpreter.") + + DrsPathBuilder( + new DrsCloudNioFileSystemProvider(marthaConfig, GoogleDrsCredentials(fakeCredentials, 1.minutes), drsReadInterpreter), + None, + ) + } + + val mount = PipelinesApiWorkingDisk(DiskType.LOCAL, 1) + + def makeDrsInput(name: String, drsUri: String, containerPath: String): PipelinesApiFileInput = { + val drsPath = makeDrsPathBuilder.build(drsUri).get + val containerRelativePath = DefaultPathBuilder.get(containerPath) + PipelinesApiFileInput(name, drsPath, containerRelativePath, mount) + } + + val nonDrsInput: PipelinesApiFileInput = PipelinesApiFileInput("nnn", + DefaultPathBuilder.get("/local/nnn.bai"), DefaultPathBuilder.get("/path/to/nnn.bai"), mount) + + val inputs = List( + makeDrsInput("aaa", "drs://drs.example.org/aaa", "path/to/aaa.bai"), + nonDrsInput, + makeDrsInput("bbb", "drs://drs.example.org/bbb", "path/to/bbb.bai") + ) + + PipelinesApiAsyncBackendJobExecutionActor.generateDrsLocalizerManifest(inputs) shouldEqual + "drs://drs.example.org/aaa,/cromwell_root/path/to/aaa.bai\r\ndrs://drs.example.org/bbb,/cromwell_root/path/to/bbb.bai\r\n" + } } diff --git a/supportedBackends/google/pipelines/v2beta/src/test/scala/cromwell/backend/google/pipelines/v2beta/PipelinesConversionsSpec.scala b/supportedBackends/google/pipelines/v2beta/src/test/scala/cromwell/backend/google/pipelines/v2beta/PipelinesConversionsSpec.scala deleted file mode 100644 index c3ed9d27b02..00000000000 --- a/supportedBackends/google/pipelines/v2beta/src/test/scala/cromwell/backend/google/pipelines/v2beta/PipelinesConversionsSpec.scala +++ /dev/null @@ -1,96 +0,0 @@ -package cromwell.backend.google.pipelines.v2beta - -import cloud.nio.impl.drs.DrsCloudNioFileProvider.DrsReadInterpreter -import cloud.nio.impl.drs.{DrsCloudNioFileSystemProvider, GoogleDrsCredentials} -import com.google.cloud.NoCredentials -import com.typesafe.config.{Config, ConfigFactory} -import common.assertion.CromwellTimeoutSpec -import cromwell.backend.google.pipelines.common.PipelinesApiConfigurationAttributes.GcsTransferConfiguration -import cromwell.backend.google.pipelines.common.PipelinesApiFileInput -import cromwell.backend.google.pipelines.common.action.ActionUtils -import cromwell.backend.google.pipelines.common.io.{DiskType, PipelinesApiWorkingDisk} -import cromwell.core.path.DefaultPathBuilder -import cromwell.filesystems.drs.DrsPathBuilder -import eu.timepit.refined.refineMV -import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.should.Matchers - -import scala.jdk.CollectionConverters._ -import scala.concurrent.duration.DurationInt - -class PipelinesConversionsSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matchers { - - behavior of "PipelinesConversions" - implicit val gcsTransferConfiguration: GcsTransferConfiguration = - GcsTransferConfiguration(transferAttempts = refineMV(1), parallelCompositeUploadThreshold = "0") - - private val marthaConfig: Config = ConfigFactory.parseString( - """martha { - | url = "http://martha-url" - |} - |""".stripMargin - ) - - private lazy val fakeCredentials = NoCredentials.getInstance - - private val drsReadInterpreter: DrsReadInterpreter = (_, _) => - throw new UnsupportedOperationException("Currently PipelinesConversionsSpec doesn't need to use drs read interpreter.") - - it should "create a DRS input parameter" in { - - val drsPathBuilder = DrsPathBuilder( - new DrsCloudNioFileSystemProvider(marthaConfig, 
GoogleDrsCredentials(fakeCredentials, 1.minutes), drsReadInterpreter), - None, - ) - val drsPath = drsPathBuilder.build("drs://drs.example.org/aaaabbbb-cccc-dddd-eeee-abcd0000dcba").get - val containerRelativePath = DefaultPathBuilder.get("path/to/file.bai") - val mount = PipelinesApiWorkingDisk(DiskType.LOCAL, 1) - val input = PipelinesApiFileInput("example", drsPath, containerRelativePath, mount) - val actions = PipelinesConversions.inputToParameter.toActions(input, Nil) - actions.size should be(2) - - val logging = actions.head - - logging.keySet.asScala should contain theSameElementsAs - Set("entrypoint", "commands", "imageUri", "labels", "mounts", "timeout") - - logging.get("commands") should be(a[java.util.List[_]]) - logging.get("commands").asInstanceOf[java.util.List[_]] should contain( - """printf '%s %s\n' "$(date -u '+%Y/%m/%d %H:%M:%S')" """ + - """Localizing\ input\ drs://drs.example.org/aaaabbbb-cccc-dddd-eeee-abcd0000dcba\ """ + - """-\>\ /cromwell_root/path/to/file.bai""" - ) - - logging.get("mounts") should be(a[java.util.List[_]]) - logging.get("mounts").asInstanceOf[java.util.List[_]] should be (empty) - - logging.get("imageUri") should be(ActionUtils.CloudSdkImage) - - val loggingLabels = logging.get("labels").asInstanceOf[java.util.Map[_, _]] - loggingLabels.keySet.asScala should contain theSameElementsAs List("logging", "inputName") - loggingLabels.get("logging") should be("Localization") - loggingLabels.get("inputName") should be("example") - - val action = actions.tail.head - - action.keySet.asScala should contain theSameElementsAs - Set("commands", "environment", "imageUri", "labels", "mounts") - - action.get("commands") should be(a[java.util.List[_]]) - action.get("commands").asInstanceOf[java.util.List[_]] should contain theSameElementsAs List( - "drs://drs.example.org/aaaabbbb-cccc-dddd-eeee-abcd0000dcba", - "/cromwell_root/path/to/file.bai" - ) - - action.get("mounts") should be(a[java.util.List[_]]) - action.get("mounts").asInstanceOf[java.util.List[_]] should be (empty) - - action.get("imageUri") should be("somerepo/drs-downloader:tagged") - - val actionLabels = action.get("labels").asInstanceOf[java.util.Map[_, _]] - actionLabels.keySet.asScala should contain theSameElementsAs List("tag", "inputName") - actionLabels.get("tag") should be("Localization") - actionLabels.get("inputName") should be("example") - } - -} diff --git a/supportedBackends/google/pipelines/v2beta/src/test/scala/cromwell/backend/google/pipelines/v2beta/api/LocalizationSpec.scala b/supportedBackends/google/pipelines/v2beta/src/test/scala/cromwell/backend/google/pipelines/v2beta/api/LocalizationSpec.scala new file mode 100644 index 00000000000..f815ceebd14 --- /dev/null +++ b/supportedBackends/google/pipelines/v2beta/src/test/scala/cromwell/backend/google/pipelines/v2beta/api/LocalizationSpec.scala @@ -0,0 +1,67 @@ +package cromwell.backend.google.pipelines.v2beta.api + +import common.assertion.CromwellTimeoutSpec +import cromwell.backend.google.pipelines.common.PipelinesApiJobPaths.DrsLocalizationManifestName +import cromwell.core.path.DefaultPathBuilder +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + +import scala.jdk.CollectionConverters._ + +class LocalizationSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matchers { + + behavior of "Localization" + + it should "create the right action to localize DRS files using a manifest" in { + + val manifestPathString = s"path/to/${DrsLocalizationManifestName}" + val manifestPath = 
DefaultPathBuilder.get(manifestPathString) + val tagKey = "tag" + val tagLabel = "myLabel" + + val action = Localization.drsAction(manifestPath, Nil, Map(tagKey -> tagLabel), None) + action.keySet.asScala should contain theSameElementsAs + Set("commands", "environment", "imageUri", "labels", "mounts") + + action.get("commands") should be(a[java.util.List[_]]) + action.get("commands").asInstanceOf[java.util.List[_]] should contain theSameElementsAs List( + "-m", manifestPathString + ) + + action.get("mounts") should be(a[java.util.List[_]]) + action.get("mounts").asInstanceOf[java.util.List[_]] should be (empty) + + action.get("imageUri") should be("somerepo/drs-downloader:tagged") + + val actionLabels = action.get("labels").asInstanceOf[java.util.Map[_, _]] + actionLabels.keySet.asScala should contain theSameElementsAs List("tag") + actionLabels.get(tagKey) should be(tagLabel) + } + + it should "create the right action to localize DRS files using a manifest with requester pays" in { + + val manifestPathString = s"path/to/${DrsLocalizationManifestName}" + val manifestPath = DefaultPathBuilder.get(manifestPathString) + val tagKey = "tag" + val tagLabel = "myLabel" + val requesterPaysProjectId = "123" + + val action = Localization.drsAction(manifestPath, Nil, Map(tagKey -> tagLabel), Option(requesterPaysProjectId)) + action.keySet.asScala should contain theSameElementsAs + Set("commands", "environment", "imageUri", "labels", "mounts") + + action.get("commands") should be(a[java.util.List[_]]) + action.get("commands").asInstanceOf[java.util.List[_]] should contain theSameElementsAs List( + "-m", manifestPathString, "-r", requesterPaysProjectId + ) + + action.get("mounts") should be(a[java.util.List[_]]) + action.get("mounts").asInstanceOf[java.util.List[_]] should be (empty) + + action.get("imageUri") should be("somerepo/drs-downloader:tagged") + + val actionLabels = action.get("labels").asInstanceOf[java.util.Map[_, _]] + actionLabels.keySet.asScala should contain theSameElementsAs List("tag") + actionLabels.get(tagKey) should be(tagLabel) + } +} From f4a19ba925d498a7c01645e897915f0aa7061ac6 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Mon, 3 Oct 2022 14:11:14 -0400 Subject: [PATCH 090/326] WX-755 Build all images instead of just Cromwell (#6919) --- .github/workflows/chart_update_on_merge.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/chart_update_on_merge.yml b/.github/workflows/chart_update_on_merge.yml index 9e847bc17eb..81a7c3a8259 100644 --- a/.github/workflows/chart_update_on_merge.yml +++ b/.github/workflows/chart_update_on_merge.yml @@ -48,12 +48,12 @@ jobs: with: username: dsdejenkins password: ${{ secrets.DSDEJENKINS_PASSWORD }} + # Build & push `cromwell`, `womtool`, `cromiam`, and `cromwell-drs-localizer` - name: Build Cromwell Docker run: | set -e cd cromwell - sbt server/docker - docker push broadinstitute/cromwell:$CROMWELL_SNAP_VERSION + sbt dockerBuildAndPush - name: Edit & push chart env: BROADBOT_GITHUB_TOKEN: ${{ secrets.BROADBOT_GITHUB_TOKEN }} From 07d4a7472def7220ca644016d19e142b9268b972 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Tue, 4 Oct 2022 11:16:41 -0400 Subject: [PATCH 091/326] WX-755 Add `isRelease` option for Docker builds (#6923) --- .github/workflows/chart_update_on_merge.yml | 2 +- project/Publishing.scala | 15 ++++++++++++++- project/Version.scala | 13 ++++++++++++- 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/.github/workflows/chart_update_on_merge.yml 
b/.github/workflows/chart_update_on_merge.yml index 81a7c3a8259..f87b48d484e 100644 --- a/.github/workflows/chart_update_on_merge.yml +++ b/.github/workflows/chart_update_on_merge.yml @@ -53,7 +53,7 @@ jobs: run: | set -e cd cromwell - sbt dockerBuildAndPush + sbt -Dproject.isSnapshot=false -Dproject.isRelease=false dockerBuildAndPush - name: Edit & push chart env: BROADBOT_GITHUB_TOKEN: ${{ secrets.BROADBOT_GITHUB_TOKEN }} diff --git a/project/Publishing.scala b/project/Publishing.scala index 150c30ca293..1b798780086 100644 --- a/project/Publishing.scala +++ b/project/Publishing.scala @@ -36,7 +36,20 @@ object Publishing { ArrayBuffer(broadinstitute/cromwell:dev, broadinstitute/cromwell:develop) */ dockerTags := { - val versionsCsv = if (Version.isSnapshot) version.value else s"$cromwellVersion,${version.value}" + val versionsCsv = if (Version.isSnapshot) { + // Tag looks like `85-443a6fc-SNAP` + version.value + } else { + if (Version.isRelease) { + // Tags look like `85`, `85-443a6fc` + s"$cromwellVersion,${version.value}" + } else { + // Tag looks like `85-443a6fc` + version.value + } + } + + // Travis applies (as of 10/22) the `dev` and `develop` tags on merge to `develop` sys.env.getOrElse("CROMWELL_SBT_DOCKER_TAGS", versionsCsv).split(",") }, docker / imageNames := dockerTags.value map { tag => diff --git a/project/Version.scala b/project/Version.scala index dbe795e6f0c..ca782b990e2 100644 --- a/project/Version.scala +++ b/project/Version.scala @@ -13,8 +13,19 @@ object Version { * * The value is read in directly from the system property `project.isSnapshot` as there were confusing issues with * the multi-project and sbt.Keys#isSnapshot(). + * + * Default `true`. + */ + val isSnapshot: Boolean = sys.props.get("project.isSnapshot").forall(_.toBoolean) + + /** + * Returns `true` if this project should tag a release like `85` in addition to a hash like `85-443a6fc`. + * + * Has no effect when `isSnapshot` is `true`. + * + * Default `true`. 
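+   *
+   * (Editor's illustration, not part of the original patch: per the tagging comments in
+   * Publishing.scala above, `sbt -Dproject.isSnapshot=false -Dproject.isRelease=true dockerBuildAndPush`
+   * would push tags `85` and `85-443a6fc`, while the merge workflow's
+   * `-Dproject.isSnapshot=false -Dproject.isRelease=false` pushes only `85-443a6fc`.)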
*/ - val isSnapshot = sys.props.get("project.isSnapshot").forall(_.toBoolean) + val isRelease: Boolean = sys.props.get("project.isRelease").forall(_.toBoolean) // Adapted from SbtGit.versionWithGit def cromwellVersionWithGit: Seq[Setting[_]] = From dd40e08c5d1cb4ccac371a7c645b3a522f0c7df1 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Wed, 5 Oct 2022 10:19:36 -0400 Subject: [PATCH 092/326] WX-755 Cromwell/CromIAM automatically board train (#6924) --- .github/workflows/chart_update_on_merge.yml | 22 +++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/.github/workflows/chart_update_on_merge.yml b/.github/workflows/chart_update_on_merge.yml index f87b48d484e..1d5d0bd7864 100644 --- a/.github/workflows/chart_update_on_merge.yml +++ b/.github/workflows/chart_update_on_merge.yml @@ -41,7 +41,7 @@ jobs: echo "CROMWELL_NUMBER=$((previous_version + 1))" >> $GITHUB_ENV - name: Save complete image ID run: | - echo "CROMWELL_SNAP_VERSION=`echo "$CROMWELL_NUMBER-$CROMWELL_SHORT_SHA-SNAP"`" >> $GITHUB_ENV + echo "CROMWELL_VERSION=`echo "$CROMWELL_NUMBER-$CROMWELL_SHORT_SHA"`" >> $GITHUB_ENV # `DSDEJENKINS_PASSWORD` auto syncs from vault with https://github.com/broadinstitute/terraform-ap-deployments/pull/614 - name: Login to Docker Hub uses: docker/login-action@v1 @@ -54,6 +54,20 @@ jobs: set -e cd cromwell sbt -Dproject.isSnapshot=false -Dproject.isRelease=false dockerBuildAndPush + - name: Deploy to dev and board release train (Cromwell) + uses: broadinstitute/repository-dispatch@master + with: + token: ${{ secrets.BROADBOT_GITHUB_TOKEN }} + repository: broadinstitute/terra-helmfile + event-type: update-service + client-payload: '{"service": "cromwell", "version": "$CROMWELL_VERSION", "dev_only": false}' + - name: Deploy to dev and board release train (CromIAM) + uses: broadinstitute/repository-dispatch@master + with: + token: ${{ secrets.BROADBOT_GITHUB_TOKEN }} + repository: broadinstitute/terra-helmfile + event-type: update-service + client-payload: '{"service": "cromiam", "version": "$CROMWELL_VERSION", "dev_only": false}' - name: Edit & push chart env: BROADBOT_GITHUB_TOKEN: ${{ secrets.BROADBOT_GITHUB_TOKEN }} @@ -62,10 +76,10 @@ jobs: cd cromwhelm git checkout main ls -la - sed -i "s/appVersion.*/appVersion: \"$CROMWELL_SNAP_VERSION\"/" cromwell-helm/Chart.yaml - sed -i "s/image: broadinstitute\/cromwell.*/image: broadinstitute\/cromwell:$CROMWELL_SNAP_VERSION/" cromwell-helm/templates/cromwell.yaml + sed -i "s/appVersion.*/appVersion: \"$CROMWELL_VERSION\"/" cromwell-helm/Chart.yaml + sed -i "s/image: broadinstitute\/cromwell.*/image: broadinstitute\/cromwell:$CROMWELL_VERSION/" cromwell-helm/templates/cromwell.yaml git diff git config --global user.name "broadbot" git config --global user.email "broadbot@broadinstitute.org" - git commit -am "Auto update to Cromwell $CROMWELL_SNAP_VERSION" + git commit -am "Auto update to Cromwell $CROMWELL_VERSION" git push https://broadbot:$BROADBOT_GITHUB_TOKEN@github.com/broadinstitute/cromwhelm.git main From 980d4100d4ce7454e4259166bbe95f41a4f06660 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Wed, 5 Oct 2022 13:55:22 -0400 Subject: [PATCH 093/326] WX-755 Fix environment variable syntax (#6926) --- .github/workflows/chart_update_on_merge.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/chart_update_on_merge.yml b/.github/workflows/chart_update_on_merge.yml index 1d5d0bd7864..aa7a2c9c9ad 100644 --- a/.github/workflows/chart_update_on_merge.yml +++ 
b/.github/workflows/chart_update_on_merge.yml @@ -60,14 +60,14 @@ jobs: token: ${{ secrets.BROADBOT_GITHUB_TOKEN }} repository: broadinstitute/terra-helmfile event-type: update-service - client-payload: '{"service": "cromwell", "version": "$CROMWELL_VERSION", "dev_only": false}' + client-payload: '{"service": "cromwell", "version": "${{ env.CROMWELL_VERSION }}", "dev_only": false}' - name: Deploy to dev and board release train (CromIAM) uses: broadinstitute/repository-dispatch@master with: token: ${{ secrets.BROADBOT_GITHUB_TOKEN }} repository: broadinstitute/terra-helmfile event-type: update-service - client-payload: '{"service": "cromiam", "version": "$CROMWELL_VERSION", "dev_only": false}' + client-payload: '{"service": "cromiam", "version": "${{ env.CROMWELL_VERSION }}", "dev_only": false}' - name: Edit & push chart env: BROADBOT_GITHUB_TOKEN: ${{ secrets.BROADBOT_GITHUB_TOKEN }} From 7ad40b195ea67e8e6b3c62d935bdc5ece277c887 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Thu, 6 Oct 2022 15:22:41 -0400 Subject: [PATCH 094/326] WX-743 Enable TES task creation with BlobPaths (#6921) * Give blob SAS tokens write permission * Case class wrapper for subscription id * Resolve duplicate container name in absolute BlobPath * Ignored test demonstrating correct absolute path generation * Update filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala Co-authored-by: Brian Reilly * PR feedback Co-authored-by: Brian Reilly --- .../blob/BlobFileSystemManager.scala | 14 ++++++--- .../filesystems/blob/BlobPathBuilder.scala | 29 ++++++++++++++++--- .../blob/BlobPathBuilderFactory.scala | 7 +++-- .../blob/BlobPathBuilderSpec.scala | 13 +++++++++ 4 files changed, 52 insertions(+), 11 deletions(-) diff --git a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobFileSystemManager.scala b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobFileSystemManager.scala index 3b8f5149055..a3e6f33572a 100644 --- a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobFileSystemManager.scala +++ b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobFileSystemManager.scala @@ -77,10 +77,15 @@ case class BlobFileSystemManager( sealed trait BlobTokenGenerator {def generateAccessToken: Try[AzureSasCredential]} object BlobTokenGenerator { - def createBlobTokenGenerator(container: BlobContainerName, endpoint: EndpointURL, subscription: Option[String]): BlobTokenGenerator = { + def createBlobTokenGenerator(container: BlobContainerName, endpoint: EndpointURL, subscription: Option[SubscriptionId]): BlobTokenGenerator = { createBlobTokenGenerator(container, endpoint, None, None, subscription) } - def createBlobTokenGenerator(container: BlobContainerName, endpoint: EndpointURL, workspaceId: Option[WorkspaceId], workspaceManagerURL: Option[WorkspaceManagerURL], subscription: Option[String]): BlobTokenGenerator = { + def createBlobTokenGenerator(container: BlobContainerName, + endpoint: EndpointURL, + workspaceId: Option[WorkspaceId], + workspaceManagerURL: Option[WorkspaceManagerURL], + subscription: Option[SubscriptionId] + ): BlobTokenGenerator = { (container: BlobContainerName, endpoint: EndpointURL, workspaceId, workspaceManagerURL) match { case (container, endpoint, None, None) => NativeBlobTokenGenerator(container, endpoint, subscription) @@ -100,13 +105,13 @@ case class WSMBlobTokenGenerator(container: BlobContainerName, endpoint: Endpoin def generateAccessToken: Try[AzureSasCredential] = Failure(new NotImplementedError) } -case class 
NativeBlobTokenGenerator(container: BlobContainerName, endpoint: EndpointURL, subscription: Option[String] = None) extends BlobTokenGenerator { +case class NativeBlobTokenGenerator(container: BlobContainerName, endpoint: EndpointURL, subscription: Option[SubscriptionId] = None) extends BlobTokenGenerator { private val azureProfile = new AzureProfile(AzureEnvironment.AZURE) private def azureCredentialBuilder = new DefaultAzureCredentialBuilder() .authorityHost(azureProfile.getEnvironment.getActiveDirectoryEndpoint) .build - private def authenticateWithSubscription(sub: String) = AzureResourceManager.authenticate(azureCredentialBuilder, azureProfile).withSubscription(sub) + private def authenticateWithSubscription(sub: SubscriptionId) = AzureResourceManager.authenticate(azureCredentialBuilder, azureProfile).withSubscription(sub.value) private def authenticateWithDefaultSubscription = AzureResourceManager.authenticate(azureCredentialBuilder, azureProfile).withDefaultSubscription() private def azure = subscription.map(authenticateWithSubscription(_)).getOrElse(authenticateWithDefaultSubscription) @@ -123,6 +128,7 @@ case class NativeBlobTokenGenerator(container: BlobContainerName, endpoint: Endp .setReadPermission(true) .setCreatePermission(true) .setListPermission(true) + .setWritePermission(true) def generateAccessToken: Try[AzureSasCredential] = for { diff --git a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala index 3e69ce2a7bd..1be6bb8e13b 100644 --- a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala +++ b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala @@ -67,19 +67,40 @@ class BlobPathBuilder(container: BlobContainerName, endpoint: EndpointURL)(priva override def name: String = "Azure Blob Storage" } +object BlobPath { + // The Azure NIO library uses `{containerName}:` as the root of the path. + // This doesn't work well for our need to easily transfer back and forth + // to and from the blob URL format. This method removes anything up to and including + // the first colon, to create a path string useful for working with BlobPath. + // This is safe because the NIO library enforces no colons except to mark + // the root container name. 
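+  //
+  // (Editor's illustration with hypothetical values, not part of the original patch: a NioPath that
+  // the library renders as "inputs:/cromwell-execution/test/inputFile.txt" comes back from this
+  // method as "/cromwell-execution/test/inputFile.txt".)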
+ private def nioPathString(nioPath: NioPath): String = { + val pathStr = nioPath.toString + pathStr.substring(pathStr.indexOf(":")+1) + } + + def apply(nioPath: NioPath, + endpoint: EndpointURL, + container: BlobContainerName, + fsm: BlobFileSystemManager): BlobPath = { + BlobPath(nioPathString(nioPath), endpoint, container)(fsm) + } +} + case class BlobPath private[blob](pathString: String, endpoint: EndpointURL, container: BlobContainerName)(private val fsm: BlobFileSystemManager) extends Path { override def nioPath: NioPath = findNioPath(pathString) - override protected def newPath(nioPath: NioPath): Path = BlobPath(nioPath.toString, endpoint, container)(fsm) + override protected def newPath(nioPath: NioPath): Path = BlobPath(nioPath, endpoint, container, fsm) - override def pathAsString: String = List(endpoint, container, nioPath.toString).mkString("/") + override def pathAsString: String = List(endpoint, container, pathString.stripPrefix("/")).mkString("/") //This is purposefully an unprotected get because if the endpoint cannot be parsed this should fail loudly rather than quietly - override def pathWithoutScheme: String = parseURI(endpoint.value).map(_.getHost + "/" + container + "/" + nioPath.toString).get + override def pathWithoutScheme: String = parseURI(endpoint.value).map(u => List(u.getHost, container, pathString).mkString("/")).get private def findNioPath(path: String): NioPath = (for { fileSystem <- fsm.retrieveFilesystem() - nioPath = fileSystem.getPath(path) + // The Azure NIO library uses `{container}:` to represent the root of the path + nioPath = fileSystem.getPath(s"${container.value}:", path) // This is purposefully an unprotected get because the NIO API needing an unwrapped path object. // If an error occurs the api expects a thrown exception } yield nioPath).get diff --git a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilderFactory.scala b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilderFactory.scala index c93f751b706..e6d269a07b0 100644 --- a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilderFactory.scala +++ b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilderFactory.scala @@ -10,18 +10,19 @@ import scala.concurrent.{ExecutionContext, Future} final case class BlobFileSystemConfig(config: Config) +final case class SubscriptionId(value: String) {override def toString: String = value} final case class BlobContainerName(value: String) {override def toString: String = value} final case class StorageAccountName(value: String) {override def toString: String = value} final case class EndpointURL(value: String) {override def toString: String = value} final case class WorkspaceId(value: String) {override def toString: String = value} final case class WorkspaceManagerURL(value: String) {override def toString: String = value} final case class BlobPathBuilderFactory(globalConfig: Config, instanceConfig: Config, singletonConfig: BlobFileSystemConfig) extends PathBuilderFactory { - val subscription: Option[String] = instanceConfig.as[Option[String]]("subscription") + val subscription: Option[SubscriptionId] = instanceConfig.as[Option[String]]("subscription").map(SubscriptionId) val container: BlobContainerName = BlobContainerName(instanceConfig.as[String]("container")) val endpoint: EndpointURL = EndpointURL(instanceConfig.as[String]("endpoint")) - val workspaceId: Option[WorkspaceId] = instanceConfig.as[Option[String]]("workspace-id").map(WorkspaceId(_)) + val workspaceId: 
Option[WorkspaceId] = instanceConfig.as[Option[String]]("workspace-id").map(WorkspaceId) val expiryBufferMinutes: Long = instanceConfig.as[Option[Long]]("expiry-buffer-minutes").getOrElse(10) - val workspaceManagerURL: Option[WorkspaceManagerURL] = singletonConfig.config.as[Option[String]]("workspace-manager-url").map(WorkspaceManagerURL(_)) + val workspaceManagerURL: Option[WorkspaceManagerURL] = singletonConfig.config.as[Option[String]]("workspace-manager-url").map(WorkspaceManagerURL) val blobTokenGenerator: BlobTokenGenerator = BlobTokenGenerator.createBlobTokenGenerator( container, endpoint, workspaceId, workspaceManagerURL, subscription) diff --git a/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala b/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala index 9975065a3e2..671e7b2d35b 100644 --- a/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala +++ b/filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala @@ -91,4 +91,17 @@ class BlobPathBuilderSpec extends AnyFlatSpec with Matchers with MockSugar { val fileText = (is.readAllBytes.map(_.toChar)).mkString fileText should include ("This is my test file!!!! Did it work?") } + + ignore should "resolve a path without duplicating container name" in { + val endpoint = BlobPathBuilderSpec.buildEndpoint("coaexternalstorage") + val store = BlobContainerName("inputs") + val blobTokenGenerator = NativeBlobTokenGenerator(store, endpoint) + val fsm: BlobFileSystemManager = BlobFileSystemManager(store, endpoint, 10, blobTokenGenerator) + + val rootString = s"${endpoint.value}/${store.value}/cromwell-execution" + val blobRoot: BlobPath = new BlobPathBuilder(store, endpoint)(fsm) build rootString getOrElse fail() + blobRoot.toAbsolutePath.pathAsString should equal ("https://coaexternalstorage.blob.core.windows.net/inputs/cromwell-execution") + val otherFile = blobRoot.resolve("test/inputFile.txt") + otherFile.toAbsolutePath.pathAsString should equal ("https://coaexternalstorage.blob.core.windows.net/inputs/cromwell-execution/test/inputFile.txt") + } } From 18fbb14a6dc4e8813cde24c88b725bcb51640ad1 Mon Sep 17 00:00:00 2001 From: Brian Reilly Date: Fri, 7 Oct 2022 10:36:04 -0400 Subject: [PATCH 095/326] [WX-765] Update snakeyaml to 1.33 (#6927) * update snakeyaml to 1.33 * Don't use deprecated no-arg Constructor constructor Co-authored-by: Janet Gainer-Dewar --- .../test/scala/cromiam/webservice/SwaggerServiceSpec.scala | 7 ++++--- .../scala/cromwell/webservice/SwaggerServiceSpec.scala | 6 +++--- project/Dependencies.scala | 2 +- wom/src/main/scala/wom/util/YamlUtils.scala | 6 ++++-- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/CromIAM/src/test/scala/cromiam/webservice/SwaggerServiceSpec.scala b/CromIAM/src/test/scala/cromiam/webservice/SwaggerServiceSpec.scala index c0277fd92c0..838a523f4eb 100644 --- a/CromIAM/src/test/scala/cromiam/webservice/SwaggerServiceSpec.scala +++ b/CromIAM/src/test/scala/cromiam/webservice/SwaggerServiceSpec.scala @@ -12,7 +12,7 @@ import org.scalatest.prop.TableDrivenPropertyChecks import org.yaml.snakeyaml.constructor.Constructor import org.yaml.snakeyaml.error.YAMLException import org.yaml.snakeyaml.nodes.MappingNode -import org.yaml.snakeyaml.{Yaml => SnakeYaml} +import org.yaml.snakeyaml.{LoaderOptions, Yaml => SnakeYaml} import scala.jdk.CollectionConverters._ @@ -21,6 +21,7 @@ class SwaggerServiceSpec extends AnyFlatSpec with CromwellTimeoutSpec with Swagg with 
TableDrivenPropertyChecks { def actorRefFactory = system override def oauthConfig: SwaggerOauthConfig = SwaggerOauthConfig("clientId", "realm", "appName") + val yamlLoaderOptions = new LoaderOptions behavior of "SwaggerService" @@ -32,7 +33,7 @@ class SwaggerServiceSpec extends AnyFlatSpec with CromwellTimeoutSpec with Swagg contentType should be(ContentTypes.`application/octet-stream`) val body = responseAs[String] - val yaml = new SnakeYaml(new UniqueKeyConstructor()).loadAs(body, classOf[java.util.Map[String, AnyRef]]) + val yaml = new SnakeYaml(new UniqueKeyConstructor(new LoaderOptions)).loadAs(body, classOf[java.util.Map[String, AnyRef]]) yaml.get("swagger") should be("2.0") } @@ -109,7 +110,7 @@ class SwaggerServiceSpec extends AnyFlatSpec with CromwellTimeoutSpec with Swagg * Adapted from: * https://bitbucket.org/asomov/snakeyaml/src/e9cd9f5e8d76c61eb983e29b3dc039c1fac9c393/src/test/java/org/yaml/snakeyaml/issues/issue139/UniqueKeyTest.java?fileviewer=file-view-default#UniqueKeyTest.java-43:62 */ -class UniqueKeyConstructor extends Constructor { +class UniqueKeyConstructor(val loaderOptions: LoaderOptions) extends Constructor(loaderOptions) { import java.util.{Map => JMap} diff --git a/engine/src/test/scala/cromwell/webservice/SwaggerServiceSpec.scala b/engine/src/test/scala/cromwell/webservice/SwaggerServiceSpec.scala index ed4e588cfd1..24cce8ae932 100644 --- a/engine/src/test/scala/cromwell/webservice/SwaggerServiceSpec.scala +++ b/engine/src/test/scala/cromwell/webservice/SwaggerServiceSpec.scala @@ -11,7 +11,7 @@ import org.scalatest.prop.TableDrivenPropertyChecks import org.yaml.snakeyaml.constructor.Constructor import org.yaml.snakeyaml.error.YAMLException import org.yaml.snakeyaml.nodes.MappingNode -import org.yaml.snakeyaml.{Yaml => SnakeYaml} +import org.yaml.snakeyaml.{LoaderOptions, Yaml => SnakeYaml} import scala.jdk.CollectionConverters._ @@ -28,7 +28,7 @@ class SwaggerServiceSpec extends AnyFlatSpec with CromwellTimeoutSpec with Swagg status should be(StatusCodes.OK) val body = responseAs[String] - val yaml = new SnakeYaml(new UniqueKeyConstructor()).loadAs(body, classOf[java.util.Map[String, AnyRef]]) + val yaml = new SnakeYaml(new UniqueKeyConstructor(new LoaderOptions)).loadAs(body, classOf[java.util.Map[String, AnyRef]]) yaml.get("swagger") should be("2.0") } @@ -100,7 +100,7 @@ class SwaggerServiceSpec extends AnyFlatSpec with CromwellTimeoutSpec with Swagg * Adapted from: * https://bitbucket.org/asomov/snakeyaml/src/e9cd9f5e8d76c61eb983e29b3dc039c1fac9c393/src/test/java/org/yaml/snakeyaml/issues/issue139/UniqueKeyTest.java?fileviewer=file-view-default#UniqueKeyTest.java-43:62 */ -class UniqueKeyConstructor extends Constructor { +class UniqueKeyConstructor(val loaderOptions: LoaderOptions) extends Constructor(loaderOptions) { import java.util.{Map => JMap} diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 50c76893e80..ebb4b1c0d69 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -118,7 +118,7 @@ object Dependencies { * https://github.com/slick/slick/pull/2201 */ private val slickV = "3.4.0-M1" - private val snakeyamlV = "1.30" + private val snakeyamlV = "1.33" private val sprayJsonV = "1.3.6" private val sttpV = "1.7.2" private val swaggerParserV = "1.0.56" diff --git a/wom/src/main/scala/wom/util/YamlUtils.scala b/wom/src/main/scala/wom/util/YamlUtils.scala index b4e0f063c9b..50dffc0457d 100644 --- a/wom/src/main/scala/wom/util/YamlUtils.scala +++ b/wom/src/main/scala/wom/util/YamlUtils.scala @@ -38,7 +38,7 @@ 
object YamlUtils { maxDepth: Int Refined NonNegative = defaultMaxDepth ): Either[ParsingFailure, Json] = { try { - val yamlConstructor = new SafeConstructor() + val yamlConstructor = new SafeConstructor(loaderOptions) val yamlComposer = new MaxDepthComposer(yaml, maxDepth) yamlConstructor.setComposer(yamlComposer) val parsed = yamlConstructor.getSingleData(classOf[AnyRef]) @@ -76,12 +76,14 @@ object YamlUtils { // org.yaml.snakeyaml.error.YAMLException: Number of aliases for non-scalar nodes exceeds the specified max=50 // that comes from the SnakeYAML library. private val loaderOptions = new LoaderOptions() + loaderOptions.setAllowRecursiveKeys(true) + loaderOptions.setNestingDepthLimit(1000) loaderOptions.setMaxAliasesForCollections(Integer.MAX_VALUE) /** Extends SnakeYaml's Composer checking for a maximum depth before a StackOverflowError occurs. */ private class MaxDepthComposer(yaml: String, maxDepth: Int Refined NonNegative) extends Composer( - new ParserImpl(new StreamReader(new StringReader(yaml))), + new ParserImpl(new StreamReader(new StringReader(yaml)), loaderOptions), new Resolver(), loaderOptions ) { From c5994261f40c529fcad02fc4577db9aad6ade79e Mon Sep 17 00:00:00 2001 From: Katrina P <68349264+kpierre13@users.noreply.github.com> Date: Tue, 11 Oct 2022 09:43:42 -0400 Subject: [PATCH 096/326] WM-1414 Refactoring WesRunLog to omit Cromwell's "workflowLog" object (#6925) --- .../webservice/routes/wes/WesResponse.scala | 16 ++++++++++++++-- .../webservice/routes/wes/WesRouteSupport.scala | 2 +- .../webservice/routes/wes/WesRunLog.scala | 13 ------------- .../routes/wes/WesRouteSupportSpec.scala | 2 +- 4 files changed, 16 insertions(+), 17 deletions(-) diff --git a/engine/src/main/scala/cromwell/webservice/routes/wes/WesResponse.scala b/engine/src/main/scala/cromwell/webservice/routes/wes/WesResponse.scala index 32e221272e0..58313cb5946 100644 --- a/engine/src/main/scala/cromwell/webservice/routes/wes/WesResponse.scala +++ b/engine/src/main/scala/cromwell/webservice/routes/wes/WesResponse.scala @@ -2,8 +2,7 @@ package cromwell.webservice.routes.wes import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport import cromwell.webservice.routes.wes.WesState.WesState -import spray.json.{DefaultJsonProtocol, RootJsonFormat} -import cromwell.webservice.routes.wes.WesRunLog +import spray.json.{DefaultJsonProtocol, JsObject, RootJsonFormat} sealed trait WesResponse extends Product with Serializable final case class WesErrorResponse(msg: String, status_code: Int) extends WesResponse @@ -12,6 +11,18 @@ final case class WesRunStatus(run_id: String, state: WesState) extends WesRespon final case class WesResponseRunList(runs: List[WesRunStatus]) extends WesResponse final case class WesResponseWorkflowMetadata(workflowLog: WesRunLog) extends WesResponse +final case class WesRunLog(run_id: String, + request: WesRunRequest, + state: WesState, + run_log: Option[WesLog], + task_logs: Option[List[WesLog]], + outputs: Option[JsObject] + ) extends WesResponse + +object WesRunLog { + def fromJson(json: String): WesRunLog = CromwellMetadata.fromJson(json).wesRunLog +} + final case class WesStatusInfoResponse(workflow_type_version: Map[String, Iterable[String]], supported_wes_versions: Iterable[String], @@ -48,6 +59,7 @@ object WesResponseJsonSupport extends SprayJsonSupport with DefaultJsonProtocol case i: WesStatusInfoResponse => i.toJson case l: WesResponseRunList => l.toJson case m: WesResponseWorkflowMetadata => m.toJson + case w: WesRunLog => w.toJson } } diff --git 
a/engine/src/main/scala/cromwell/webservice/routes/wes/WesRouteSupport.scala b/engine/src/main/scala/cromwell/webservice/routes/wes/WesRouteSupport.scala index ba6546fb3e3..41973bb8981 100644 --- a/engine/src/main/scala/cromwell/webservice/routes/wes/WesRouteSupport.scala +++ b/engine/src/main/scala/cromwell/webservice/routes/wes/WesRouteSupport.scala @@ -228,7 +228,7 @@ object WesRouteSupport { val metadataJsonResponse = metadataBuilderActorRequest(workflowId, request, serviceRegistryActor) metadataJsonResponse.map { - case SuccessfulMetadataJsonResponse(_, responseJson) => WesResponseWorkflowMetadata(WesRunLog.fromJson(responseJson.toString())) + case SuccessfulMetadataJsonResponse(_, responseJson) => WesRunLog.fromJson(responseJson.toString()) case FailedMetadataJsonResponse(_, reason) => WesErrorResponse(reason.getMessage, StatusCodes.InternalServerError.intValue) } } diff --git a/engine/src/main/scala/cromwell/webservice/routes/wes/WesRunLog.scala b/engine/src/main/scala/cromwell/webservice/routes/wes/WesRunLog.scala index 344498e51ab..c882ebf4f8b 100644 --- a/engine/src/main/scala/cromwell/webservice/routes/wes/WesRunLog.scala +++ b/engine/src/main/scala/cromwell/webservice/routes/wes/WesRunLog.scala @@ -1,6 +1,5 @@ package cromwell.webservice.routes.wes -import cromwell.webservice.routes.wes.WesState.WesState import spray.json.JsObject @@ -20,15 +19,3 @@ final case class WesRunRequest(workflow_params: Option[JsObject], workflow_engine_parameters: Option[JsObject], workflow_url: Option[String] ) - -final case class WesRunLog(run_id: String, - request: WesRunRequest, - state: WesState, - run_log: Option[WesLog], - task_logs: Option[List[WesLog]], - outputs: Option[JsObject] - ) - -object WesRunLog { - def fromJson(json: String): WesRunLog = CromwellMetadata.fromJson(json).wesRunLog -} diff --git a/engine/src/test/scala/cromwell/webservice/routes/wes/WesRouteSupportSpec.scala b/engine/src/test/scala/cromwell/webservice/routes/wes/WesRouteSupportSpec.scala index 6e9a390ad4a..00a361a3174 100644 --- a/engine/src/test/scala/cromwell/webservice/routes/wes/WesRouteSupportSpec.scala +++ b/engine/src/test/scala/cromwell/webservice/routes/wes/WesRouteSupportSpec.scala @@ -196,7 +196,7 @@ class WesRouteSupportSpec extends AsyncFlatSpec with ScalatestRouteTest with Mat wesRoutes ~> check { status should be(StatusCodes.OK) - val result = responseAs[JsObject].fields("workflowLog").asJsObject() + val result = responseAs[JsObject] result.fields.keys should contain allOf("request", "run_id", "state") result.fields("state") should be(JsString("RUNNING")) result.fields("run_id") should be(JsString(CromwellApiServiceSpec.wesWorkflowId.toString)) From b0a254112bf0589ebe22dea7abf520ba2c78c973 Mon Sep 17 00:00:00 2001 From: Christian Freitas Date: Fri, 14 Oct 2022 09:34:13 -0400 Subject: [PATCH 097/326] Upgrade Postgres to 42.4.1 (#6932) --- project/Dependencies.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index ebb4b1c0d69..675c0f7c7dd 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -86,7 +86,7 @@ object Dependencies { private val mysqlV = "8.0.28" private val nettyV = "4.1.72.Final" private val owlApiV = "5.1.19" - private val postgresV = "42.3.3" + private val postgresV = "42.4.1" private val pprintV = "0.7.3" private val rdf4jV = "3.7.1" private val refinedV = "0.9.29" From 622c8e6b79b4ce123912ace0ed37b28f1c461324 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Fri, 14 Oct 2022 10:19:51 -0400 
Subject: [PATCH 098/326] WX-735 Fix incorrect and/or nondeterministic filesystem ordering (#6930) --- .../core/path/DefaultPathBuilderFactory.scala | 3 +++ .../core/path/PathBuilderFactory.scala | 21 ++++++++++++------- .../cromwell/engine/EngineFilesystems.scala | 14 ++++++++----- .../blob/BlobPathBuilderFactory.scala | 3 +++ 4 files changed, 28 insertions(+), 13 deletions(-) diff --git a/core/src/main/scala/cromwell/core/path/DefaultPathBuilderFactory.scala b/core/src/main/scala/cromwell/core/path/DefaultPathBuilderFactory.scala index 91f05b10ef7..20a2afdd5ce 100644 --- a/core/src/main/scala/cromwell/core/path/DefaultPathBuilderFactory.scala +++ b/core/src/main/scala/cromwell/core/path/DefaultPathBuilderFactory.scala @@ -2,6 +2,7 @@ package cromwell.core.path import akka.actor.ActorSystem import cromwell.core.WorkflowOptions +import cromwell.core.path.PathBuilderFactory.PriorityDefault import scala.concurrent.{ExecutionContext, Future} @@ -9,4 +10,6 @@ case object DefaultPathBuilderFactory extends PathBuilderFactory { override def withOptions(options: WorkflowOptions)(implicit actorSystem: ActorSystem, ec: ExecutionContext) = Future.successful(DefaultPathBuilder) val name = "local" val tuple = name -> this + + override def priority: Int = PriorityDefault } diff --git a/core/src/main/scala/cromwell/core/path/PathBuilderFactory.scala b/core/src/main/scala/cromwell/core/path/PathBuilderFactory.scala index 1f215ba2933..79adaae4d6b 100644 --- a/core/src/main/scala/cromwell/core/path/PathBuilderFactory.scala +++ b/core/src/main/scala/cromwell/core/path/PathBuilderFactory.scala @@ -5,6 +5,7 @@ import cromwell.core.{Dispatcher, WorkflowOptions} import cats.syntax.traverse._ import cats.instances.list._ import cats.instances.future._ +import cromwell.core.path.PathBuilderFactory.PriorityStandard import scala.concurrent.{ExecutionContext, Future} @@ -12,16 +13,13 @@ object PathBuilderFactory { // Given a list of factories, instantiates the corresponding path builders def instantiatePathBuilders(factories: List[PathBuilderFactory], workflowOptions: WorkflowOptions)(implicit as: ActorSystem): Future[List[PathBuilder]] = { implicit val ec: ExecutionContext = as.dispatchers.lookup(Dispatcher.IoDispatcher) - // The DefaultPathBuilderFactory always needs to be last. - // The reason is path builders are tried in order, and the default one is very generous in terms of paths it "thinks" it supports - // For instance, it will return a Path for a gcs url even though it doesn't really support it - val sortedFactories = factories.sortWith({ - case (_, DefaultPathBuilderFactory) => true - case (DefaultPathBuilderFactory, _) => false - case (a, b) => factories.indexOf(a) < factories.indexOf(b) - }) + val sortedFactories = factories.sortBy(_.priority) sortedFactories.traverse(_.withOptions(workflowOptions)) } + + val PriorityBlob = 100 // High priority to evaluate first, because blob files may inadvertently match other filesystems + val PriorityStandard = 1000 + val PriorityDefault = 10000 // "Default" is a fallback, evaluate last } /** @@ -29,4 +27,11 @@ object PathBuilderFactory { */ trait PathBuilderFactory { def withOptions(options: WorkflowOptions)(implicit as: ActorSystem, ec: ExecutionContext): Future[PathBuilder] + + /** + * Candidate filesystems are considered in a stable order, as some requests may match multiple filesystems. + * To customize this order, the priority of a filesystem may be adjusted. Lower number == higher priority. 
+ * @return This filesystem's priority + */ + def priority: Int = PriorityStandard } diff --git a/engine/src/main/scala/cromwell/engine/EngineFilesystems.scala b/engine/src/main/scala/cromwell/engine/EngineFilesystems.scala index 296d227d39f..c648261a7f4 100644 --- a/engine/src/main/scala/cromwell/engine/EngineFilesystems.scala +++ b/engine/src/main/scala/cromwell/engine/EngineFilesystems.scala @@ -8,19 +8,23 @@ import cromwell.core.filesystem.CromwellFileSystems import cromwell.core.path.{DefaultPathBuilderFactory, PathBuilder, PathBuilderFactory} import net.ceedubs.ficus.Ficus._ +import scala.collection.immutable.SortedMap import scala.concurrent.Future object EngineFilesystems { private val config: Config = ConfigFactory.load - private val defaultFileSystemFactory: Map[String, PathBuilderFactory] = + private val defaultFileSystemFactory: SortedMap[String, PathBuilderFactory] = Option(DefaultPathBuilderFactory.tuple) .filter(_ => config.as[Boolean]("engine.filesystems.local.enabled")) - .toMap + .to(collection.immutable.SortedMap) - private val pathBuilderFactories: Map[String, PathBuilderFactory] = { - CromwellFileSystems.instance.factoriesFromConfig(config.as[Config]("engine")) - .unsafe("Failed to instantiate engine filesystem") ++ defaultFileSystemFactory + private val pathBuilderFactories: SortedMap[String, PathBuilderFactory] = { + // Unordered maps are a classical source of randomness injection into a system + ( + CromwellFileSystems.instance.factoriesFromConfig(config.as[Config]("engine")) + .unsafe("Failed to instantiate engine filesystem") ++ defaultFileSystemFactory + ).to(collection.immutable.SortedMap) } def configuredPathBuilderFactories: List[PathBuilderFactory] = pathBuilderFactories.values.toList diff --git a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilderFactory.scala b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilderFactory.scala index e6d269a07b0..1a9df21b0f2 100644 --- a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilderFactory.scala +++ b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilderFactory.scala @@ -4,6 +4,7 @@ import akka.actor.ActorSystem import com.typesafe.config.Config import cromwell.core.WorkflowOptions import cromwell.core.path.PathBuilderFactory +import cromwell.core.path.PathBuilderFactory.PriorityBlob import net.ceedubs.ficus.Ficus._ import scala.concurrent.{ExecutionContext, Future} @@ -33,4 +34,6 @@ final case class BlobPathBuilderFactory(globalConfig: Config, instanceConfig: Co new BlobPathBuilder(container, endpoint)(fsm) } } + + override def priority: Int = PriorityBlob } From 1a3889689060dcff2b6706a98ae69fd2fa9f70ef Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Mon, 17 Oct 2022 16:18:09 -0400 Subject: [PATCH 099/326] WX-772 Update Scala to 2.13.9 (#6928) * Update Scala to 2.13.9 * Try updating sbt-scoverage * Does this version exist anywhere we can see? * This version actually exists * Update library version to remove conflict * Codegen version * Fix fun new 2.13.9 compiler errors * Resolve warnings * Newest Scala? * I guess not * Does this please Travis? 
* force ci * Back out changes to generated code Co-authored-by: Adam Nichols --- CHANGELOG.md | 5 +++++ .../standard/StandardCachingActorHelper.scala | 2 +- .../RootWorkflowFileHashCacheActor.scala | 2 +- .../centaur/reporting/BigQueryReporter.scala | 2 +- codegen_java/build.sbt | 2 +- .../main/scala/common/util/TerminalUtil.scala | 4 ++-- .../collections/EnhancedCollectionsSpec.scala | 6 +++--- .../cromwell/core/io/IoClientHelperSpec.scala | 4 ++-- .../cromwell/core/retry/BackoffSpec.scala | 2 +- .../callcaching/FetchCachedResultsActor.scala | 2 +- .../job/EngineJobExecutionActor.scala | 2 +- .../ValidatingCachingConfigSpec.scala | 2 +- project/Dependencies.scala | 8 ++++--- project/Settings.scala | 2 +- project/plugins.sbt | 2 +- ...ectiveTsvInstrumentationServiceActor.scala | 2 +- .../impl/MetadataDatabaseAccessSpec.scala | 2 +- .../PipelinesApiJobCachingActorHelper.scala | 2 +- ...inesApiAsyncBackendJobExecutionActor.scala | 4 ++-- .../v2alpha1/api/DeserializationSpec.scala | 6 +++--- ...inesApiAsyncBackendJobExecutionActor.scala | 4 ++-- .../v2beta/api/DeserializationSpec.scala | 6 +++--- .../scala/wdl/draft2/model/AstTools.scala | 4 ++-- .../scala/wom/types/WomFileTypeSpec.scala | 2 +- .../test/scala/wom/types/WomTypeSpec.scala | 21 ++++++++++--------- 25 files changed, 54 insertions(+), 46 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 351adccf651..1e44012027c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,11 @@ supported. All DRS inputs to a task are now localized in a single PAPI action, which should improve speed and resolve failures observed when attempting to localize a large number of DRS files. +### Security Patching + +Updates to dependencies to fix security vulnerabilities. + + ## 84 Release Notes ### CromIAM enabled user checks diff --git a/backend/src/main/scala/cromwell/backend/standard/StandardCachingActorHelper.scala b/backend/src/main/scala/cromwell/backend/standard/StandardCachingActorHelper.scala index dd4254c8439..a64c6a5439c 100644 --- a/backend/src/main/scala/cromwell/backend/standard/StandardCachingActorHelper.scala +++ b/backend/src/main/scala/cromwell/backend/standard/StandardCachingActorHelper.scala @@ -78,7 +78,7 @@ trait StandardCachingActorHelper extends JobCachingActorHelper { val fileMetadata = jobPaths.metadataPaths - runtimeAttributesMetadata ++ fileMetadata ++ nonStandardMetadata + nonStandardMetadata ++ runtimeAttributesMetadata ++ fileMetadata } /** diff --git a/backend/src/main/scala/cromwell/backend/standard/callcaching/RootWorkflowFileHashCacheActor.scala b/backend/src/main/scala/cromwell/backend/standard/callcaching/RootWorkflowFileHashCacheActor.scala index e33625741b3..c3d1452660e 100644 --- a/backend/src/main/scala/cromwell/backend/standard/callcaching/RootWorkflowFileHashCacheActor.scala +++ b/backend/src/main/scala/cromwell/backend/standard/callcaching/RootWorkflowFileHashCacheActor.scala @@ -48,7 +48,7 @@ class RootWorkflowFileHashCacheActor private[callcaching](override val ioActor: // hash to become available. cache.put(key, FileHashValueRequested(requesters = requester :: requesters)) case FileHashSuccess(value) => - sender() ! Tuple2(hashCommand.fileHashContext, IoSuccess(requester.ioCommand, value)) + sender() ! Tuple2[Any, Any](hashCommand.fileHashContext, IoSuccess(requester.ioCommand, value)) case FileHashFailure(error) => sender() ! 
Tuple2(hashCommand.fileHashContext, IoFailure(requester.ioCommand, new IOException(error))) } diff --git a/centaur/src/it/scala/centaur/reporting/BigQueryReporter.scala b/centaur/src/it/scala/centaur/reporting/BigQueryReporter.scala index 16463742632..9a88a9ac109 100644 --- a/centaur/src/it/scala/centaur/reporting/BigQueryReporter.scala +++ b/centaur/src/it/scala/centaur/reporting/BigQueryReporter.scala @@ -189,7 +189,7 @@ class BigQueryReporter(override val params: ErrorReporterParams) extends ErrorRe } private def toJobKeyValueRow(jobKeyValueEntry: JobKeyValueEntry): RowToInsert = { - RowToInsert of Map( + RowToInsert of Map[String, Any]( "call_fully_qualified_name" -> jobKeyValueEntry.callFullyQualifiedName, "job_attempt" -> jobKeyValueEntry.jobAttempt, "job_index" -> jobKeyValueEntry.jobIndex, diff --git a/codegen_java/build.sbt b/codegen_java/build.sbt index 2ba35a693c3..0ad461a66b8 100644 --- a/codegen_java/build.sbt +++ b/codegen_java/build.sbt @@ -6,7 +6,7 @@ lazy val root = (project in file(".")). Seq(organization := "org.broadinstitute.cromwell", name := "cromwell-client", version := createVersion("0.1"), - scalaVersion := "2.13.8", + scalaVersion := "2.13.9", scalacOptions ++= Seq("-feature"), compile / javacOptions ++= Seq("-Xlint:deprecation"), Compile / packageDoc / publishArtifact := false, diff --git a/common/src/main/scala/common/util/TerminalUtil.scala b/common/src/main/scala/common/util/TerminalUtil.scala index dab7fb9ce17..cb7c980aecb 100644 --- a/common/src/main/scala/common/util/TerminalUtil.scala +++ b/common/src/main/scala/common/util/TerminalUtil.scala @@ -6,10 +6,10 @@ object TerminalUtil { def maxWidth(lengths: Seq[Seq[Int]], column: Int) = lengths.map { length => length(column) }.max val widths = (rows :+ header).map { row => row.map { s => s.length } } val maxWidths = widths.head.indices.map { column => maxWidth(widths, column) } - val tableHeader = header.indices.map { i => header(i).padTo(maxWidths(i), " ").mkString("") }.mkString("|") + val tableHeader = header.indices.map { i => header(i).padTo(maxWidths(i), ' ').mkString("") }.mkString("|") val tableDivider = header.indices.map { i => "-" * maxWidths(i) }.mkString("|") val tableRows = rows.map { row => - val mdRow = row.indices.map { i => row(i).padTo(maxWidths(i), " ").mkString("") }.mkString("|") + val mdRow = row.indices.map { i => row(i).padTo(maxWidths(i), ' ').mkString("") }.mkString("|") s"|$mdRow|" } s"|$tableHeader|\n|$tableDivider|\n${tableRows.mkString("\n")}\n" diff --git a/common/src/test/scala/common/collections/EnhancedCollectionsSpec.scala b/common/src/test/scala/common/collections/EnhancedCollectionsSpec.scala index 12dba2e15d9..d2e0a38878c 100644 --- a/common/src/test/scala/common/collections/EnhancedCollectionsSpec.scala +++ b/common/src/test/scala/common/collections/EnhancedCollectionsSpec.scala @@ -11,7 +11,7 @@ class EnhancedCollectionsSpec extends AsyncFlatSpec with Matchers { behavior of "EnhancedCollections" it should "filter a List by type and return a List" in { - val objectList = List("hello", 3, None, "world") + val objectList = List[Any]("hello", 3, None, "world") val stringList = objectList.filterByType[String] stringList should be(List("hello", "world")) @@ -28,14 +28,14 @@ class EnhancedCollectionsSpec extends AsyncFlatSpec with Matchers { } it should "filter a Set by type and return a Set" in { - val objectSet = Set("hello", 3, None, "world") + val objectSet = Set[Any]("hello", 3, None, "world") val intSet: Set[Int] = objectSet.filterByType[Int] intSet should be(Set(3)) } 
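Most of the source changes in this patch follow one pattern: where 2.13.8 silently inferred a wide element type (usually `Any`) for a mixed-type literal, the type argument is now written out, presumably to keep the stricter 2.13.9 warning set (with fatal warnings) happy. A small standalone sketch of the before/after shape, detached from the Cromwell types involved:

```scala
object ExplicitTypeArgsExample extends App {
  // Before (commented out): the element type of this mixed literal is inferred,
  // widening to Any behind the scenes - exactly the kind of inference the upgraded compiler flags.
  // val mixed = List("hello", 3, None, "world")

  // After: the widening to Any is spelled out at the call site.
  val mixed: List[Any] = List[Any]("hello", 3, None, "world")

  // The same idea applies to numeric literals that must already be the wider type,
  // e.g. passing 5L where a Long is expected rather than relying on an Int literal.
  val size: Long = 5L

  println(s"${mixed.size} elements, size hint $size")
}
```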
it should "find the first Int in a List" in { - val objectSet = List("hello", 3, None, 4, "world") + val objectSet = List[Any]("hello", 3, None, 4, "world") objectSet.firstByType[Int] should be(Some(3)) } diff --git a/core/src/test/scala/cromwell/core/io/IoClientHelperSpec.scala b/core/src/test/scala/cromwell/core/io/IoClientHelperSpec.scala index f77df6fa011..06132ba152b 100644 --- a/core/src/test/scala/cromwell/core/io/IoClientHelperSpec.scala +++ b/core/src/test/scala/cromwell/core/io/IoClientHelperSpec.scala @@ -27,7 +27,7 @@ class IoClientHelperSpec extends TestKitSuite with AnyFlatSpecLike with Matchers val testActor = TestActorRef(new IoClientHelperTestActor(ioActorProbe.ref, delegateProbe.ref, backoff, noResponseTimeout)) val command = DefaultIoSizeCommand(mock[Path]) - val response = IoSuccess(command, 5) + val response = IoSuccess(command, 5L) // Send the command testActor.underlyingActor.sendMessage(command) @@ -58,7 +58,7 @@ class IoClientHelperSpec extends TestKitSuite with AnyFlatSpecLike with Matchers val commandContext = "context" val command = DefaultIoSizeCommand(mock[Path]) - val response = IoSuccess(command, 5) + val response = IoSuccess(command, 5L) // Send the command testActor.underlyingActor.sendMessageWithContext(commandContext, command) diff --git a/core/src/test/scala/cromwell/core/retry/BackoffSpec.scala b/core/src/test/scala/cromwell/core/retry/BackoffSpec.scala index 58fa597dc03..c6a008feb02 100644 --- a/core/src/test/scala/cromwell/core/retry/BackoffSpec.scala +++ b/core/src/test/scala/cromwell/core/retry/BackoffSpec.scala @@ -54,7 +54,7 @@ class BackoffSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matchers { it should "parse config" in { val config = ConfigFactory.parseMap( - Map( + Map[String, Any]( "min" -> "5 seconds", "max" -> "30 seconds", "multiplier" -> 6D, diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/FetchCachedResultsActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/FetchCachedResultsActor.scala index 0eef565caf2..89745e6523d 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/FetchCachedResultsActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/FetchCachedResultsActor.scala @@ -37,7 +37,7 @@ class FetchCachedResultsActor(cacheResultId: CallCachingEntryId, replyTo: ActorR val sourceCacheDetails = Seq(result.callCachingEntry.workflowExecutionUuid, result.callCachingEntry.callFullyQualifiedName, - result.callCachingEntry.jobIndex).mkString(":") + result.callCachingEntry.jobIndex.toString).mkString(":") CachedOutputLookupSucceeded(simpletons, jobDetritusFiles.toMap, result.callCachingEntry.returnCode, diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/job/EngineJobExecutionActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/job/EngineJobExecutionActor.scala index 3bbdb5eb1f2..4423de202cc 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/job/EngineJobExecutionActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/job/EngineJobExecutionActor.scala @@ -753,7 +753,7 @@ class EngineJobExecutionActor(replyTo: ActorRef, private def logCacheHitSuccessAndNotifyMetadata(data: ResponsePendingData): Unit = { - val metadataMap = Map(callCachingHitResultMetadataKey -> true) ++ data.ejeaCacheHit.flatMap(_.details).map(details => callCachingReadResultMetadataKey -> 
s"Cache Hit: $details").toMap + val metadataMap = Map[String, Any](callCachingHitResultMetadataKey -> true) ++ data.ejeaCacheHit.flatMap(_.details).map(details => callCachingReadResultMetadataKey -> s"Cache Hit: $details").toMap writeToMetadata(metadataMap) diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/ValidatingCachingConfigSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/ValidatingCachingConfigSpec.scala index 40b628a700d..095721569e7 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/ValidatingCachingConfigSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/ValidatingCachingConfigSpec.scala @@ -11,7 +11,7 @@ import scala.util.{Failure, Success, Try} class ValidatingCachingConfigSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matchers with TableDrivenPropertyChecks { it should "run config tests" in { - val cases = Table( + val cases = Table[String, Any]( ("config" , "exceptionMessage" ), ("enabled = not-a-boolean", "String: 1: enabled has type STRING rather than BOOLEAN" ), ("enabled = true" , true ), diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 675c0f7c7dd..0a0dee70cfa 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -89,7 +89,7 @@ object Dependencies { private val postgresV = "42.4.1" private val pprintV = "0.7.3" private val rdf4jV = "3.7.1" - private val refinedV = "0.9.29" + private val refinedV = "0.10.1" private val rhinoV = "1.7.14" private val scalaCollectionCompatV = "2.5.0" @@ -480,7 +480,8 @@ object Dependencies { val coreDependencies: List[ModuleID] = List( "com.google.auth" % "google-auth-library-oauth2-http" % googleOauth2V, "com.chuusai" %% "shapeless" % shapelessV, - "com.storm-enroute" %% "scalameter" % scalameterV % Test, + "com.storm-enroute" %% "scalameter" % scalameterV % Test + exclude("org.scala-lang.modules", "scala-xml_2.13"), "com.github.scopt" %% "scopt" % scoptV, ) ++ akkaStreamDependencies ++ configDependencies ++ catsDependencies ++ circeDependencies ++ googleApiClientDependencies ++ statsDDependencies ++ betterFilesDependencies ++ @@ -509,7 +510,8 @@ object Dependencies { "com.storm-enroute" %% "scalameter" % scalameterV exclude("com.fasterxml.jackson.core", "jackson-databind") exclude("com.fasterxml.jackson.module", "jackson-module-scala") - exclude("org.scala-tools.testing", "test-interface"), + exclude("org.scala-tools.testing", "test-interface") + exclude("org.scala-lang.modules", "scala-xml_2.13"), "com.fasterxml.jackson.core" % "jackson-databind" % jacksonV, "io.github.andrebeat" %% "scala-pool" % scalaPoolV ) ++ swaggerUiDependencies ++ akkaHttpDependencies ++ akkaHttpCirceIntegrationDependency ++ circeDependencies ++ diff --git a/project/Settings.scala b/project/Settings.scala index 951b1fc8fe6..797d7917dd9 100644 --- a/project/Settings.scala +++ b/project/Settings.scala @@ -70,7 +70,7 @@ object Settings { assembly / assemblyMergeStrategy := customMergeStrategy.value, ) - val Scala2_13Version = "2.13.8" + val Scala2_13Version = "2.13.9" private val ScalaVersion: String = Scala2_13Version private val sharedSettings: Seq[Setting[_]] = cromwellVersionWithGit ++ publishingSettings ++ List( diff --git a/project/plugins.sbt b/project/plugins.sbt index dd6a98e1b9b..34bccb47a0a 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1,5 +1,5 @@ addSbtPlugin("se.marcuslonnberg" % "sbt-docker" % "1.9.0") addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "1.1.1") addSbtPlugin("com.github.sbt" % "sbt-git" % 
"2.0.0") -addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.9.3") +addSbtPlugin("org.scoverage" % "sbt-scoverage" % "2.0.4") addDependencyTreePlugin diff --git a/services/src/main/scala/cromwell/services/instrumentation/impl/selectivetsv/SelectiveTsvInstrumentationServiceActor.scala b/services/src/main/scala/cromwell/services/instrumentation/impl/selectivetsv/SelectiveTsvInstrumentationServiceActor.scala index fd1864d45f0..4df3f966db6 100644 --- a/services/src/main/scala/cromwell/services/instrumentation/impl/selectivetsv/SelectiveTsvInstrumentationServiceActor.scala +++ b/services/src/main/scala/cromwell/services/instrumentation/impl/selectivetsv/SelectiveTsvInstrumentationServiceActor.scala @@ -143,7 +143,7 @@ object SelectiveTsvInstrumentationServiceActor { val header = (List("timestamp") ++ interestingFields).mkString("\t") val rows = stateHistory.map { case (timestamp, fieldMap) => - (Vector(timestamp.toString) ++ interestingFields.map(f => fieldMap.getOrElse(f, 0))).mkString("\t") + (Vector(timestamp.toString) ++ interestingFields.map(f => fieldMap.getOrElse(f, 0).toString)).mkString("\t") } Vector(header) ++ rows } diff --git a/services/src/test/scala/cromwell/services/metadata/impl/MetadataDatabaseAccessSpec.scala b/services/src/test/scala/cromwell/services/metadata/impl/MetadataDatabaseAccessSpec.scala index 8e6029140e9..2a812419775 100644 --- a/services/src/test/scala/cromwell/services/metadata/impl/MetadataDatabaseAccessSpec.scala +++ b/services/src/test/scala/cromwell/services/metadata/impl/MetadataDatabaseAccessSpec.scala @@ -287,7 +287,7 @@ class MetadataDatabaseAccessSpec extends AnyFlatSpec with CromwellTimeoutSpec wi } // Filter by workflow id within random Ids _ <- dataAccess.queryWorkflowSummaries(WorkflowQueryParameters( - (randomIds :+ workflow1Id).map(id => WorkflowQueryKey.Id.name -> id.toString))) map { case (response, _) => + (randomIds :+ workflow1Id.toString).map(id => WorkflowQueryKey.Id.name -> id))) map { case (response, _) => val resultsById = response.results groupBy { _.name } diff --git a/supportedBackends/google/pipelines/common/src/main/scala/cromwell/backend/google/pipelines/common/PipelinesApiJobCachingActorHelper.scala b/supportedBackends/google/pipelines/common/src/main/scala/cromwell/backend/google/pipelines/common/PipelinesApiJobCachingActorHelper.scala index 2e6682c2e69..353e33a5019 100644 --- a/supportedBackends/google/pipelines/common/src/main/scala/cromwell/backend/google/pipelines/common/PipelinesApiJobCachingActorHelper.scala +++ b/supportedBackends/google/pipelines/common/src/main/scala/cromwell/backend/google/pipelines/common/PipelinesApiJobCachingActorHelper.scala @@ -75,7 +75,7 @@ trait PipelinesApiJobCachingActorHelper extends StandardCachingActorHelper { .get(WorkflowOptionKeys.GoogleProject) .getOrElse(jesAttributes.project) - Map( + Map[String, Any]( PipelinesApiMetadataKeys.GoogleProject -> googleProject, PipelinesApiMetadataKeys.ExecutionBucket -> initializationData.workflowPaths.executionRootString, PipelinesApiMetadataKeys.EndpointUrl -> jesAttributes.endpointUrl, diff --git a/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/PipelinesApiAsyncBackendJobExecutionActor.scala b/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/PipelinesApiAsyncBackendJobExecutionActor.scala index a0f899a26ff..2da04c65e92 100644 --- 
a/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/PipelinesApiAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/PipelinesApiAsyncBackendJobExecutionActor.scala @@ -141,9 +141,9 @@ class PipelinesApiAsyncBackendJobExecutionActor(standardParams: StandardAsyncExe } val optional = Option(output) collectFirst { case o: PipelinesApiFileOutput if o.secondary || o.optional => "optional" } getOrElse "required" - val contentType = output.contentType.getOrElse("") + val contentType = output.contentType.map(_.toString).getOrElse("") - List(kind, output.cloudPath, output.containerPath, optional, contentType) + List(kind, output.cloudPath.toString, output.containerPath.toString, optional, contentType) } mkString("\"", "\"\n| \"", "\"") val parallelCompositeUploadThreshold = jobDescriptor.workflowDescriptor.workflowOptions.getOrElse( diff --git a/supportedBackends/google/pipelines/v2alpha1/src/test/scala/cromwell/backend/google/pipelines/v2alpha1/api/DeserializationSpec.scala b/supportedBackends/google/pipelines/v2alpha1/src/test/scala/cromwell/backend/google/pipelines/v2alpha1/api/DeserializationSpec.scala index 1c936e76317..7f10580b700 100644 --- a/supportedBackends/google/pipelines/v2alpha1/src/test/scala/cromwell/backend/google/pipelines/v2alpha1/api/DeserializationSpec.scala +++ b/supportedBackends/google/pipelines/v2alpha1/src/test/scala/cromwell/backend/google/pipelines/v2alpha1/api/DeserializationSpec.scala @@ -82,9 +82,9 @@ class DeserializationSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matc "commands" -> List[String]("echo", "hello").asJava ).asJava ).asJava, - "resources" -> Map( + "resources" -> Map[String, Object]( "projectId" -> "project", - "virtualMachine" -> Map( + "virtualMachine" -> Map[String, Any]( "machineType" -> "custom-1-1024", "preemptible" -> false ).asJava @@ -117,7 +117,7 @@ class DeserializationSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matc "commands" -> List[String]("echo", "hello").asJava ).asJava ).asJava, - "resources" -> Map( + "resources" -> Map[String, Object]( "projectId" -> "project", "virtualMachine" -> Map( "machineType" -> "custom-1-1024", diff --git a/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/PipelinesApiAsyncBackendJobExecutionActor.scala b/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/PipelinesApiAsyncBackendJobExecutionActor.scala index aa49c385ba6..fb6c8d425f7 100644 --- a/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/PipelinesApiAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/google/pipelines/v2beta/src/main/scala/cromwell/backend/google/pipelines/v2beta/PipelinesApiAsyncBackendJobExecutionActor.scala @@ -146,9 +146,9 @@ class PipelinesApiAsyncBackendJobExecutionActor(standardParams: StandardAsyncExe } val optional = Option(output) collectFirst { case o: PipelinesApiFileOutput if o.secondary || o.optional => "optional" } getOrElse "required" - val contentType = output.contentType.getOrElse("") + val contentType = output.contentType.map(_.toString).getOrElse("") - List(kind, output.cloudPath, output.containerPath, optional, contentType) + List(kind, output.cloudPath.toString, output.containerPath.toString, optional, contentType) } mkString("\"", "\"\n| \"", "\"") val parallelCompositeUploadThreshold = 
jobDescriptor.workflowDescriptor.workflowOptions.getOrElse( diff --git a/supportedBackends/google/pipelines/v2beta/src/test/scala/cromwell/backend/google/pipelines/v2beta/api/DeserializationSpec.scala b/supportedBackends/google/pipelines/v2beta/src/test/scala/cromwell/backend/google/pipelines/v2beta/api/DeserializationSpec.scala index 37020b0b0d6..6afd75adbd6 100644 --- a/supportedBackends/google/pipelines/v2beta/src/test/scala/cromwell/backend/google/pipelines/v2beta/api/DeserializationSpec.scala +++ b/supportedBackends/google/pipelines/v2beta/src/test/scala/cromwell/backend/google/pipelines/v2beta/api/DeserializationSpec.scala @@ -81,9 +81,9 @@ class DeserializationSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matc "commands" -> List[String]("echo", "hello").asJava ).asJava ).asJava, - "resources" -> Map( + "resources" -> Map[String, Object]( "projectId" -> "project", - "virtualMachine" -> Map( + "virtualMachine" -> Map[String, Any]( "machineType" -> "custom-1-1024", "preemptible" -> false ).asJava @@ -115,7 +115,7 @@ class DeserializationSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matc "commands" -> List[String]("echo", "hello").asJava ).asJava ).asJava, - "resources" -> Map( + "resources" -> Map[String, Object]( "projectId" -> "project", "virtualMachine" -> Map( "machineType" -> "custom-1-1024", diff --git a/wdl/model/draft2/src/main/scala/wdl/draft2/model/AstTools.scala b/wdl/model/draft2/src/main/scala/wdl/draft2/model/AstTools.scala index c1f3c18242b..24194807f17 100644 --- a/wdl/model/draft2/src/main/scala/wdl/draft2/model/AstTools.scala +++ b/wdl/model/draft2/src/main/scala/wdl/draft2/model/AstTools.scala @@ -196,7 +196,7 @@ object AstTools { val pairType = womType.asInstanceOf[WomPairType] WomPair(subElements.head.womValue(pairType.leftType, wdlSyntaxErrorFormatter), subElements(1).womValue(pairType.rightType, wdlSyntaxErrorFormatter)) } else { - throw new SyntaxError(s"Could not convert AST to a $womType (${Option(astNode).getOrElse("No AST").toString})") + throw new SyntaxError(s"Could not convert AST to a $womType (${Option(astNode).map(_.toString).getOrElse("No AST")})") } } @@ -224,7 +224,7 @@ object AstTools { case a: Ast if a.getName == "TupleLiteral" => astTupleToValue(a) case a: Ast if a.getName == "MapLiteral" && womType.isInstanceOf[WomMapType] => astToMap(a) case a: Ast if a.getName == "ObjectLiteral" && womType == WomObjectType => astToObject(a) - case _ => throw new SyntaxError(s"Could not convert AST to a $womType (${Option(astNode).getOrElse("No AST").toString})") + case _ => throw new SyntaxError(s"Could not convert AST to a $womType (${Option(astNode).map(_.toString).getOrElse("No AST")})") } } } diff --git a/wom/src/test/scala/wom/types/WomFileTypeSpec.scala b/wom/src/test/scala/wom/types/WomFileTypeSpec.scala index 217c15964cc..c4a90fadb07 100644 --- a/wom/src/test/scala/wom/types/WomFileTypeSpec.scala +++ b/wom/src/test/scala/wom/types/WomFileTypeSpec.scala @@ -65,7 +65,7 @@ class WomFileTypeSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matchers } } - lazy val failedCoercionTests = Table( + lazy val failedCoercionTests = Table[String, WomType, Any, String]( ("description", "womFileType", "value", "expected"), ("a double to a dir", WomUnlistedDirectoryType, 6.28318, diff --git a/wom/src/test/scala/wom/types/WomTypeSpec.scala b/wom/src/test/scala/wom/types/WomTypeSpec.scala index 01c5030806d..4d3c7befeb5 100644 --- a/wom/src/test/scala/wom/types/WomTypeSpec.scala +++ b/wom/src/test/scala/wom/types/WomTypeSpec.scala @@ -10,6 +10,7 @@ 
import wom.values._ import scala.runtime.ScalaRunTime import scala.util.Random +import scala.util.matching.Regex class WomTypeSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matchers { @@ -32,7 +33,7 @@ class WomTypeSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matchers { WomSingleFileType.stableName shouldEqual "File" } - val rawValuesCoercedToType = Table( + val rawValuesCoercedToType = Table[Any, WomType, Any, Regex]( ( "Raw Value", "WomType", @@ -43,33 +44,33 @@ class WomTypeSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matchers { WomString("hello"), WomIntegerType, classOf[NumberFormatException], - "For input string: \"hello\"" + "For input string: \"hello\"".r ), ( WomInteger(0), WomBooleanType, classOf[IllegalArgumentException], - """No coercion defined from wom value\(s\) '0' of type 'Int' to 'Boolean'.""" + """No coercion defined from wom value\(s\) '0' of type 'Int' to 'Boolean'.""".r ), ( 0, WomBooleanType, classOf[IllegalArgumentException], - "No coercion defined from '0' of type 'java.lang.Integer' to 'Boolean'." + "No coercion defined from '0' of type 'java.lang.Integer' to 'Boolean'.".r ), ( Array(0, 1, 2, 3, 4), WomBooleanType, classOf[IllegalArgumentException], - """No coercion defined from 'Array\(0, 1, 2\)' of type 'int\[\]' to 'Boolean'.""" + """No coercion defined from 'Array\(0, 1, 2\)' of type 'int\[\]' to 'Boolean'.""".r ), ( new AnyRef {}, WomBooleanType, classOf[IllegalArgumentException], - "No coercion defined from" + + ("No coercion defined from" + """ 'wom.types.WomTypeSpec\$\$anon\$(.*)@.*' of type""" + - """ 'wom.types.WomTypeSpec\$\$anon\$\1' to 'Boolean'.""" + """ 'wom.types.WomTypeSpec\$\$anon\$\1' to 'Boolean'.""").r ), ( WomArray(WomArrayType(WomOptionalType(WomIntegerType)), Seq( @@ -81,19 +82,19 @@ class WomTypeSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matchers { ), WomOptionalType(WomMaybeEmptyArrayType(WomIntegerType)), classOf[IllegalArgumentException], - """No coercion defined from wom value\(s\) '\[0, 1, 2\]' of type 'Array\[Int\?\]' to 'Array\[Int\]\?'.""" + """No coercion defined from wom value\(s\) '\[0, 1, 2\]' of type 'Array\[Int\?\]' to 'Array\[Int\]\?'.""".r ), ( WomArray(WomArrayType(WomOptionalType(WomIntegerType)), Seq(WomOptionalValue.none(WomIntegerType))), WomOptionalType(WomMaybeEmptyArrayType(WomIntegerType)), classOf[IllegalArgumentException], - """No coercion defined from wom value\(s\) '\[null\]' of type 'Array\[Int\?\]' to 'Array\[Int\]\?'.""" + """No coercion defined from wom value\(s\) '\[null\]' of type 'Array\[Int\?\]' to 'Array\[Int\]\?'.""".r ), ( WomOptionalValue.none(WomArrayType(WomIntegerType)), WomMaybeEmptyArrayType(WomOptionalType(WomIntegerType)), classOf[IllegalArgumentException], - """No coercion defined from wom value\(s\) 'null' of type 'Array\[Int\]\?' to 'Array\[Int\?\]'.""" + """No coercion defined from wom value\(s\) 'null' of type 'Array\[Int\]\?' to 'Array\[Int\?\]'.""".r ) ) From d602cef3486892155de77ec4cae2298d87bc60c0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 19 Oct 2022 10:37:44 -0400 Subject: [PATCH 100/326] WX-781 Bump jackson-databind in /CromwellRefdiskManifestCreator (#6935) Bumps [jackson-databind](https://github.com/FasterXML/jackson) from 2.13.2.2 to 2.13.4.1. 
- [Release notes](https://github.com/FasterXML/jackson/releases) - [Commits](https://github.com/FasterXML/jackson/commits) --- updated-dependencies: - dependency-name: com.fasterxml.jackson.core:jackson-databind dependency-type: direct:production ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- CromwellRefdiskManifestCreator/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CromwellRefdiskManifestCreator/pom.xml b/CromwellRefdiskManifestCreator/pom.xml index 987aad421be..829f64b5fd3 100644 --- a/CromwellRefdiskManifestCreator/pom.xml +++ b/CromwellRefdiskManifestCreator/pom.xml @@ -48,7 +48,7 @@ com.fasterxml.jackson.core jackson-databind - 2.13.2.2 + 2.13.4.1 org.apache.logging.log4j From 34e358867ac8d9c1c9ac3cb462188f5a256cf2b6 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Fri, 21 Oct 2022 17:59:25 -0400 Subject: [PATCH 101/326] WX-808 Host allowlist for HTTP imports (#6938) * `hostAllowlist` that allows everything * Refactor * Stick allow list in HttpResolver * Better default config * Allow list tests * Make it build Co-authored-by: Janet Gainer-Dewar --- core/src/main/resources/reference.conf | 4 ++ .../languages/util/ImportResolver.scala | 53 +++++++++++++++---- .../languages/util/ImportResolverSpec.scala | 43 +++++++++++++-- .../NamespaceCacheSpec.scala | 2 +- 4 files changed, 85 insertions(+), 17 deletions(-) diff --git a/core/src/main/resources/reference.conf b/core/src/main/resources/reference.conf index c58eda5bc9f..8034409dbf5 100644 --- a/core/src/main/resources/reference.conf +++ b/core/src/main/resources/reference.conf @@ -439,6 +439,10 @@ engine { languages { default: WDL WDL { + http-allow-list { + enabled: false + allowed-http-hosts: [] + } versions { default: "draft-2" "draft-2" { diff --git a/languageFactories/language-factory-core/src/main/scala/cromwell/languages/util/ImportResolver.scala b/languageFactories/language-factory-core/src/main/scala/cromwell/languages/util/ImportResolver.scala index 01c5db23084..f7fd70cfc56 100644 --- a/languageFactories/language-factory-core/src/main/scala/cromwell/languages/util/ImportResolver.scala +++ b/languageFactories/language-factory-core/src/main/scala/cromwell/languages/util/ImportResolver.scala @@ -9,12 +9,14 @@ import cats.syntax.either._ import cats.syntax.validated._ import com.softwaremill.sttp._ import com.softwaremill.sttp.asynchttpclient.cats.AsyncHttpClientCatsBackend +import com.typesafe.config.ConfigFactory import common.Checked import common.transforms.CheckedAtoB import common.validation.ErrorOr._ import common.validation.Checked._ import common.validation.Validation._ import cromwell.core.path.{DefaultPathBuilder, Path} +import net.ceedubs.ficus.Ficus._ import java.nio.file.{Path => NioPath} import java.security.MessageDigest @@ -157,7 +159,9 @@ object ImportResolver { } } - case class HttpResolver(relativeTo: Option[String] = None, headers: Map[String, String] = Map.empty) extends ImportResolver { + case class HttpResolver(relativeTo: Option[String], + headers: Map[String, String], + hostAllowlist: Option[List[String]]) extends ImportResolver { import HttpResolver._ override def name: String = relativeTo match { @@ -169,7 +173,7 @@ object ImportResolver { def newResolverList(newRoot: String): List[ImportResolver] = { val rootWithoutFilename = newRoot.split('/').init.mkString("", "/", "/") List( - HttpResolver(relativeTo = Some(canonicalize(rootWithoutFilename)), headers) + 
HttpResolver(relativeTo = Some(canonicalize(rootWithoutFilename)), headers, hostAllowlist) ) } @@ -183,18 +187,20 @@ object ImportResolver { else "Relative path".invalidNelCheck } + def isAllowed(uri: Uri): Boolean = hostAllowlist match { + case Some(hosts) => hosts.contains(uri.host) + case None => true + } + override def innerResolver(str: String, currentResolvers: List[ImportResolver]): Checked[ResolvedImportBundle] = { - pathToLookup(str) flatMap { toLookup => + pathToLookup(str) flatMap { toLookup: WorkflowSource => (Try { - implicit val sttpBackend = HttpResolver.sttpBackend() - val responseIO: IO[Response[String]] = sttp.get(uri"$toLookup").headers(headers).send() - - // temporary situation to get functionality working before - // starting in on async-ifying the entire WdlNamespace flow - val result: Checked[String] = Await.result(responseIO.unsafeToFuture(), 15.seconds).body.leftMap { e => NonEmptyList(e.toString.trim, List.empty) } + val uri: Uri = uri"$toLookup" - result map { - ResolvedImportBundle(_, newResolverList(toLookup), ResolvedImportRecord(toLookup)) + if (isAllowed(uri)) { + getUri(toLookup) + } else { + s"Disallowed domain in URI. ${uri.toString()}".invalidNelCheck } } match { case Success(result) => result @@ -203,6 +209,19 @@ object ImportResolver { } } + private def getUri(toLookup: WorkflowSource): Either[NonEmptyList[WorkflowSource], ResolvedImportBundle] = { + implicit val sttpBackend = HttpResolver.sttpBackend() + val responseIO: IO[Response[WorkflowSource]] = sttp.get(uri"$toLookup").headers(headers).send() + + // temporary situation to get functionality working before + // starting in on async-ifying the entire WdlNamespace flow + val result: Checked[WorkflowSource] = Await.result(responseIO.unsafeToFuture(), 15.seconds).body.leftMap { e => NonEmptyList(e.toString.trim, List.empty) } + + result map { + ResolvedImportBundle(_, newResolverList(toLookup), ResolvedImportRecord(toLookup)) + } + } + override def cleanupIfNecessary(): ErrorOr[Unit] = ().validNel override def hashKey: ErrorOr[String] = relativeTo.toString.md5Sum.validNel @@ -213,6 +232,18 @@ object ImportResolver { import common.util.IntrospectableLazy import common.util.IntrospectableLazy._ + def apply(relativeTo: Option[String] = None, + headers: Map[String, String] = Map.empty): HttpResolver = { + val config = ConfigFactory.load().getConfig("languages.WDL.http-allow-list") + val allowListEnabled = config.as[Option[Boolean]]("enabled").getOrElse(false) + val allowList: Option[List[String]] = + if (allowListEnabled) + config.as[Option[List[String]]]("allowed-http-hosts") + else None + + new HttpResolver(relativeTo, headers, allowList) + } + val sttpBackend: IntrospectableLazy[SttpBackend[IO, Nothing]] = lazily { // 2.13 Beginning with sttp 1.6.x a `ContextShift` parameter is now required to construct an // `AsyncHttpClientCatsBackend`. 
There may be a more appropriate choice for backing this than the global diff --git a/languageFactories/language-factory-core/src/test/scala/cromwell/languages/util/ImportResolverSpec.scala b/languageFactories/language-factory-core/src/test/scala/cromwell/languages/util/ImportResolverSpec.scala index 26249b02da1..813a688a573 100644 --- a/languageFactories/language-factory-core/src/test/scala/cromwell/languages/util/ImportResolverSpec.scala +++ b/languageFactories/language-factory-core/src/test/scala/cromwell/languages/util/ImportResolverSpec.scala @@ -1,7 +1,8 @@ package cromwell.languages.util -import java.nio.file.{Files, Paths} +import com.softwaremill.sttp._ +import java.nio.file.{Files, Paths} import common.assertion.CromwellTimeoutSpec import common.assertion.ErrorOrAssertions._ import cromwell.core.WorkflowId @@ -41,13 +42,13 @@ class ImportResolverSpec extends AnyFlatSpec with CromwellTimeoutSpec with Match } it should "resolve a path from no initial root" in { - val resolver = HttpResolver() + val resolver = HttpResolver(None, Map.empty, None) val toResolve = resolver.pathToLookup("http://abc.com:8000/blah1/blah2.wdl") toResolve shouldBeValid "http://abc.com:8000/blah1/blah2.wdl" } it should "resolve a path and store the import in ResolvedImportRecord" in { - val resolver = HttpResolver() + val resolver = HttpResolver(None, Map.empty, None) val importUri = "https://raw.githubusercontent.com/broadinstitute/cromwell/develop/centaur/src/main/resources/standardTestCases/hello/hello.wdl" val resolvedBundle = resolver.innerResolver(importUri, List(resolver)) @@ -57,10 +58,42 @@ class ImportResolverSpec extends AnyFlatSpec with CromwellTimeoutSpec with Match } } + behavior of "HttpResolver with allowList" + + val allowList = Option(List("my.favorite.wdls.com", "anotherwdlsite.org")) + val pathEnd = "bob/loblaw/blah/blah.wdl" + + it should "allow any import when there is no allow list" in { + val resolver = HttpResolver(None, Map.empty, None) + resolver.isAllowed(uri"https://my.favorite.wdls.com/$pathEnd") shouldBe true + resolver.isAllowed(uri"http://some-garbage.whatever.eu/$pathEnd") shouldBe true + resolver.isAllowed(uri"localhost:8080/my/secrets") shouldBe true + } + + it should "allow any import that's on the allow list" in { + val resolver = HttpResolver(None, Map.empty, allowList) + resolver.isAllowed(uri"https://my.favorite.wdls.com/$pathEnd") shouldBe true + resolver.isAllowed(uri"http://anotherwdlsite.org/$pathEnd") shouldBe true + resolver.isAllowed(uri"https://yetanotherwdlsite.org/$pathEnd") shouldBe false + resolver.isAllowed(uri"https://FOO.my.favorite.wdls.com/$pathEnd") shouldBe false + resolver.isAllowed(uri"https://wdls.com/$pathEnd") shouldBe false + resolver.isAllowed(uri"localhost:8080/my/secrets") shouldBe false + } + + it should "allow nothing with an empty allow list" in { + val resolver = HttpResolver(None, Map.empty, Option(List.empty)) + resolver.isAllowed(uri"https://my.favorite.wdls.com/$pathEnd") shouldBe false + resolver.isAllowed(uri"http://anotherwdlsite.org/$pathEnd") shouldBe false + resolver.isAllowed(uri"https://yetanotherwdlsite.org/$pathEnd") shouldBe false + resolver.isAllowed(uri"https://FOO.my.favorite.wdls.com/$pathEnd") shouldBe false + resolver.isAllowed(uri"https://wdls.com/$pathEnd") shouldBe false + resolver.isAllowed(uri"localhost:8080/my/secrets") shouldBe false + } + behavior of "HttpResolver with a 'relativeTo' value" - val relativeHttpResolver = HttpResolver(relativeTo = Some("http://abc.com:8000/blah1/blah2/")) - val 
relativeToGithubHttpResolver = HttpResolver(relativeTo = Some(relativeToGithubRoot)) + val relativeHttpResolver = HttpResolver(relativeTo = Some("http://abc.com:8000/blah1/blah2/"), Map.empty, None) + val relativeToGithubHttpResolver = HttpResolver(relativeTo = Some(relativeToGithubRoot), Map.empty, None) it should "resolve an absolute path from a different initial root" in { val pathToLookup = relativeHttpResolver.pathToLookup("http://def.org:8080/blah3.wdl") diff --git a/languageFactories/wdl-draft2/src/test/scala/languages.wdl.draft2/NamespaceCacheSpec.scala b/languageFactories/wdl-draft2/src/test/scala/languages.wdl.draft2/NamespaceCacheSpec.scala index df87f2fa644..b96cfa5c2f6 100644 --- a/languageFactories/wdl-draft2/src/test/scala/languages.wdl.draft2/NamespaceCacheSpec.scala +++ b/languageFactories/wdl-draft2/src/test/scala/languages.wdl.draft2/NamespaceCacheSpec.scala @@ -61,7 +61,7 @@ class NamespaceCacheSpec extends AnyFlatSpec with CromwellTimeoutSpec with Befor ) var lookupCount = 0 - val countingResolver = new HttpResolver() { + val countingResolver = new HttpResolver(None, Map.empty, None) { override def pathToLookup(str: String): Checked[String] = { lookupCount = lookupCount + 1 super.pathToLookup(str) From f64b2c45c38426e495a46d6630348fb8dec56e18 Mon Sep 17 00:00:00 2001 From: Christian Freitas Date: Mon, 24 Oct 2022 14:09:56 -0400 Subject: [PATCH 102/326] Update commons text to 1.10.0 (#6937) --- project/Dependencies.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 0a0dee70cfa..8906e010531 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -30,7 +30,7 @@ object Dependencies { private val commonsLang3V = "3.12.0" private val commonsMathV = "3.6.1" private val commonNetV = "3.8.0" // via: https://commons.apache.org/proper/commons-net/ - private val commonsTextV = "1.9" + private val commonsTextV = "1.10.0" private val configsV = "0.6.1" private val delightRhinoSandboxV = "0.0.15" private val diffsonSprayJsonV = "4.1.1" From c775ddd94d32be35911b7faa345322fd67481d40 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Fri, 28 Oct 2022 09:32:35 -0400 Subject: [PATCH 103/326] WX-751 Token refresh signal for monitoring (#6939) * Log messages * `DEBUG` -> `INFO` --- .../cromwell/filesystems/blob/BlobFileSystemManager.scala | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobFileSystemManager.scala b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobFileSystemManager.scala index a3e6f33572a..3ebce4db878 100644 --- a/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobFileSystemManager.scala +++ b/filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobFileSystemManager.scala @@ -17,6 +17,7 @@ import java.time.{Duration, Instant, OffsetDateTime} import scala.jdk.CollectionConverters._ import scala.util.{Failure, Success, Try} import com.azure.resourcemanager.storage.models.StorageAccountKey +import com.typesafe.scalalogging.LazyLogging case class FileSystemAPI() { def getFileSystem(uri: URI): Try[FileSystem] = Try(FileSystems.getFileSystem(uri)) @@ -44,7 +45,7 @@ case class BlobFileSystemManager( expiryBufferMinutes: Long, blobTokenGenerator: BlobTokenGenerator, fileSystemAPI: FileSystemAPI = FileSystemAPI(), - private val initialExpiration: Option[Instant] = None) { + private val initialExpiration: Option[Instant] = None) extends LazyLogging { private var expiry: 
Option[Instant] = initialExpiration val buffer: Duration = Duration.of(expiryBufferMinutes, ChronoUnit.MINUTES) @@ -57,10 +58,13 @@ case class BlobFileSystemManager( shouldReopenFilesystem match { case false => fileSystemAPI.getFileSystem(uri).recoverWith { // If no filesystem already exists, this will create a new connection, with the provided configs - case _: FileSystemNotFoundException => blobTokenGenerator.generateAccessToken.flatMap(generateFilesystem(uri, container, _)) + case _: FileSystemNotFoundException => + logger.info(s"Creating new blob filesystem for URI $uri") + blobTokenGenerator.generateAccessToken.flatMap(generateFilesystem(uri, container, _)) } // If the token has expired, OR there is no token record, try to close the FS and regenerate case true => + logger.info(s"Closing & regenerating token for existing blob filesystem at URI $uri") fileSystemAPI.closeFileSystem(uri) blobTokenGenerator.generateAccessToken.flatMap(generateFilesystem(uri, container, _)) } From 8485284a52adcf9163b5fd2a60e95bd0e1ce012e Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Tue, 1 Nov 2022 10:14:30 -0400 Subject: [PATCH 104/326] WX-744 Optionally rewrite blob paths to appear as local paths (#6941) * Modify blob paths for TES * Make blob transformation configurable * Update supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesTask.scala Co-authored-by: Adam Nichols * Apply PR feedback in second place Co-authored-by: Adam Nichols --- build.sbt | 1 + .../TesAsyncBackendJobExecutionActor.scala | 10 +++++++- .../cromwell/backend/impl/tes/TesTask.scala | 22 ++++++++++++++--- .../backend/impl/tes/TesTaskSpec.scala | 24 +++++++++++++++++++ 4 files changed, 53 insertions(+), 4 deletions(-) diff --git a/build.sbt b/build.sbt index 3b255d0e0e5..948d18039ab 100644 --- a/build.sbt +++ b/build.sbt @@ -245,6 +245,7 @@ lazy val tesBackend = (project in backendRoot / "tes") .dependsOn(sfsBackend) .dependsOn(ftpFileSystem) .dependsOn(drsFileSystem) + .dependsOn(azureBlobFileSystem) .dependsOn(backend % "test->test") .dependsOn(common % "test->test") diff --git a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesAsyncBackendJobExecutionActor.scala b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesAsyncBackendJobExecutionActor.scala index 1fe5b743465..63b43ddbcf5 100644 --- a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesAsyncBackendJobExecutionActor.scala @@ -80,6 +80,14 @@ class TesAsyncBackendJobExecutionActor(override val standardParams: StandardAsyn private val tesEndpoint = workflowDescriptor.workflowOptions.getOrElse("endpoint", tesConfiguration.endpointURL) + // Temporary support for configuring the format we use to send BlobPaths to TES. + // Added 10/2022 as a workaround for the CromwellOnAzure TES server expecting + // blob containers to be mounted via blobfuse rather than addressed natively. 
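The rewrite only applies when the TES backend configuration sets `transform-blob-to-local-path = true` (read just below), and its net effect is the string transformation implemented in `TesTask.transformBlobString` further down in this patch. A standalone sketch of that transformation, assuming the same two replacement rules:

```scala
object BlobPathRewriteExample extends App {
  // Mirrors the rules in TesTask.transformBlobString: strip the "https:/" prefix and the
  // ".blob.core.windows.net" host segment so the URL reads like a blobfuse-mounted local path.
  private val blobSegment = ".blob.core.windows.net"

  def rewrite(url: String): String =
    if (url.contains(blobSegment)) url.replaceFirst("https:/", "").replaceFirst(blobSegment, "")
    else url

  // Blob URL becomes a container-rooted local path:
  println(rewrite("https://coaexternalstorage.blob.core.windows.net/cromwell/mydir/myfile.txt"))
  // /coaexternalstorage/cromwell/mydir/myfile.txt

  // Non-blob URLs pass through untouched:
  println(rewrite("https://some-bogus-url.test/cromwell/mydir/myfile.txt"))
}
```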
+ private val transformBlobToLocalPaths: Boolean = + configurationDescriptor.backendConfig + .getAs[Boolean]("transform-blob-to-local-path") + .getOrElse(false) + override lazy val jobTag: String = jobDescriptor.key.tag private val outputMode = validate { @@ -148,7 +156,7 @@ class TesAsyncBackendJobExecutionActor(override val standardParams: StandardAsyn mode) }) - tesTask.map(TesTask.makeTask) + tesTask.map(TesTask.makeTask(_, transformBlobToLocalPaths)) } def writeScriptFile(): Future[Unit] = { diff --git a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesTask.scala b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesTask.scala index 52fdd7c3fce..719ad36942b 100644 --- a/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesTask.scala +++ b/supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesTask.scala @@ -258,14 +258,16 @@ object TesTask { ) } - def makeTask(tesTask: TesTask): Task = { + def makeTask(tesTask: TesTask, transformBlobToLocalPath: Boolean = false): Task = { + val inputs = if (transformBlobToLocalPath) transformInputs(tesTask.inputs) else tesTask.inputs + val outputs = if (transformBlobToLocalPath) transformOutputs(tesTask.outputs) else tesTask.outputs Task( id = None, state = None, name = Option(tesTask.name), description = Option(tesTask.description), - inputs = Option(tesTask.inputs), - outputs = Option(tesTask.outputs), + inputs = Option(inputs), + outputs = Option(outputs), resources = Option(tesTask.resources), executors = tesTask.executors, volumes = None, @@ -273,6 +275,20 @@ object TesTask { logs = None ) } + + def transformInputs(inputs: Seq[Input]): Seq[Input] = inputs.map(i => + i.copy(url=i.url.map(transformBlobString)) + ) + + def transformOutputs(outputs: Seq[Output]): Seq[Output] = outputs.map(i => + i.copy(url=i.url.map(transformBlobString)) + ) + + val blobSegment = ".blob.core.windows.net" + def transformBlobString(s: String): String = if (s.contains(blobSegment)) { + s.replaceFirst("https:/", "").replaceFirst(blobSegment, "") + } else s + } // Field requirements in classes below based off GA4GH schema diff --git a/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesTaskSpec.scala b/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesTaskSpec.scala index 0e8440f2a1d..c2ee68e0068 100644 --- a/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesTaskSpec.scala +++ b/supportedBackends/tes/src/test/scala/cromwell/backend/impl/tes/TesTaskSpec.scala @@ -88,4 +88,28 @@ class TesTaskSpec task.tags shouldBe Option(Map("foo" -> "bar")) } + + it should "apply the correct transformation to a blob path" in { + val orig = "https://coaexternalstorage.blob.core.windows.net/cromwell/mydir/myfile.txt" + TesTask.transformBlobString(orig) shouldBe "/coaexternalstorage/cromwell/mydir/myfile.txt" + } + + it should "not transform a non-blob path" in { + val orig = "https://some-bogus-url.test/cromwell/mydir/myfile.txt" + TesTask.transformBlobString(orig) shouldBe orig + } + + it should "transform inputs" in { + val baseInput = Input( + Option("name"), + Option("descr"), + Option("https://coaexternalstorage.blob.core.windows.net/cromwell/mydir/myfile.txt"), + "path", + Option("type"), + Option("content"), + ) + val inputs = Option(Seq(baseInput)) + val outcome = inputs.map(TesTask.transformInputs) + outcome.get.head.url.get shouldBe "/coaexternalstorage/cromwell/mydir/myfile.txt" + } } From 269089cb8e51298f65c7ed875d749f40b640be32 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar 
Date: Thu, 3 Nov 2022 16:12:24 -0400 Subject: [PATCH 105/326] Update changelog for wdl http allow list (#6944) --- CHANGELOG.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1e44012027c..1bba52ebded 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,24 @@ failures observed when attempting to localize a large number of DRS files. Updates to dependencies to fix security vulnerabilities. +### Allow list for HTTP WDL resolution + +Administrators can now configure Cromwell with an allow list that limits the domains from which WDLs can be resolved and imported. +Default behavior is unchanged (Cromwell attempts to resolve WDL files from any URI). Example configuration: +``` +languages { + WDL { + http-allow-list { + enabled: true + allowed-http-hosts: [ + "my.wdl.repo.org", + "raw.githubusercontent.com" + ] + } + } +} +``` + ## 84 Release Notes From 4a6ad8e13a74ba278f05c99bf5fef27e064805d8 Mon Sep 17 00:00:00 2001 From: Katrina P <68349264+kpierre13@users.noreply.github.com> Date: Mon, 7 Nov 2022 10:55:22 -0500 Subject: [PATCH 106/326] WM-1491 Fixing Cromwell-client (#6943) * More updated client for use in cbas * Removing excess code --- docs/api/RESTAPI.md | 20 +++++------ .../src/main/resources/swagger/cromwell.yaml | 35 ++++++++----------- 2 files changed, 24 insertions(+), 31 deletions(-) diff --git a/docs/api/RESTAPI.md b/docs/api/RESTAPI.md index f1713bc831a..9eaedd4a3a5 100644 --- a/docs/api/RESTAPI.md +++ b/docs/api/RESTAPI.md @@ -1,5 +1,5 @@ Call Node <------------+ | - * | +-----------+ | - * | Scatter Node | - * +----------------------------------------------+ - * - * MergeNode: If the step input has one or more sources, a merge node will be created and responsible for merging - * those input sources together. It will NOT evaluate the valueFrom field of the input. - * - * ScatterVariableNode: If the step input is being scattered over, a scatter variable node will be created and will - * act as a proxy inside the scatter graph for the shards of the scatter. It depends on an upstream merge node outputing an array - * and will provide at runtime shard values for other nodes of the scatter graph. - * - * OGIN: If the step has at least one input being scattered over, there will be a scatter node created. - * For inputs that are NOT being scattered over but still have one or more input sources (and hence a merge node), an OGIN - * will be created to act as a proxy to the merge node outside the scatter graph. - * - * ExpressionNode: If an input has a valueFrom field, an expression node will be created to evaluate the expression. - * An important fact to note is that the expression needs access to all other input values - * AFTER their source, default value and shard number has been determined but - * BEFORE their (potential) valueFrom is evaluated (see http://www.commonwl.org/v1.0/Workflow.html#WorkflowStepInput) - * This is why on the above diagram, StepInput0Expression depends on the OGIN, and StepInput1Expression depends on the scatter variable. - */ - def callWithInputs(typeMap: WomTypeMap, - workflow: Workflow, - knownNodes: Set[GraphNode], - workflowInputs: Map[String, GraphNodeOutputPort], - validator: RequirementsValidator, - expressionLib: ExpressionLib): Checked[Set[GraphNode]] = { - - implicit val parentName = workflow.explicitWorkflowName - - val scatterLookupSet = - scatter.toList. - flatMap(_.fold(StringOrStringArrayToStringList)). 
- map(id => cwl.FullyQualifiedName(id).id) - - def isStepScattered(workflowStepInputId: String) = scatterLookupSet.contains(workflowStepInputId) - - val unqualifiedStepId: WomIdentifier = { - cwl.FullyQualifiedName.maybeApply(id).map({ fqn => - WomIdentifier(LocalName(fqn.id), womFqn) - }).getOrElse(WomIdentifier(id)) - } - - def typedRunInputs: Map[String, Option[MyriadInputType]] = run.fold(RunToInputTypeMap).apply(parentName) - - def allIdentifiersRecursively(nodes: Set[GraphNode]): Set[WomIdentifier] = nodes.flatMap({ - case w: WorkflowCallNode=> Set(w.identifier) - case c: CommandCallNode => Set(c.identifier) - case e: ExpressionCallNode => Set(e.identifier) - // When a node a call node is being scattered over, it is wrapped inside a scatter node. We still don't want to - // duplicate it though so look inside scatter nodes to see if it's there. - case scatter: ScatterNode => allIdentifiersRecursively(scatter.innerGraph.nodes) - case _ => Set.empty[WomIdentifier] - }) - - // To avoid duplicating nodes, return immediately if we've already covered this node - val haveWeSeenThisStep: Boolean = allIdentifiersRecursively(knownNodes).contains(unqualifiedStepId) - - if (haveWeSeenThisStep) Right(knownNodes) - else { - val callable: Checked[Callable] = run match { - case Run.CommandLineTool(clt) => clt.buildTaskDefinition(validator, expressionLib) - case Run.Workflow(wf) => wf.womDefinition(validator, expressionLib) - case Run.ExpressionTool(et) => et.buildTaskDefinition(validator, expressionLib) - case oh => throw new Exception(s"Programmer Error! Unexpected case match: $oh") - } - - val callNodeBuilder = new CallNode.CallNodeBuilder() - - /* - * Method used to fold over the list of inputs declared by this step. - * Note that because we work on saladed CWL, all ids are fully qualified at this point (e.g: file:///path/to/file/three_step.cwl#cgrep/pattern - * The goal of this method is two fold (pardon the pun): - * 1) link each input of the step to an output port (which at this point can be from a different step or from a workflow input) - * 2) accumulate the nodes created along the way to achieve 1) - */ - def foldStepInput(currentFold: Checked[WorkflowStepInputFold], workflowStepInput: WorkflowStepInput): Checked[WorkflowStepInputFold] = currentFold flatMap { - fold => - /* - * Try to find in the given set an output port named stepOutputId in a call node named stepId - * This is useful when we've determined that the input points to an output of a different step and we want - * to get the corresponding output port. - */ - def findThisInputInSet(set: Set[GraphNode], stepId: String, stepOutputId: String): Checked[OutputPort] = { - for { - // We only care for outputPorts of call nodes or scatter nodes - call <- set.collectFirst { - case callNode: CallNode if callNode.localName == stepId => callNode - case scatterNode: ScatterNode if scatterNode.innerGraph.calls.exists(_.localName == stepId) => scatterNode - }. - toRight(NonEmptyList.one(s"stepId $stepId not found in known Nodes $set")) - output <- call.outputPorts.find(_.internalName == stepOutputId). 
- toRight(NonEmptyList.one(s"step output id $stepOutputId not found in ${call.outputPorts}")) - } yield output - } - - /* - * Build a wom node for the given step and return the newly created nodes - * This is useful when we've determined that the input belongs to an upstream step that we haven't covered yet - */ - def buildUpstreamNodes(upstreamStepId: String, accumulatedNodes: Set[GraphNode]): Checked[Set[GraphNode]] = - // Find the step corresponding to this upstreamStepId in the set of all the steps of this workflow - for { - step <- workflow.steps.find { step => cwl.FullyQualifiedName(step.id).id == upstreamStepId }. - toRight(NonEmptyList.one(s"no step of id $upstreamStepId found in ${workflow.steps.map(_.id).toList}")) - call <- step.callWithInputs(typeMap, workflow, accumulatedNodes, workflowInputs, validator, expressionLib) - } yield call - - def fromWorkflowInput(inputName: String): Checked[Map[String, OutputPort]] = { - // Try to find it in the workflow inputs map, if we can't it's an error - workflowInputs.collectFirst { - case (inputId, port) if inputName == inputId => Map(inputId -> port).asRight[NonEmptyList[String]] - } getOrElse s"Can't find workflow input for $inputName".invalidNelCheck[Map[String, OutputPort]] - } - - def fromStepOutput(stepId: String, stepOutputId: String, accumulatedNodes: Set[GraphNode]): Checked[(Map[String, OutputPort], Set[GraphNode])] = { - // First check if we've already built the WOM node for this step, and if so return the associated output port - findThisInputInSet(accumulatedNodes, stepId, stepOutputId).map(outputPort => (Map(s"$stepId/$stepOutputId" -> outputPort), accumulatedNodes)) - .orElse { - // Otherwise build the upstream nodes and look again in those newly created nodes - for { - newNodes <- buildUpstreamNodes(stepId, accumulatedNodes) - sourceMappings <- findThisInputInSet(newNodes, stepId, stepOutputId).map(outputPort => Map(s"$stepId/$stepOutputId" -> outputPort)) - } yield (sourceMappings, newNodes ++ accumulatedNodes) - } - } - - lazy val workflowStepInputId = cwl.FullyQualifiedName(workflowStepInput.id).id - - def updateFold(sourceMappings: Map[String, OutputPort], newNodes: Set[GraphNode]): Checked[WorkflowStepInputFold] = { - val typeExpectedByRunInput: Option[cwl.MyriadInputType] = typedRunInputs.get(workflowStepInputId).flatten - - val isThisStepScattered = isStepScattered(workflowStepInputId) - - workflowStepInput.toMergeNode(sourceMappings, expressionLib, typeExpectedByRunInput, isThisStepScattered, allRequirements.schemaDefRequirement) match { - // If the input needs a merge node, build it and add it to the input fold - case Some(mergeNode) => - mergeNode.toEither.map({ node => - fold |+| WorkflowStepInputFold( - mergeNodes = Map(workflowStepInput -> node), - generatedNodes = newNodes - ) - }) - case None => (fold |+| WorkflowStepInputFold(generatedNodes = newNodes)).validNelCheck - } - } - - /* - * We intend to validate that all of these sources point to a WOM Outputport that we know about. - * - * If we don't know about them, we find upstream nodes and build them (see "buildUpstreamNodes"). 
- */ - val baseCase = (Map.empty[String, OutputPort], fold.generatedNodes).asRight[NonEmptyList[String]] - val inputMappingsAndGraphNodes: Checked[(Map[String, OutputPort], Set[GraphNode])] = - workflowStepInput.sources.foldLeft(baseCase) { - case (Right((sourceMappings, graphNodes)), inputSource) => - /* - * Parse the inputSource (what this input is pointing to) - * 2 cases: - * - points to a workflow input - * - points to an upstream step - */ - cwl.FullyQualifiedName(inputSource) match { - // The source points to a workflow input, which means it should be in the workflowInputs map - case FileAndId(_, _, inputId) => fromWorkflowInput(inputId).map(newMap => (sourceMappings ++ newMap, graphNodes)) - // The source points to an output from a different step - case FileStepAndId(_, _, stepId, stepOutputId) => fromStepOutput(stepId, stepOutputId, graphNodes).map({ case (newMap, newNodes) => (sourceMappings ++ newMap, newNodes) }) - } - case (other, _) => other - } - - inputMappingsAndGraphNodes.flatMap((updateFold _).tupled) - } - - /* - * Folds over input definitions and build an InputDefinitionFold - */ - def foldInputDefinition(pointerNode: Map[String, GraphNodeWithSingleOutputPort]) - (inputDefinition: InputDefinition): ErrorOr[InputDefinitionFold] = { - inputDefinition match { - case _ if pointerNode.contains(inputDefinition.name) => - val expressionNode = pointerNode(inputDefinition.name) - InputDefinitionFold( - mappings = List(inputDefinition -> expressionNode.inputDefinitionPointer), - callInputPorts = Set(callNodeBuilder.makeInputPort(inputDefinition, expressionNode.singleOutputPort)) - ).validNel - - // No expression node mapping, use the default - case withDefault @ OverridableInputDefinitionWithDefault(_, _, expression, _, _) => - InputDefinitionFold( - mappings = List(withDefault -> Coproduct[InputDefinitionPointer](expression)) - ).validNel - - // Required input without default value and without mapping, this is a validation error - case RequiredInputDefinition(requiredName, _, _, _) => - s"Input ${requiredName.value} is required and is not bound to any value".invalidNel - - // Optional input without mapping, defaults to empty value - case optional: OptionalInputDefinition => - InputDefinitionFold( - mappings = List(optional -> Coproduct[InputDefinitionPointer](optional.womType.none: WomValue)) - ).validNel - case oh => throw new Exception(s"Programmer Error! Unexpected case match: $oh") - } - } - - /* - If the step is being scattered over, then merge nodes can't directly be referenced because they will be outside the scatter graph. - For inputs that are being scattered over, a scatter variable has already been created, but for the others we need - an OGIN to link the merge node to the inner scatter graph. - */ - def buildOGINs(mergeNodes: Map[WorkflowStepInput, ExpressionNode], - scatterVariables: Map[WorkflowStepInput, ScatterVariableNode]): Map[WorkflowStepInput, OuterGraphInputNode] = if (isScattered) { - mergeNodes - .collect({ - case (input, mergeNode) if !scatterVariables.contains(input) => - val ogin = OuterGraphInputNode( - WomIdentifier(input.parsedId).combine("OGIN"), - mergeNode.singleOutputPort, - preserveScatterIndex = false - ) - input -> ogin - }) - } else Map.empty - - /* - * For inputs that have a valueFrom field, create an ExpressionNode responsible for evaluating the expression. - * Note that this expression might need access to the other input values, so make each expression node depend on all other - * inputs. 
- */ - def buildStepInputValueFromNodes(sharedInputNodes: Map[WorkflowStepInput, GraphNodeWithSingleOutputPort]): Checked[Map[String, ExpressionNode]] = { - // Add new information to the typeMap from the shard input nodes. - lazy val updatedTypeMap = sharedInputNodes.map({ - // If the input node is a scatter variable, make sure the type is the item type, not the array type, as the expression node - // will operate on shards not on the whole scattered array. - case (stepInput, scatter: ScatterVariableNode) => stepInput.parsedId -> scatter.womType - case (stepInput, node) => stepInput.parsedId -> node.singleOutputPort.womType - }) ++ typeMap - - // Go over each step input and create an expression node for those which have a valueFrom - in.toList.collect({ - case stepInput @ WorkflowStepInput(_, _, _, _, Some(valueFrom)) => - // Transform the shared inputs map into a usable map to create the expression. - lazy val sharedInputMap: Map[String, OutputPort] = sharedInputNodes.map({ - case (siblingStepInput, graphNode) => siblingStepInput.parsedId -> graphNode.singleOutputPort - }) - val typeExpectedByRunInput: Option[cwl.MyriadInputType] = typedRunInputs.get(stepInput.parsedId).flatten - val isThisStepScattered = isStepScattered(stepInput.parsedId) - - stepInput.toExpressionNode(valueFrom, typeExpectedByRunInput, isThisStepScattered, sharedInputMap, updatedTypeMap, expressionLib, allRequirements.schemaDefRequirement).map(stepInput.parsedId -> _) - }) - .sequence[ErrorOr, (String, ExpressionNode)] - .toEither - .map(_.toMap) - } - - //inputs base case consist of the nodes we already know about - val baseCase = WorkflowStepInputFold(generatedNodes = knownNodes).asRight[NonEmptyList[String]] - - // WorkflowStepInputFold contains the mappings from step input to ExpressionNode as well as all created nodes - val stepInputFoldCheck: Checked[WorkflowStepInputFold] = in.foldLeft(baseCase)(foldStepInput) - - /* - * This (big) flatMap builds nodes from top to bottom in the diagram above. - * If necessary, the scatter node is built last as it wraps some of the other nodes. - */ - for { - /* ************************************ */ - /* ************ Merge Nodes *********** */ - /* ************************************ */ - // Build merge nodes and recursively generates other call nodes that we haven't seen so far - stepInputFold <- stepInputFoldCheck - // Extract the merge nodes from the fold - mergeNodes = stepInputFold.mergeNodes - - /* ************************************ */ - /* ****** Scatter Variable Nodes ****** */ - /* ************************************ */ - scatterVariableNodes <- ScatterLogic.buildScatterVariableNodes(scatter, mergeNodes, unqualifiedStepId.localName.value) - - /* ************************************ */ - /* *************** OGINS ************** */ - /* ************************************ */ - ogins = buildOGINs(mergeNodes, scatterVariableNodes) - - /* ************************************ */ - /* ********* Expression Nodes ********* */ - /* ************************************ */ - // Aggregate the generated nodes so far. 
This map will be used to generate expression nodes, so the order of aggregation matters: - // scatter variables and ogins take precedence over merge nodes (see diagram) - aggregatedMapForValueFromNodes = mergeNodes ++ scatterVariableNodes ++ ogins - // Build expression nodes for inputs that have a valueFrom field - stepInputValueFromNodes <- buildStepInputValueFromNodes(aggregatedMapForValueFromNodes) - - /* ************************************ */ - /* ************* Call Node ************ */ - /* ************************************ */ - // Get the callable object for this step - checkedCallable <- callable - // Aggregate again by adding generated expression nodes. Again order matters here, expression nodes override other nodes. - aggregatedMapForInputDefinitions = aggregatedMapForValueFromNodes.asIdentifierMap ++ stepInputValueFromNodes - // Assign each of the callable's input definition to an output port from the pointer map - inputDefinitionFold <- checkedCallable.inputs.foldMap(foldInputDefinition(aggregatedMapForInputDefinitions)).toEither - // Build the call node - callAndNodes = callNodeBuilder.build(unqualifiedStepId, checkedCallable, inputDefinitionFold, Set.empty, None) - // Depending on whether the step is being scattered, invoke the scatter node builder or not - - /* ************************************ */ - /* ************ Scatter Node ********** */ - /* ************************************ */ - scatterNodeOrExposedNodes <- if (isScattered) { - ScatterLogic.buildScatterNode( - callAndNodes, - NonEmptyList.fromListUnsafe(scatterVariableNodes.values.toList), - ogins.values.toSet, - stepInputValueFromNodes.values.toSet, - scatterMethod).map(Set(_)) - } else { - // If there's no scatter node then we need to return the expression nodes and the call node explicitly - // as they won't be contained in the scatter inner graph - (stepInputValueFromNodes.values.toSet + callAndNodes.node).validNelCheck - } - - /* - * Return all the nodes that need to be made available to the workflow graph: - * knownNodes: this method is used to fold over steps so we don't want to forget to accumulate known nodes - * mergeNodes: they're always outside of the scatter so always return them - * generatedNodes: nodes generated recursively to build this node - * scatterNodeOrExposedNodes: see explanation above - */ - allNodes = knownNodes ++ mergeNodes.values.toSet ++ stepInputFold.generatedNodes ++ scatterNodeOrExposedNodes - } yield allNodes - } - } -} - -/** - * @see WorkflowstepOutput - */ -case class WorkflowStepOutput(id: String) - -object WorkflowStep { - - // A monoid can't be derived automatically for this class because it contains a Map[String, ExpressionNode], - // and there's no monoid defined over ExpressionNode - implicit val workflowStepInputFoldMonoid: Monoid[WorkflowStepInputFold] = new Monoid[WorkflowStepInputFold] { - override def empty: WorkflowStepInputFold = WorkflowStepInputFold() - override def combine(x: WorkflowStepInputFold, y: WorkflowStepInputFold): WorkflowStepInputFold = { - WorkflowStepInputFold( - mergeNodes = x.mergeNodes ++ y.mergeNodes, - generatedNodes = x.generatedNodes ++ y.generatedNodes - ) - } - } - - private [cwl] case class WorkflowStepInputFold(mergeNodes: Map[WorkflowStepInput, ExpressionNode] = Map.empty, - generatedNodes: Set[GraphNode] = Set.empty) - - /** - * Maps input variable (to be scattered over) to their scatter variable node - */ - type ScatterMappings = Map[ExpressionNode, ScatterVariableNode] - - val emptyOutputs: WorkflowStepOutputType = Array.empty - 
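A minimal, self-contained sketch (not part of this patch) of the accumulation pattern behind the WorkflowStepInputFold monoid above: it assumes cats is on the classpath, as the deleted code already does, and SimpleFold together with its sample values is a hypothetical stand-in for WorkflowStepInputFold.

import cats.Monoid
import cats.syntax.monoid._

// Hypothetical stand-in for WorkflowStepInputFold: two independently mergeable pieces.
final case class SimpleFold(mappings: Map[String, Int] = Map.empty,
                            generatedNodes: Set[String] = Set.empty)

object SimpleFold {
  // Combining two folds is just merging the maps and unioning the sets.
  implicit val monoid: Monoid[SimpleFold] = new Monoid[SimpleFold] {
    def empty: SimpleFold = SimpleFold()
    def combine(x: SimpleFold, y: SimpleFold): SimpleFold =
      SimpleFold(x.mappings ++ y.mappings, x.generatedNodes ++ y.generatedNodes)
  }
}

object SimpleFoldExample extends App {
  val perStepInputFolds = List(
    SimpleFold(Map("pattern" -> 1), Set("mergeNodeA")),
    SimpleFold(Map("file" -> 2), Set("mergeNodeB", "callNodeC"))
  )
  // |+| is the Monoid combine; folding from empty mirrors how each step input's
  // contribution is accumulated into a single fold.
  val combined = perStepInputFolds.foldLeft(Monoid[SimpleFold].empty)(_ |+| _)
  println(combined) // SimpleFold(Map(pattern -> 1, file -> 2),Set(mergeNodeA, mergeNodeB, callNodeC))
}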
- type Run = - String :+: - CommandLineTool :+: - ExpressionTool :+: - Workflow :+: - CNil - - object Run { - object String { def unapply(run: Run): Option[String] = run.select[String] } - object Workflow { def unapply(run: Run): Option[Workflow] = run.select[Workflow] } - object CommandLineTool { def unapply(run: Run): Option[CommandLineTool] = run.select[CommandLineTool] } - object ExpressionTool { def unapply(run: Run): Option[ExpressionTool] = run.select[ExpressionTool] } - } - - type WorkflowStepOutputInnerType = String :+: WorkflowStepOutput :+: CNil - type WorkflowStepOutputType = Array[WorkflowStepOutputInnerType] -} diff --git a/cwl/src/main/scala/cwl/WorkflowStepInput.scala b/cwl/src/main/scala/cwl/WorkflowStepInput.scala deleted file mode 100644 index a3d028dbdd6..00000000000 --- a/cwl/src/main/scala/cwl/WorkflowStepInput.scala +++ /dev/null @@ -1,155 +0,0 @@ -package cwl - -import cats.data.NonEmptyList -import cats.syntax.either._ -import common.Checked -import common.validation.Checked._ -import common.validation.ErrorOr.ErrorOr -import cwl.InputParameter.DefaultToWomValuePoly -import cwl.LinkMergeMethod.LinkMergeMethod -import cwl.WorkflowStepInput.InputSource -import cwl.command.ParentName -import shapeless.{:+:, CNil} -import wom.expression.ValueAsAnExpression -import wom.graph.GraphNodePort.OutputPort -import wom.graph.WomIdentifier -import wom.graph.expression.{AnonymousExpressionNode, ExposedExpressionNode, ExpressionNode, PlainAnonymousExpressionNode} -import wom.types._ - -case class WorkflowStepInput( - id: String, - source: Option[InputSource] = None, - linkMerge: Option[LinkMergeMethod] = None, - default: Option[CwlAny] = None, - valueFrom: Option[StringOrExpression] = None) { - - def parsedId(implicit parentName: ParentName) = FullyQualifiedName(id).id - - def toExpressionNode(valueFromExpression: StringOrExpression, - runInputExpectedType: Option[cwl.MyriadInputType], - isScattered: Boolean, - sourceMappings:Map[String, OutputPort], - outputTypeMap: Map[String, WomType], - expressionLib: ExpressionLib, - schemaDefRequirement: SchemaDefRequirement - )(implicit parentName: ParentName): ErrorOr[ExpressionNode] = { - val inputs = sourceMappings.keySet - val upstreamMergeType = outputTypeMap.get(parsedId) - - (for { - // we may have several sources, we make sure to have a type common to all of them. - // In the case where there's no input source, we currently wrap the valueFrom value in a WomString (see WorkflowStepInputExpression) - inputType <- WorkflowStepInput.determineValueFromType(upstreamMergeType, runInputExpectedType, isScattered, schemaDefRequirement) - womExpression = WorkflowStepInputExpression(parsedId, valueFromExpression, inputType, inputs, expressionLib) - identifier = WomIdentifier(id).combine("expression") - ret <- ExposedExpressionNode.fromInputMapping(identifier, womExpression, inputType, sourceMappings).toEither - } yield ret).toValidated - } - - /** - * - * @param sourceMappings The outputports to which this source refers - * @param matchingRunInputType This input matches an input declared in the workflowstep's "run". 
This is that step's declared type - * @return - */ - def toMergeNode(sourceMappings: Map[String, OutputPort], - expressionLib: ExpressionLib, - matchingRunInputType: Option[MyriadInputType], - isScattered: Boolean, - schemaDefRequirement: SchemaDefRequirement - ): Option[ErrorOr[ExpressionNode]] = { - - val identifier = WomIdentifier(id).combine("merge") - val mapType = sourceMappings.map({ case (k, v) => k -> v.womType }) - - val maybeMatchingRunInputWomType: Option[WomType] = matchingRunInputType.map(_.fold(MyriadInputTypeToWomType).apply(schemaDefRequirement)) - - def makeNode(head: (String, OutputPort), tail: List[(String, OutputPort)]) = for { - inputType <- determineMergeType(mapType, maybeMatchingRunInputWomType) - womExpression = WorkflowStepInputMergeExpression(this, inputType, NonEmptyList.of(head, tail: _*), expressionLib) - node <- AnonymousExpressionNode.fromInputMapping(identifier, womExpression, sourceMappings, PlainAnonymousExpressionNode.apply).toEither - } yield node - - val matchingRunInputWomType: WomType = maybeMatchingRunInputWomType.getOrElse(WomAnyType) - lazy val defaultValue = default.map { _.fold(DefaultToWomValuePoly).apply(matchingRunInputWomType) } - - sourceMappings.toList match { - case head :: tail => Option(makeNode(head, tail).toValidated) - case Nil => - defaultValue map { _ map { d => - PlainAnonymousExpressionNode(identifier, ValueAsAnExpression(d), matchingRunInputWomType, Map.empty) - } } - } - } - - def determineMergeType(sources: Map[String, WomType], expectedTypeAsWom: Option[WomType]): Checked[WomType] = { - WorkflowStepInput.determineMergeType(sources, linkMerge, expectedTypeAsWom, default.isDefined) - } - - lazy val sources: List[String] = source.toList.flatMap(_.fold(StringOrStringArrayToStringList)) - - lazy val effectiveLinkMerge: LinkMergeMethod = linkMerge.getOrElse(LinkMergeMethod.MergeNested) -} - -object WorkflowStepInput { - type InputSource = String :+: Array[String] :+: CNil - - implicit class EnhancedStepInputMap[A](val map: Map[WorkflowStepInput, A]) extends AnyVal { - def asIdentifierMap(implicit parentName: ParentName): Map[String, A] = { - map.map({ case (stepInput, value) => stepInput.parsedId -> value }) - } - } - - def determineValueFromType(mergedSourcesType: Option[WomType], - expectedType: Option[MyriadInputType], - isScattered: Boolean, - schemaDefRequirement: SchemaDefRequirement): Checked[WomType] = { - val expectedTypeAsWom: Option[WomType] = expectedType.map(_.fold(MyriadInputTypeToWomType).apply(schemaDefRequirement)) - - expectedTypeAsWom.getOrElse(WomStringType).asRight - } - - def determineMergeType(sources: Map[String, WomType], - linkMerge: Option[LinkMergeMethod], - expectedTypeAsWom: Option[WomType], - hasDefault: Boolean): Checked[WomType] = { - - (sources.toList, expectedTypeAsWom, linkMerge) match { - // If there is a single source and no explicit merge method, use the type of the source (unpacked if it's optional - // and there's a default since the default would be used if there's no supplied value). 
-      case (List((_, WomOptionalType(sourceType))), _, None) if hasDefault => sourceType.asRight
-      case (List((_, opt @ WomOptionalType(_))), _, None) => opt.asRight
-      case (List((_, sourceType)), _, None) => sourceType.asRight
-      // If there is a single source and merge nested is specified, wrap it into an array
-      case (List((_, sourceType)), _, Some(LinkMergeMethod.MergeNested)) => WomArrayType(sourceType).asRight
-      // If there are multiple sources and either no merge method or merge nested, find the closest common type.
-      // TODO: This is technically not enough, cwltool supports sources with totally different types, creating an array with multiple types
-      // Maybe WomCoproduct can help
-      case (_, _, Some(LinkMergeMethod.MergeNested) | None) => WomArrayType(WomType.homogeneousTypeFromTypes(sources.values)).asRight
-
-      //If sink parameter is an array and merge_flattened is used, must validate input & output types are equivalent before proceeding
-      case (_, Some(arrayType @ WomArrayType(itemType)), Some(LinkMergeMethod.MergeFlattened)) if typesToItemMatch(sources.values, itemType) => arrayType.asRight
-      // If sink parameter is not an array and merge flattened is used, validate that the source types match the sink type
-      case (_, Some(targetType), Some(LinkMergeMethod.MergeFlattened)) if typesToItemMatch(sources.values, targetType) => WomArrayType(targetType).asRight
-      // If the types are not compatible, fail
-      case (_, Some(targetType), Some(LinkMergeMethod.MergeFlattened)) =>
-        s"could not verify that types $sources and the items type of the run's InputArraySchema $targetType were compatible".invalidNelCheck
-
-      //We don't have type information from the run input so we gather up the sources and try to determine a common type amongst them.
-      case _ => WomType.homogeneousTypeFromTypes(sources.values).asRight
-    }
-  }
-
-  def typesToItemMatch(lst: Iterable[WomType], target: WomType): Boolean = {
-    val effectiveInputType = WomType.homogeneousTypeFromTypes(lst)
-
-    typeToItemMatch(effectiveInputType, target)
-  }
-
-  def typeToItemMatch(upstream: WomType, downstream: WomType): Boolean = {
-    upstream match {
-      case WomType.RecursiveType(innerType) => typeToItemMatch(innerType, downstream)
-      case other => other == downstream
-    }
-  }
-}
diff --git a/cwl/src/main/scala/cwl/WorkflowStepInputExpression.scala b/cwl/src/main/scala/cwl/WorkflowStepInputExpression.scala
deleted file mode 100644
index 476d7f44ef5..00000000000
--- a/cwl/src/main/scala/cwl/WorkflowStepInputExpression.scala
+++ /dev/null
@@ -1,51 +0,0 @@
-package cwl
-
-import cats.syntax.validated._
-import wom.expression.{FileEvaluation, IoFunctionSet}
-import wom.types._
-import wom.values._
-
-final case class WorkflowStepInputExpression(inputName: String,
-                                             valueFrom: StringOrExpression,
-                                             override val cwlExpressionType: WomType,
-                                             inputs: Set[String],
-                                             override val expressionLib: ExpressionLib) extends CwlWomExpression {
-
-  override def sourceString = inputName
-
-  override def evaluateValue(inputValues: Map[String, WomValue], ioFunctionSet: IoFunctionSet) = {
-    valueFrom match {
-      // If valueFrom is a constant string value, use this as the value for this input parameter.
-      // TODO: need to handle case where this is a parameter reference, it currently looks like a String to us!
-      case StringOrExpression.String(value) => WomString(value).validNel
-
-      /*
-        * If valueFrom is a parameter reference or expression, it must be evaluated to yield the actual value to be assigned to the input field.
- * - * The self value of in the parameter reference or expression must be the value of the parameter(s) specified in the source field, - * or null if there is no source field. - * - * The value of inputs in the parameter reference or expression must be the input object to the workflow step after - * assigning the source values, applying default, and then scattering. The order of evaluating valueFrom among step - * input parameters is undefined and the result of evaluating valueFrom on a parameter must not be visible to - * evaluation of valueFrom on other parameters. - */ - case StringOrExpression.Expression(expression) => - //used to determine the value of "self" as expected by CWL Spec - def selfValue = inputValues.get(inputName) match { - case Some(value) => value - case None => WomOptionalValue(WomNothingType, None) - } - - val parameterContext = ParameterContext(ioFunctionSet, expressionLib, inputValues, selfValue) - - expression.fold(EvaluateExpression).apply(parameterContext) - case oh => throw new Exception(s"Programmer Error! Unexpected case match: $oh") - } - } - - //this is input, so not producing any output files - override def evaluateFiles(inputTypes: Map[String, WomValue], ioFunctionSet: IoFunctionSet, coerceTo: WomType) = - Set.empty[FileEvaluation].validNel -} - diff --git a/cwl/src/main/scala/cwl/WorkflowStepInputMergeExpression.scala b/cwl/src/main/scala/cwl/WorkflowStepInputMergeExpression.scala deleted file mode 100644 index bc7d4f43d75..00000000000 --- a/cwl/src/main/scala/cwl/WorkflowStepInputMergeExpression.scala +++ /dev/null @@ -1,84 +0,0 @@ -package cwl - -import cats.data.NonEmptyList -import cats.syntax.either._ -import cats.syntax.option._ -import cats.syntax.traverse._ -import cats.syntax.validated._ -import cats.instances.list._ -import common.Checked -import common.validation.ErrorOr.ErrorOr -import cwl.InputParameter.DefaultToWomValuePoly -import wom.expression.{FileEvaluation, IoFunctionSet} -import wom.graph.GraphNodePort.OutputPort -import wom.types.WomType -import wom.values.{WomArray, WomFile, WomOptionalValue, WomValue} - -final case class WorkflowStepInputMergeExpression(input: WorkflowStepInput, - cwlExpressionType: WomType, - stepInputMappingHead: NonEmptyList[(String, OutputPort)], - override val expressionLib: ExpressionLib) extends CwlWomExpression { - - private val allStepInputMappings = stepInputMappingHead.toList - private val allStepInputSources = allStepInputMappings.map(_._1) - - override def sourceString: String = s"${input.id}-Merge-Expression" - override def inputs: Set[String] = allStepInputSources.toSet - - override def evaluateValue(inputValues: Map[String, WomValue], ioFunctionSet: IoFunctionSet): ErrorOr[WomValue] = { - def lookupValue(key: String): ErrorOr[WomValue] = - inputValues. - get(key). - toValidNel(s"source value $key not found in input values ${inputValues.mkString("\n")} and no default value provided. Graph Inputs were ${allStepInputSources.mkString("\n")}") - - def validateSources(sources: List[String]): ErrorOr[List[WomValue]] = - sources. - traverse(lookupValue) - - def isEmptyOptionalValue(womValue: WomValue): Boolean = womValue match { - case WomOptionalValue(_, None) => true - case _ => false - } - - (allStepInputSources, input.effectiveLinkMerge, input.default) match { - // When we have a single source but no value was provided for it and there's a default. 
- case (List(source), LinkMergeMethod.MergeNested, Some(default)) if isEmptyOptionalValue(inputValues(source)) => - default.fold(DefaultToWomValuePoly).apply(cwlExpressionType) - - case (List(source), LinkMergeMethod.MergeNested, _) => lookupValue(source) - - //When we have several sources, validate they are all present and provide them as a nested array - case (sources, LinkMergeMethod.MergeNested, _) => validateSources(sources).map(WomArray.apply) - - case (sources, LinkMergeMethod.MergeFlattened, _) => - val validatedSourceValues: Checked[List[WomValue]] = - validateSources(sources).toEither - - def flatten: WomValue => List[WomValue] = { - case WomArray(_, value) => value.toList - case WomOptionalValue(_, Some(value)) => flatten(value) - case other => List(other) - } - - //This is the meat of "merge_flattened," where we find arrays and concatenate them to form one array - val flattenedValidatedSourceValues: Checked[List[WomValue]] = validatedSourceValues.map(_.flatMap(flatten)) - - flattenedValidatedSourceValues.map(list => WomArray(list)).toValidated - - case (List(id), _, _) => lookupValue(id) - case oh => throw new Exception(s"Programmer Error! Unexpected case match: $oh") - } - } - - override def evaluateFiles(inputTypes: Map[String, WomValue], ioFunctionSet: IoFunctionSet, coerceTo: WomType): ErrorOr[Set[FileEvaluation]] = { - if (allStepInputMappings.size > 1) { - // TODO add MultipleInputFeatureRequirement logic in here - "MultipleInputFeatureRequirement not supported yet".invalidNel - } else { - val (inputName, _) = allStepInputMappings.head - inputTypes(inputName).collectAsSeq({ - case file: WomFile => file - }).toSet[WomFile].map(FileEvaluation.requiredFile).validNel - } - } -} diff --git a/cwl/src/main/scala/cwl/command/ParentName.scala b/cwl/src/main/scala/cwl/command/ParentName.scala deleted file mode 100644 index 6e34096d4f3..00000000000 --- a/cwl/src/main/scala/cwl/command/ParentName.scala +++ /dev/null @@ -1,10 +0,0 @@ -package cwl.command - -object ParentName { - def empty: ParentName = ParentName(None) - def apply(id: String): ParentName = ParentName(id.split("#").tail.headOption) -} - -case class ParentName(value: Option[String]) { - def stripParent(in: String) = value.map(v => in.stripPrefix(s"$v/")).getOrElse(in) -} diff --git a/cwl/src/main/scala/cwl/command/StringCommandPart.scala b/cwl/src/main/scala/cwl/command/StringCommandPart.scala deleted file mode 100644 index 9ec668231fb..00000000000 --- a/cwl/src/main/scala/cwl/command/StringCommandPart.scala +++ /dev/null @@ -1,18 +0,0 @@ -package cwl.command - -import cats.syntax.validated._ -import common.validation.ErrorOr.ErrorOr -import wom._ -import wom.callable.RuntimeEnvironment -import wom.expression.IoFunctionSet -import wom.graph.LocalName -import wom.values._ - -case class StringCommandPart(literal: String) extends CommandPart { - override def instantiate(inputsMap: Map[LocalName, WomValue], - functions: IoFunctionSet, - valueMapper: (WomValue) => WomValue, - runtimeEnvironment: RuntimeEnvironment): ErrorOr[List[InstantiatedCommand]] = - // TODO CWL shellquotes by default, but this shellquotes everything. Only shellquote what should be shellquoted. 
- List(InstantiatedCommand(literal.shellQuote)).validNel -} diff --git a/cwl/src/main/scala/cwl/internal/CommandPartSortingAlgorithm.scala b/cwl/src/main/scala/cwl/internal/CommandPartSortingAlgorithm.scala deleted file mode 100644 index eb6ed0410ce..00000000000 --- a/cwl/src/main/scala/cwl/internal/CommandPartSortingAlgorithm.scala +++ /dev/null @@ -1,71 +0,0 @@ -package cwl.internal - -import cats.data.Kleisli._ -import cats.data.ReaderT -import cats.data.Validated._ -import cats.syntax.traverse._ -import cats.syntax.validated._ -import cats.instances.list._ -import cwl.CommandLineTool._ -import cwl.command.ParentName -import cwl.{ArgumentCommandLineBinding, ArgumentToCommandPart, CommandLineTool, CommandPartExpression, FullyQualifiedName, InputParameter, MyriadInputTypeToSortedCommandParts, MyriadInputTypeToWomType} -import shapeless.Coproduct -import wom.types.WomStringType - -object CommandPartSortingAlgorithm { - def argumentCommandParts(arguments: Option[Array[CommandLineTool.Argument]]): CommandPartExpression[List[SortKeyAndCommandPart]] = - // arguments is an Option[Array[Argument]], the toList.flatten gives a List[Argument] - arguments.toList.flatten - // zip the index because we need it in the sorting key - .zipWithIndex.flatTraverse(argumentToCommandPart.tupled) - - def argumentToCommandPart: (Argument, Int) => CommandPartExpression[List[SortKeyAndCommandPart]] = (argument, index) => ReaderT { - case ((requirementsAndHints, expressionLib, _)) => - val part = argument.fold(ArgumentToCommandPart).apply(requirementsAndHints.hasShellCommandRequirement, expressionLib) - // Get the position from the binding if there is one - val position = argument.select[ArgumentCommandLineBinding].flatMap(_.position) - .map(Coproduct[StringOrInt](_)).getOrElse(CommandLineTool.DefaultPosition) - - // The key consists of the position followed by the index - val sortingKey = CommandBindingSortingKey(List(position, Coproduct[StringOrInt](index))) - - List(SortKeyAndCommandPart(sortingKey, part)).validNel - - } - - def inputBindingsCommandParts(inputs: Array[CommandInputParameter]): CommandPartExpression[List[SortKeyAndCommandPart]] = - inputs.toList.flatTraverse(inputBindingsCommandPart) - - def inputBindingsCommandPart(inputParameter: CommandInputParameter): CommandPartExpression[List[SortKeyAndCommandPart]] = - ReaderT{ case ((hintsAndRequirements, expressionLib, inputValues)) => - val parsedName = FullyQualifiedName(inputParameter.id)(ParentName.empty).id - - val womType = inputParameter.`type`.map(_.fold(MyriadInputTypeToWomType).apply(hintsAndRequirements.schemaDefRequirement)).getOrElse(WomStringType) - - val defaultValue = inputParameter.default.map(_.fold(InputParameter.DefaultToWomValuePoly).apply(womType)) - - inputValues - .collectFirst({ case (inputDefinition, womValue) if inputDefinition.name == parsedName => womValue.validNel }) - .orElse(defaultValue) match { - case Some(Valid(value)) => - // See http://www.commonwl.org/v1.0/CommandLineTool.html#Input_binding - lazy val initialKey = CommandBindingSortingKey.empty - .append(inputParameter.inputBinding, Coproduct[StringOrInt](parsedName)) - - inputParameter.`type`.toList. - flatMap{ - _.fold(MyriadInputTypeToSortedCommandParts). 
- apply( - inputParameter.inputBinding, - value, - initialKey.asNewKey, - hintsAndRequirements.hasShellCommandRequirement, - expressionLib, - hintsAndRequirements.schemaDefRequirement) - }.validNel - case Some(Invalid(errors)) => Invalid(errors) - case None => s"Could not find an input value for input $parsedName in ${inputValues.prettyString}".invalidNel - } - } - -} diff --git a/cwl/src/main/scala/cwl/internal/CwlEcmaScriptDecoder.scala b/cwl/src/main/scala/cwl/internal/CwlEcmaScriptDecoder.scala deleted file mode 100644 index f7f54d3db1c..00000000000 --- a/cwl/src/main/scala/cwl/internal/CwlEcmaScriptDecoder.scala +++ /dev/null @@ -1,124 +0,0 @@ -package cwl.internal - -import cats.syntax.apply._ -import cats.syntax.traverse._ -import cats.syntax.validated._ -import cats.instances.list._ -import cats.instances.option._ -import common.validation.ErrorOr._ -import common.validation.Validation._ -import cwl.{Directory, File, FileOrDirectory} -import org.mozilla.javascript.{ConsString, NativeArray, NativeObject} -import shapeless.Coproduct -import wom.types.WomNothingType -import wom.values._ - -import scala.jdk.CollectionConverters._ - -class CwlEcmaScriptDecoder { - - def decode(value: AnyRef): ErrorOr[WomValue] = - value match { - case map: NativeObject if map.get("class") == "File" => decodeFile(map.asScala.toMap).flatMap(_.asWomValue) - case map: NativeObject if map.get("class") == "Directory" => decodeDirectory(map.asScala.toMap).flatMap(_.asWomValue) - - case map: NativeObject => decodeMap(map.asScala.toMap) - case array: NativeArray => - val anyList = array.asScala.toList - val anyRefArray = anyList.asInstanceOf[List[AnyRef]] - anyRefArray.traverse(decode).map(WomArray.apply) - - //we represent nulls as this type because Wom doesn't have a "null" value, but it does have a nothing type - //If you wish this to be otherwise please tidy up the Expression interpolator as well - case null => WomOptionalValue(WomNothingType, None).valid - - case string: String => WomString(string).valid - case consString: ConsString => WomString(consString.toString).valid - case int: java.lang.Integer => WomInteger(int).valid - case long: java.lang.Long => WomLong(long).valid - case double: java.lang.Double if double == double.doubleValue.floor && !double.isInfinite => - WomInteger(double.intValue).valid - case double: java.lang.Double => WomFloat(double).valid - case boolean: java.lang.Boolean => WomBoolean(boolean).valid - case unknown => s"While decoding the output $value of the Javascript interpreter, we encountered $unknown and were unable to reify it.".invalidNel - } - - def decodeMap(map: Map[Any, Any]): ErrorOr[WomValue] = { - val realMap: Map[AnyRef, AnyRef] = map.asInstanceOf[Map[AnyRef, AnyRef]] - - val tupleList = realMap.toList.traverse{ - case (k,v) => (k.toString.validNel: ErrorOr[String], decode(v)).tupled - } - val mapWomValues = tupleList.map(_.toMap) - mapWomValues.map(WomObject.apply) - } - - /** - * Called to decode a cwl File or Directory. - */ - def decodeDirectoryOrFile(value: Any): ErrorOr[FileOrDirectory] = { - val invalidValue = s"Not a valid CWL map or directory: $value".invalidNel - - value match { - case map: NativeObject if map.get("class") == "File" => decodeFile(map.asScala.toMap).map(Coproduct[FileOrDirectory](_)) - case map: NativeObject if map.get("class") == "Directory" => decodeDirectory(map.asScala.toMap).map(Coproduct[FileOrDirectory](_)) - case _ => invalidValue - } - } - - /** - * Called to decode an array of cwl File or Directory instances. 
- */ - def decodeDirectoryOrFiles(value: Any): ErrorOr[Array[FileOrDirectory]] = { - value match { - case na: NativeArray => na.asScala.toList.traverse(decodeDirectoryOrFile).map(_.toArray) - } - } - - /** - * Called to decode a map value using a supplied function. - */ - def decodeMapValue[A](map: Map[Any, Any], key: String, f: Any => A): ErrorOr[Option[A]] = { - map.get(key).traverse(anyRef => validate(f(anyRef))) - } - - /** - * Called to decode an array of files or directories from a map value. - */ - def decodeMapDirectoryOrFiles(map: Map[Any, Any], - key: String): ErrorOr[Option[Array[FileOrDirectory]]] = { - map.get(key).traverse(decodeDirectoryOrFiles) - } - - /** - * Called to decode a cwl File. - */ - def decodeFile(map: Map[Any, Any]): ErrorOr[File] = { - val location = decodeMapValue(map, "location", _.toString) - val path = decodeMapValue(map, "path", _.toString) - val basename = decodeMapValue(map, "basename", _.toString) - val checksum = decodeMapValue(map, "checksum", _.toString) - val size = decodeMapValue(map, "size", _.toString.toDouble.toLong) - val secondaryFiles = decodeMapDirectoryOrFiles(map, "secondaryFiles") - val format = decodeMapValue(map, "format", _.toString) - val contents = decodeMapValue(map, "contents", _.toString) - - (location, path, basename, checksum, size, secondaryFiles, format, contents).mapN( - File(_, _, _, _, _, _, _, _) - ) - } - - /** - * Called to decode a cwl Directory. - */ - def decodeDirectory(map: Map[Any, Any]): ErrorOr[Directory] = { - val location = decodeMapValue(map, "location", _.toString) - val path = decodeMapValue(map, "path", _.toString) - val basename = decodeMapValue(map, "basename", _.toString) - val listing = decodeMapDirectoryOrFiles(map, "listing") - - (location, path, basename, listing).mapN( - Directory(_, _, _, _) - ) - } -} diff --git a/cwl/src/main/scala/cwl/internal/EcmaScriptEncoder.scala b/cwl/src/main/scala/cwl/internal/EcmaScriptEncoder.scala deleted file mode 100644 index e4aed51e215..00000000000 --- a/cwl/src/main/scala/cwl/internal/EcmaScriptEncoder.scala +++ /dev/null @@ -1,123 +0,0 @@ -package cwl.internal - -import cats.data.Validated.Valid -import common.validation.ErrorOr.ErrorOr -import cwl.internal.EcmaScriptUtil.{ECMAScriptVariable, ESArray, ESObject, ESPrimitive} -import cwl.{Directory, File} -import mouse.all._ -import wom.values._ - -/** - * Converts a WomValue into a javascript compatible value. - */ -class EcmaScriptEncoder { - - /** - * Base implementation converts any WomPrimitive (except WomFile) into a javascript compatible value. - * - * Inputs, and returned output must be one of: - * - WomString - * - WomBoolean - * - WomFloat - * - WomInteger - * - WomMap - * - WomArray - * - A "WomNull" equal to WomOptionalValue(WomNothingType, None) - * - * The WomMap keys and values, and WomArray elements must be the one of the above, recursively. - * - * Instances of WomFile are not permitted, and must be already converted to one of the above types. - * - * @param value A WOM value. - * @return The javascript equivalent. 
- */ - def encode(value: WomValue): ECMAScriptVariable = { - value match { - case file: WomFile => encodeFileOrDirectory(file) - case WomOptionalValue(_, None) => ESPrimitive(null) - case WomOptionalValue(_, Some(innerValue)) => encode(innerValue) - case WomString(string) => string |> ESPrimitive - case WomInteger(int) => Int.box(int) |> ESPrimitive - case WomLong(long) => Long.box(long) |> ESPrimitive - case WomFloat(double) => Double.box(double) |> ESPrimitive - case WomBoolean(boolean) => Boolean.box(boolean) |> ESPrimitive - case WomArray(_, array) => array.toList.map(encode).toArray |> ESArray - case WomMap(_, map) => map.map{ - case (mapKey, mapValue) => (encodeString(mapKey), encode(mapValue)) - } |> ESObject - case objectLike: WomObjectLike => objectLike.values.map{ - case (key, innerValue) => (key, encode(innerValue)) - } |> ESObject - case WomCoproductValue(_, womValue) => encode(womValue) - case WomEnumerationValue(_, womValue) => womValue |> ESPrimitive - case _ => throw new RuntimeException(s"$getClass is unable to encode value: $value") - } - } - - def encodeString(value: WomValue): String = { - encode(value) match { - case ESPrimitive(string: String) => string - - //In the case of a non-string, we evaluate a small snippet of Ecma script meant to coerce the object to a string - // http://2ality.com/2012/03/converting-to-string.html - case _ => - val jsString: ErrorOr[WomValue] = EcmaScriptUtil.evalStructish(""""" + other""","other" -> value, encoder = this) - jsString match { - case Valid(WomString(string)) => string - case unexpected => throw new RuntimeException(s"Expected to convert '$value' to a String but ended up with '$unexpected'") - } - } - } - - /** - * Encodes a sequence of wom file or directory values. - */ - def encodeFileOrDirectories(values: Seq[WomFile]): ESArray = { - ESArray(values.toList.map(encodeFileOrDirectory).toArray) - } - - /** - * Encodes a wom file or directory value. - */ - def encodeFileOrDirectory(value: WomFile): ECMAScriptVariable = { - value match { - case directory: WomUnlistedDirectory => encodeDirectory(WomMaybeListedDirectory(directory.value)) - case file: WomSingleFile => encodeFile(WomMaybePopulatedFile(file.value)) - case glob: WomGlobFile => encodeFile(WomMaybePopulatedFile(glob.value)) - case directory: WomMaybeListedDirectory => encodeDirectory(directory) - case file: WomMaybePopulatedFile => encodeFile(file) - } - } - - /** - * Encodes a wom file. - */ - def encodeFile(file: WomMaybePopulatedFile): ECMAScriptVariable = - List( - Option("class" -> ESPrimitive("File")), - file.valueOption.map("location" -> ESPrimitive(_)), - file.valueOption.map("path" -> ESPrimitive(_)), - Option("basename" -> (File.basename(file.value) |> ESPrimitive)), - Option("dirname" -> (File.dirname(file.value) |> ESPrimitive)), - Option("nameroot" -> (File.nameroot(file.value) |> ESPrimitive)), - Option("nameext" -> (File.nameext(file.value) |> ESPrimitive)), - file.checksumOption.map("checksum" -> ESPrimitive(_)), - file.sizeOption.map(Long.box).map("size" -> ESPrimitive(_)), - Option("secondaryFiles" -> encodeFileOrDirectories(file.secondaryFiles)), - file.formatOption.map("format" -> ESPrimitive(_)), - file.contentsOption.map("contents" -> ESPrimitive(_)) - ).flatten.toMap |> ESObject - - /** - * Encodes a wom directory. 
- */ - def encodeDirectory(directory: WomMaybeListedDirectory): ESObject = { - List( - Option("class" -> ESPrimitive("Directory")), - directory.valueOption.map("location" -> ESPrimitive(_)), - Option(directory.value).map("path" -> ESPrimitive(_)), - Option("basename" -> ESPrimitive(Directory.basename(directory.value))), - directory.listingOption.map(encodeFileOrDirectories).map("listing" -> _) - ).flatten.toMap |> ESObject - } -} diff --git a/cwl/src/main/scala/cwl/internal/EcmaScriptUtil.scala b/cwl/src/main/scala/cwl/internal/EcmaScriptUtil.scala deleted file mode 100644 index d2c5def745f..00000000000 --- a/cwl/src/main/scala/cwl/internal/EcmaScriptUtil.scala +++ /dev/null @@ -1,128 +0,0 @@ -package cwl.internal - -import common.collections.EnhancedCollections._ -import common.validation.ErrorOr._ -import common.validation.Validation._ -import org.mozilla.javascript._ -import wom.values._ - -import scala.concurrent.duration._ -import scala.util.Try - -/** - * This implementation depends on Mozilla Rhino. - * - * Previously we attempted to use Nashorn which is built into the JDK, but we encountered at least 2 situations where - * it didn't work and we found no workarounds to satisfy the use cases. Namely, JSON.stringify of a Map and calling "sort" on an array. - */ -//noinspection VariablePatternShadow -object EcmaScriptUtil { - def writeValue(value: ECMAScriptVariable)(context: Context, scope: Scriptable): AnyRef = - value match { - case ESObject(fields) => - val newObj = context.newObject(scope) - - fields.toList.foreach{ - case (name, value) => - val newerObj = writeValue(value)(context, scope) - ScriptableObject.putProperty(newObj, name, newerObj) - } - newObj - - case ESArray(array) => - val newObj = context.newArray(scope, array.length) - - array.toList.zipWithIndex.foreach { - case (js, index) => - - val newerObj = writeValue(js)(context, scope) - ScriptableObject.putProperty(newObj, index, newerObj) - - } - newObj - - case ESPrimitive(obj) => obj - } - - /** - * Runs ECMAScript as JS1.8 (aka ES5) - * - * Uses RhinoSandbox to reduce chances injected JS wreaks havoc on the JVM. - * - * @see https://en.wikipedia.org/wiki/ECMAScript#Version_correspondence - */ - def evalRaw(expr: String)(block: (Context, Scriptable) => Unit): AnyRef = { - // TODO: Parameterize and update callers to pass in source name, max duration, max instructions, etc.? - // For now, be very liberal with scripts giving 60 seconds of unrestricted CPU usage and unlimited instructions. - val sourceName = "" - val maxDuration: Duration = 60.seconds - val maxInstructionsOption: Option[Int] = None - val strict = true - val languageVersionOption = Option(Context.VERSION_1_8) - - val sandbox = new EnhancedRhinoSandbox(strict, languageVersionOption) - if (maxDuration.isFinite) { - sandbox.setMaxDuration(maxDuration.toMillis.toInt) - } - maxInstructionsOption foreach sandbox.setInstructionLimit - sandbox.setUseSafeStandardObjects(true) - sandbox.setUseSealedScope(true) - - sandbox.eval(sourceName, expr)(block) - } - - sealed trait ECMAScriptVariable - - case class ESObject(fields: Map[String, ECMAScriptVariable]) extends ECMAScriptVariable - case class ESArray(array: Array[ECMAScriptVariable]) extends ECMAScriptVariable { - override def toString: String = s"ESArray(${array.toList})" - } - case class ESPrimitive(anyRef: AnyRef) extends ECMAScriptVariable - - /** - * Evaluates a javascript expression using maps of WOM values. - * - * TODO: Once custom types are supported as WomValue, this custom method won't be required. 
- * - * @param expr The javascript expression. - * @param rawValues A map filled with WOM values. - * @param mapValues A map of maps filled with WOM values of various types. - * @param encoder Encodes wom values to javascript. - * @param decoder Decodes wom values from javascript. - * @return The result of the expression. - */ - def evalStructish(expr: String, - rawValues: (String, WomValue), - mapValues: Map[String, Map[String, WomValue]] = Map.empty, - encoder: EcmaScriptEncoder, - decoder: CwlEcmaScriptDecoder = new CwlEcmaScriptDecoder): ErrorOr[WomValue] = { - def evaluate = evalRaw(expr) { (context, scope) => - - val (key, value) = rawValues - - val jsValue = encoder.encode(value) - val field = writeValue(jsValue)(context, scope) - ScriptableObject.putProperty(scope, key, field) - - val jsMap = mapValues.safeMapValues{ _.safeMapValues(encoder.encode) } - - jsMap foreach { - case (scopeId, nestedMap) => - - val newObj = context.newObject(scope) - nestedMap.toList foreach { - case (key, value) => - val newerObj = writeValue(value)(context, scope) - ScriptableObject.putProperty(newObj, key, newerObj) - } - ScriptableObject.putProperty(scope, scopeId, newObj) - } - - } - - for { - evaluated <- Try(evaluate).toErrorOr - decoded <- decoder.decode(evaluated) - } yield decoded - } -} diff --git a/cwl/src/main/scala/cwl/internal/EnhancedRhinoSandbox.scala b/cwl/src/main/scala/cwl/internal/EnhancedRhinoSandbox.scala deleted file mode 100644 index 405c04fdaa2..00000000000 --- a/cwl/src/main/scala/cwl/internal/EnhancedRhinoSandbox.scala +++ /dev/null @@ -1,167 +0,0 @@ -package cwl.internal - -import cwl.internal.EnhancedRhinoSandbox._ -import delight.rhinosandox.internal._ -import org.mozilla.javascript._ - -import scala.jdk.CollectionConverters._ -import scala.reflect._ - -/** - * Extends the RhinoSandboxImpl with some fixes and enhancements using java reflection. - * - * @param strict Should evaluation be strict. - * @param languageVersionOption The optional language version to set. - */ -class EnhancedRhinoSandbox(strict: Boolean = true, languageVersionOption: Option[Int] = None) extends RhinoSandboxImpl { - - // Allows easier reflection access to private fields - private lazy val sandboxImpl: RhinoSandboxImpl = this - - /** - * Similar to RhinoSandbox.eval but passes back the context and scope for mutating before evaluation. - * - * With the original RhinoSandbox.eval not sure how to: - * - Create (nested) arrays - * - Create (nested) maps - * - Set JS/ES version - * - * So this uses a copy-port of the original, using reflection to read some of RhinoSandbox's private variables. - * - * Instead of hiding the context and scope as in RhinoSandbox.eval, both are passed back through the block. - * - * TODO: Ask RhinoSandbox if hacks are even needed, and if so contrib back patches so that reflection isn't required. - * - Is there a way to pass nested maps/arrays via a `java.util.Map[String, Object]`, or must we use our `block`? - * - Additionally: can we skip passing `context` to a block as a thread may just call `Context.getCurrentContext()`? 
- * - * @see https://maxrohde.com/2015/08/06/sandboxing-javascript-in-java-app-link-collection/ - * @see https://github.com/javadelight/delight-rhino-sandbox/blob/9f5a073/src/main/java/delight/rhinosandox/internal/RhinoSandboxImpl.java#L100-L123 - * @see delight.rhinosandox.internal.RhinoSandboxImpl#assertContextFactory() - */ - def eval(sourceName: String, js: String)(block: (Context, Scriptable) => Unit): AnyRef = { - assertContextFactory() - val sandboxImpl_contextFactory = PrivateField(sandboxImpl, "contextFactory").as[ContextFactory] - // RhinoSandbox diff: eval has enterContext inside the try, but Rhino docs say it belongs outside. - // https://www-archive.mozilla.org/rhino/apidocs/org/mozilla/javascript/ContextFactory.html#enterContext%28%29 - val context = sandboxImpl_contextFactory.enterContext - try { - // RhinoSandbox diff: allow setting the language version - languageVersionOption foreach context.setLanguageVersion - assertSafeScope(context) - val sandboxImpl_globalScope = PrivateField(sandboxImpl, "globalScope").as[ScriptableObject] - val sandboxImpl_sealScope = PrivateField(sandboxImpl, "sealScope").as[Boolean] - if (sandboxImpl_sealScope) { - sandboxImpl_globalScope.sealObject() - } - val sandboxImpl_safeScope = PrivateField(sandboxImpl, "safeScope").as[ScriptableObject] - val instanceScope = context.newObject(sandboxImpl_safeScope) - instanceScope.setPrototype(sandboxImpl_safeScope) - instanceScope.setParentScope(null) - - block(context, instanceScope) - - // RhinoSandbox diff: allow strict JS/ES evaluation - // See note at top assertContextFactory as to why we have to put 'use strict'; here. - // All on one line to avoid off-by-one error for javascript error messages that report line numbers. - // Could also pass zero as the line number, but the RhinoSandbox passes hard codes line number one also. - val script = if (strict) s"'use strict';$js" else js - context.evaluateString(instanceScope, script, sourceName, 1, null) - } finally { - Context.exit() - } - } - - /** - * Stricter context factory modified from RhinoSandboxImpl.assertContextFactory(). - * - * ContextFactory.initGlobal() is called within a static synchronized block. - * The globalScope initialized via initSafeStandardObjects instead of initStandardObjects. - * - * The default implementation uses a SafeContext that allows non-strict JS/ES. We would ideally set - * FEATURE_STRICT_MODE to true but that only produces warnings and doesn't return an error. Unfortunately when - * FEATURE_WARNING_AS_ERROR is enabled then non-strict Rhino warnings like "missing ;" throw errors. Instead, - * "'use strict';" is injected before scripts. - */ - override def assertContextFactory(): Unit = { - if (PrivateField(sandboxImpl, "contextFactory").as[ContextFactory] != null) { - return - } - - val _safeContext = new SafeContext - PrivateField(sandboxImpl, "contextFactory") := _safeContext - val _hasExplicitGlobal = ContextFactory.hasExplicitGlobal - val _not = !_hasExplicitGlobal - // RhinoSandbox diff: the global does not like to be initialized twice. Synchronize initialization. 
- val sandboxImpl_contextFactory = PrivateField(sandboxImpl, "contextFactory").as[SafeContext] - if (_not) initGlobalSynchronized(sandboxImpl_contextFactory) - - val sandboxImpl_instructionLimit = PrivateField(sandboxImpl, "instructionLimit").as[Int] - PrivateField(sandboxImpl_contextFactory, "maxInstructions") := sandboxImpl_instructionLimit - val sandboxImpl_maxDuration = PrivateField(sandboxImpl, "maxDuration").as[Long] - PrivateField(sandboxImpl_contextFactory, "maxRuntimeInMs") := sandboxImpl_maxDuration - // RhinoSandbox diff: assertContextFactory has enterContext inside the try, but Rhino docs say it belongs outside. - // https://www-archive.mozilla.org/rhino/apidocs/org/mozilla/javascript/ContextFactory.html#enterContext%28%29 - val context = sandboxImpl_contextFactory.enterContext - try { - // RhinoSandbox diff: Default globalScope is created via initStandardObjects instead of initSafeStandardObjects. - // initStandardObjects would add the various java packages into the global scope, including `java.io.File`, etc. - PrivateField(sandboxImpl, "globalScope") := context.initSafeStandardObjects(null, false) - val sandboxImpl_inScope = PrivateField(sandboxImpl, "inScope").as[java.util.Map[String, AnyRef]] - val _entrySet = sandboxImpl_inScope.entrySet - val sandboxImpl_globalScope = PrivateField(sandboxImpl, "globalScope").as[ScriptableObject] - for (entry <- _entrySet.asScala) { - sandboxImpl_globalScope.put( - entry.getKey, - sandboxImpl_globalScope, - Context.toObject(entry.getValue, sandboxImpl_globalScope)) - } - val parameters = Array(classOf[String]) - val dealMethod = classOf[RhinoEvalDummy].getMethod("eval", parameters: _*) - val _rhinoEval = new RhinoEval("eval", dealMethod, sandboxImpl_globalScope) - sandboxImpl_globalScope.defineProperty("eval", _rhinoEval, ScriptableObject.DONTENUM) - } finally { - Context.exit() - } - } - -} - -object EnhancedRhinoSandbox { - - /** - * Get or sets a private field. - * - * @param obj The instance to retrieve the field from. - * @param name The name of the field. - * @tparam A The class to retrieve the value from. The field MUST exist on this class, and not a superclass. - */ - final case class PrivateField[A: ClassTag](obj: A, name: String) { - private[this] def field = { - val field = classTag[A].runtimeClass.getDeclaredField(name) - field.setAccessible(true) - field - } - - def as[B]: B = { - field.get(obj).asInstanceOf[B] - } - - def :=(value: Any): Unit = { - field.set(obj, value) - } - } - - /** - * Call ContextFactory.initGlobal in a static synchronized block. 
- * - * @see [[cwl.internal.EnhancedRhinoSandbox.assertContextFactory]] - */ - private def initGlobalSynchronized(sandboxImpl_contextFactory: ContextFactory) = { - synchronized { - val _hasExplicitGlobal = ContextFactory.hasExplicitGlobal - val _not = !_hasExplicitGlobal - if (_not) ContextFactory.initGlobal(sandboxImpl_contextFactory) - } - } - -} diff --git a/cwl/src/main/scala/cwl/internal/GigabytesToBytes.scala b/cwl/src/main/scala/cwl/internal/GigabytesToBytes.scala deleted file mode 100644 index 7e45aeec5ba..00000000000 --- a/cwl/src/main/scala/cwl/internal/GigabytesToBytes.scala +++ /dev/null @@ -1,26 +0,0 @@ -package cwl.internal - -import shapeless.{Coproduct, Poly1} - -object GigabytesToBytes extends Poly1 { - val toMebibytesMultiplier = Math.pow(2, 20).toLong - - implicit def long = at[Long] { - l => - val value = l * toMebibytesMultiplier - Coproduct[cwl.ResourceRequirementType](value) - } - - implicit def string = at[String] { - s => - //TODO: Scale this by multiplier https://github.com/broadinstitute/cromwell/issues/3382 - Coproduct[cwl.ResourceRequirementType](s) - } - - implicit def expression = at[cwl.Expression] { - e => - //TODO: Scale this by multiplier https://github.com/broadinstitute/cromwell/issues/3382 - Coproduct[cwl.ResourceRequirementType](e) - } -} - diff --git a/cwl/src/main/scala/cwl/model.scala b/cwl/src/main/scala/cwl/model.scala deleted file mode 100644 index 838ee66a98c..00000000000 --- a/cwl/src/main/scala/cwl/model.scala +++ /dev/null @@ -1,310 +0,0 @@ -package cwl - -import cwl.CommandLineTool.{CommandBindingSortingKey, SortKeyAndCommandPart} -import cwl.SchemaDefRequirement.{AsInputEnumSchema, AsInputRecordSchema, SchemaDefTypes} -import cwl.CwlType.CwlType -import cwl.WorkflowStepInput.InputSource -import cwl.command.ParentName -import cwl.internal.GigabytesToBytes -import eu.timepit.refined._ -import eu.timepit.refined.api.Refined -import eu.timepit.refined.string.MatchesRegex -import shapeless.syntax.singleton._ -import shapeless.{:+:, CNil, Coproduct, Poly1, Witness} -import wom.types.WomType -import wom.values.WomValue -import mouse.all._ - -object WorkflowStepInputSource { - object String { - def unapply(arg: InputSource): Option[String] = arg.select[String] - } - object StringArray { - def unapply(arg: InputSource): Option[Array[String]] = arg.select[Array[String]] - } -} - -/** - * Describes a bespoke type. - * - * @param name This field actually does _not_ appear in the v1.0 schema, but it is used anyway in the conformance tests. - * After some consideration it was determined that we should close our eyes and pretend it is in the spec. It - * makes its formal appearance as a required field in v1.1. 
- */ -case class InputRecordSchema( - name: String, - fields: Option[Array[InputRecordField]] = None, - `type`: W.`"record"`.T = W("record").value, - label: Option[String] = None) { - -} - -case class InputRecordField( - name: String, - `type`: MyriadInputType, - doc: Option[String] = None, - inputBinding: Option[InputCommandLineBinding], - label: Option[String] = None) - -case class InputArraySchema -( - items: MyriadInputType, - `type`: W.`"array"`.T = Witness("array").value, - label: Option[String] = None, - inputBinding: Option[InputCommandLineBinding] = None, - // IAS.secondaryFiles are NOT listed in 1.0 spec, but according to jgentry they will be, maybe - secondaryFiles: Option[SecondaryFiles] = None -) - -trait CommandLineBinding { - def loadContents: Option[Boolean] - def position: Option[Int] - def prefix: Option[String] - def separate: Option[Boolean] - def itemSeparator: Option[String] - def optionalValueFrom: Option[StringOrExpression] - def shellQuote: Option[Boolean] - // separate defaults to true - def effectiveSeparate = separate.getOrElse(true) -} - -object InputCommandLineBinding { - def default = InputCommandLineBinding() -} - -case class InputCommandLineBinding( - loadContents: Option[Boolean] = None, - position: Option[Int] = None, - prefix: Option[String] = None, - separate: Option[Boolean] = None, - itemSeparator: Option[String] = None, - valueFrom: Option[StringOrExpression] = None, - shellQuote: Option[Boolean] = None) extends CommandLineBinding { - override val optionalValueFrom = valueFrom - - def toCommandPart(sortingKey: CommandBindingSortingKey, boundValue: WomValue, hasShellCommandRequirement: Boolean, expressionLib: ExpressionLib) = { - SortKeyAndCommandPart(sortingKey, InputCommandLineBindingCommandPart(this, boundValue)(hasShellCommandRequirement, expressionLib)) - } -} - -// valueFrom is required for command line bindings in the argument section: http://www.commonwl.org/v1.0/CommandLineTool.html#CommandLineBinding -case class ArgumentCommandLineBinding( - valueFrom: StringOrExpression, - loadContents: Option[Boolean] = None, - position: Option[Int] = None, - prefix: Option[String] = None, - separate: Option[Boolean] = None, - itemSeparator: Option[String] = None, - shellQuote: Option[Boolean] = None) extends CommandLineBinding { - override val optionalValueFrom = Option(valueFrom) -} - -case class InputBinding(position: Int, prefix: String) - -object MyriadOutputInnerTypeCacheableString extends Poly1 { - import Case._ - - private def cacheableOutputRecordFieldString(field: OutputRecordField): String = { - val fieldType = field.`type`.fold(MyriadOutputTypeCacheableString) - val lqn = field.name.substring(field.name.lastIndexOf('#') + 1) - s"OutputRecordField($lqn,$fieldType,${field.doc},${field.outputBinding})" - } - - implicit def recordSchema: Aux[OutputRecordSchema, String] = at[OutputRecordSchema] { - s => - val t = s.`type` - s"OutputRecordSchema($t,${s.fields.map(a => "Array(" + a.map(cacheableOutputRecordFieldString).mkString(",") + ")" )},${s.label})" - } - - implicit def arraySchema: Aux[OutputArraySchema, String] = at[OutputArraySchema] { - a => - val is: String = a.items.fold(MyriadOutputTypeCacheableString) - val t = a.`type` - s"OutputArraySchema($is,$t,${a.label},${a.outputBinding})" - } - - implicit def enumSchema: Aux[OutputEnumSchema, String] = at[OutputEnumSchema] { _.toString } - implicit def cwlType: Aux[CwlType, String] = at[CwlType] { _.toString } - implicit def string: Aux[String, String] = at[String] { identity } -} - - -object 
MyriadOutputTypeCacheableString extends Poly1 { - import Case._ - - implicit def one: Aux[MyriadOutputInnerType, String] = at[MyriadOutputInnerType] { - _.fold(MyriadOutputInnerTypeCacheableString) - } - - implicit def many: Aux[Array[MyriadOutputInnerType], String] = at[Array[MyriadOutputInnerType]] { a => - val strings: Array[String] = a.map(_.fold(MyriadOutputInnerTypeCacheableString)) - "Array(" + strings.mkString(",") + ")" - } -} - -object SecondaryFilesCacheableString extends Poly1 { - import Case._ - - implicit def one: Aux[StringOrExpression, String] = at[StringOrExpression] { - _.toString - } - - implicit def array: Aux[Array[StringOrExpression], String] = at[Array[StringOrExpression]] { - _.mkString("Array(", ",", ")") - } - -} - -case class OutputRecordSchema( - `type`: W.`"record"`.T, - fields: Option[Array[OutputRecordField]], - label: Option[String]) - -case class OutputRecordField( - name: String, - `type`: MyriadOutputType, - doc: Option[String], - outputBinding: Option[CommandOutputBinding]) - -case class OutputArraySchema( - items: MyriadOutputType, - `type`: W.`"array"`.T = Witness("array").value, - label: Option[String] = None, - outputBinding: Option[CommandOutputBinding] = None) - -case class InlineJavascriptRequirement( - `class`: W.`"InlineJavascriptRequirement"`.T = "InlineJavascriptRequirement".narrow, - expressionLib: Option[Array[String]] = None) - -case class SchemaDefRequirement( - types: Array[SchemaDefTypes] = Array.empty, - `class`: W.`"SchemaDefRequirement"`.T = Witness("SchemaDefRequirement").value) { - - def lookupType(tpe: String): Option[WomType] = - lookupCwlType(tpe).flatMap{ - case AsInputRecordSchema(inputRecordSchema: InputRecordSchema) => MyriadInputInnerTypeToWomType.inputRecordSchemaToWomType(inputRecordSchema).apply(this) |> Option.apply - case _ => None - } - - //Currently only InputRecordSchema has a name in the spec, so it is the only thing that can be referenced via string - def lookupCwlType(tpe: String): Option[SchemaDefTypes] = { - - def matchesType(inputEnumSchema: InputEnumSchema): Boolean = { - inputEnumSchema.name.fold(false){name => FileAndId(name)(ParentName.empty).id equalsIgnoreCase FileAndId(tpe)(ParentName.empty).id} - } - - types.toList.flatMap { - case AsInputRecordSchema(inputRecordSchema: InputRecordSchema) if FileAndId(inputRecordSchema.name)(ParentName.empty).id equalsIgnoreCase FileAndId(tpe)(ParentName.empty).id => - List(Coproduct[SchemaDefTypes](inputRecordSchema)) - case AsInputEnumSchema(inputEnumSchema: InputEnumSchema) if matchesType(inputEnumSchema) => - List(Coproduct[SchemaDefTypes](inputEnumSchema)) - case _ => - List() - }.headOption - } -} - -object SchemaDefRequirement { - type SchemaDefTypes = InputRecordSchema :+: InputEnumSchema :+: InputArraySchema :+: CNil - - object AsInputRecordSchema { - def unapply(arg: SchemaDefTypes): Option[InputRecordSchema] = arg.select[InputRecordSchema] - } - - object AsInputEnumSchema { - def unapply(arg: SchemaDefTypes): Option[InputEnumSchema] = arg.select[InputEnumSchema] - } - - object AsInputArraySchema { - def unapply(arg: SchemaDefTypes): Option[InputArraySchema] = arg.select[InputArraySchema] - } -} - -//There is a large potential for regex refinements on these string types -case class DockerRequirement( - `class`: W.`"DockerRequirement"`.T, - dockerPull: Option[String], //TODO Refine to match a docker image regex? 
- dockerLoad: Option[String], - dockerFile: Option[String], - dockerImport: Option[String], - dockerImageId: Option[String], - dockerOutputDirectory: Option[String] - ) - -case class SoftwareRequirement( - `class`: W.`"SoftwareRequirement"`.T, - packages: Array[SoftwarePackage] = Array.empty - ) - -case class SoftwarePackage( - `package`: String, - version: Option[Array[String]], - specs: Option[Array[String]] // This could be refined to match a regex for IRI. - ) { - type Package = String - type Specs = Array[String] -} - -case class EnvVarRequirement( - `class`: EnvVarRequirement.ClassType = EnvVarRequirement.`class`, - envDef: Array[EnvironmentDef] - ) - -object EnvVarRequirement { - type ClassType = Witness.`"EnvVarRequirement"`.T - - val `class`: ClassType = "EnvVarRequirement".asInstanceOf[ClassType] -} - -case class EnvironmentDef(envName: String, envValue: StringOrExpression) { - type EnvName = String - type EnvValue = String -} - - -case class ShellCommandRequirement(`class`: W.`"ShellCommandRequirement"`.T = "ShellCommandRequirement".narrow) - -case class ResourceRequirement( - `class`: W.`"ResourceRequirement"`.T, - coresMin: Option[ResourceRequirementType], - coresMax: Option[ResourceRequirementType], - ramMin: Option[ResourceRequirementType], - ramMax: Option[ResourceRequirementType], - tmpdirMin: Option[ResourceRequirementType], - tmpdirMax: Option[ResourceRequirementType], - outdirMin: Option[ResourceRequirementType], - outdirMax: Option[ResourceRequirementType]) { - def effectiveCoreMin = coresMin.orElse(coresMax) - def effectiveCoreMax = coresMax.orElse(coresMin) - - def effectiveRamMin = ramMin.orElse(ramMax).map(_.fold(GigabytesToBytes)) - def effectiveRamMax = ramMax.orElse(ramMin).map(_.fold(GigabytesToBytes)) - - def effectiveTmpdirMin = tmpdirMin.orElse(tmpdirMax) - def effectiveTmpdirMax = tmpdirMax.orElse(tmpdirMin) - - def effectiveOutdirMin = outdirMin.orElse(outdirMax) - def effectiveOutdirMax = outdirMax.orElse(outdirMin) -} - -/** - * This promotes DNA Nexus InputResourceRequirement to a first class citizen requirement, which it really isn't. 
- * Since it's the only one for now it's not a big deal but if more of these pop up we might want to treat custom requirements - * in a different way - */ -case class DnaNexusInputResourceRequirement( - `class`: String Refined MatchesRegex[W.`".*InputResourceRequirement"`.T], - indirMin: Option[Long] - ) - -case class SubworkflowFeatureRequirement( - `class`: W.`"SubworkflowFeatureRequirement"`.T) - -case class ScatterFeatureRequirement( - `class`: W.`"ScatterFeatureRequirement"`.T) - -case class MultipleInputFeatureRequirement( - `class`: W.`"MultipleInputFeatureRequirement"`.T) - -case class StepInputExpressionRequirement( - `class`: W.`"StepInputExpressionRequirement"`.T) diff --git a/cwl/src/main/scala/cwl/ontology/CacheConfiguration.scala b/cwl/src/main/scala/cwl/ontology/CacheConfiguration.scala deleted file mode 100644 index cb82d9db591..00000000000 --- a/cwl/src/main/scala/cwl/ontology/CacheConfiguration.scala +++ /dev/null @@ -1,15 +0,0 @@ -package cwl.ontology - -import com.typesafe.config.Config -import common.validation.Validation._ -import net.ceedubs.ficus.Ficus._ - -case class CacheConfiguration(maxSize: Long) - -object CacheConfiguration { - def apply(config: Config): CacheConfiguration = { - validate(config.getAs[Long]("max-size").getOrElse(0L)) - .map(new CacheConfiguration(_)) - .unsafe("Ontology cache configuration") - } -} diff --git a/cwl/src/main/scala/cwl/ontology/OntologyConfiguration.scala b/cwl/src/main/scala/cwl/ontology/OntologyConfiguration.scala deleted file mode 100644 index 5382ac35b91..00000000000 --- a/cwl/src/main/scala/cwl/ontology/OntologyConfiguration.scala +++ /dev/null @@ -1,27 +0,0 @@ -package cwl.ontology - -import cats.syntax.apply._ -import com.typesafe.config.Config -import common.util.Backoff -import common.validation.ErrorOr.ErrorOr -import common.validation.Validation._ -import net.ceedubs.ficus.Ficus._ - -import scala.concurrent.duration.FiniteDuration - -case class OntologyConfiguration(retries: Option[Int], backoff: Backoff, poolSize: Int) - -object OntologyConfiguration { - def apply(config: Config): OntologyConfiguration = { - val retries = validate { Option(config.as[Int]("retries")) } - val backoff = validate { - Backoff.staticBackoff( - config.as[FiniteDuration]("backoff-time") - ) - } - val poolSize = validate { config.as[Int]("pool-size") } - - val validated: ErrorOr[OntologyConfiguration] = (retries, backoff, poolSize).mapN(OntologyConfiguration.apply) - validated.unsafe("Ontology configuration") - } -} diff --git a/cwl/src/main/scala/cwl/ontology/Schema.scala b/cwl/src/main/scala/cwl/ontology/Schema.scala deleted file mode 100644 index a2ed13d7bf3..00000000000 --- a/cwl/src/main/scala/cwl/ontology/Schema.scala +++ /dev/null @@ -1,180 +0,0 @@ -package cwl.ontology - -import java.util.concurrent.Executors - -import cats.effect.IO -import cats.syntax.traverse._ -import cats.instances.list._ -import com.google.common.cache.{Cache, CacheBuilder} -import com.typesafe.config.{Config, ConfigFactory} -import com.typesafe.scalalogging.Logger -import common.util.IORetry -import common.util.IORetry.StatefulIoError -import common.validation.ErrorOr._ -import common.validation.Validation._ -import cwl.ontology.Schema._ -import mouse.all._ -import net.ceedubs.ficus.Ficus._ -import org.semanticweb.owlapi.apibinding.OWLManager -import org.semanticweb.owlapi.model._ -import org.semanticweb.owlapi.model.parameters.OntologyCopy -import org.semanticweb.owlapi.reasoner.structural.StructuralReasonerFactory -import 
org.semanticweb.owlapi.reasoner.{OWLReasoner, OWLReasonerFactory} -import org.semanticweb.owlapi.util.OWLAPIStreamUtils -import org.slf4j.LoggerFactory - -import scala.jdk.CollectionConverters._ -import scala.concurrent.ExecutionContext -import scala.util.Try - -/** - * OWL/RDF Schema lookup. - * - * @param schemaIris IRI paths to OWL/RDF schemas. - * @param namespaces Additional/Override namespace prefixes. - */ -case class Schema(schemaIris: Seq[String], - namespaces: Map[String, String], - ontologyManager: OWLOntologyManager = OWLManager.createOWLOntologyManager, - reasonerFactory: OWLReasonerFactory = new StructuralReasonerFactory) { - - /** - * Returns a full IRI based on a full or abbreviated IRI. - * - * A full IRI wrapped in < and > will not be checked for abbreviations but will be returned without the wrapper. - * - * @see https://www.w3.org/TR/owl2-syntax/#IRIs - */ - def fullIri(name: String): String = getIri(name).getIRIString - - /** - * Returns true if child is an equivalent of ancestor, and if not then recursively checks if any of the child's - * super classes are an equivalent of ancestor. - */ - def isSubClass(child: String, ancestor: String): Boolean = { - val childIri: IRI = getIri(child) - val ancestorIri: IRI = getIri(ancestor) - val childClass: OWLClass = dataFactory.getOWLClass(childIri) - val ancestorClass: OWLClass = dataFactory.getOWLClass(ancestorIri) - val schemaReasoner: OWLReasoner = reasonerFactory.createReasoner(schemaOntology) - try { - Schema.isSubClass(schemaReasoner, childClass, ancestorClass) - } finally { - schemaReasoner.dispose() - } - } - - private val dataFactory: OWLDataFactory = ontologyManager.getOWLDataFactory - private val schemaOntology: OWLOntology = ontologyManager.createOntology() - private val schemaPrefixManager: PrefixManager = - ontologyManager.getOntologyFormat(schemaOntology).asPrefixOWLDocumentFormat - - { - def addToSchema(originalOntology: OWLOntology): Unit = { - schemaOntology.addAxioms(originalOntology.axioms()) - val originalOntologyFormat: OWLDocumentFormat = ontologyManager.getOntologyFormat(originalOntology) - if (originalOntologyFormat.isPrefixOWLDocumentFormat) - schemaPrefixManager.copyPrefixesFrom(originalOntologyFormat.asPrefixOWLDocumentFormat) - } - - val errorOr: ErrorOr[Unit] = for { - ontologies <- schemaIris.toList.traverse(loadOntologyFromIri(ontologyManager)) - _ = ontologies.foreach(addToSchema) - } yield () - errorOr.toTry("Error loading schemas").get - - // Add any namespace overrides - namespaces foreach { - case (prefixName, prefix) => schemaPrefixManager.setPrefix(prefixName, prefix) - } - } - - /** - * Returns the full IRI for the name using the prefixManager. - * - * TODO: Not 100% sure why you can't ask owl-api for an iri-with-prefix and have it looked up automatically. - * - * There does seem to be a difference between abbreviated and full IRIs in the official spec, where full IRIs are - * wrapped in < >. But this doesn't seem to be the format used by CWL nor owl-api. - * - * Please update this comment if/when one knows the correct behavior. 
- * - * @see https://www.w3.org/TR/owl2-syntax/#IRIs - */ - private def getIri(name: String): IRI = { - Try(schemaPrefixManager.getIRI(name)).getOrElse(IRI.create(name)) - } -} - -object Schema { - // Extending StrictLogging creates a circular dependency here for some reason, so making the logger ourselves - private val logger: Logger = Logger(LoggerFactory.getLogger(getClass.getName)) - private [ontology] val ontologyConfig = ConfigFactory.load.as[Config]("ontology") - private val ontologyConfiguration = OntologyConfiguration(ontologyConfig) - private [ontology] val cacheConfig = ontologyConfig.getAs[Config]("cache") - - // Simple cache to avoid reloading the same ontologies too often - private val ontologyCache = cacheConfig.map(makeOntologyCache) - - private implicit val statefulIoError = StatefulIoError.noop[Unit] - private implicit val timer = cats.effect.IO.timer(ExecutionContext.fromExecutor(Executors.newFixedThreadPool(ontologyConfiguration.poolSize))) - - private [ontology] def makeOntologyCache(config: Config): Cache[IRI, OWLOntology] = { - val cacheConfig = CacheConfiguration(config) - logger.info(s"Ontology cache size: ${cacheConfig.maxSize}") - CacheBuilder.newBuilder() - .maximumSize(cacheConfig.maxSize) - .build[IRI, OWLOntology]() - } - - /** - * Returns the absolute path for a file, possibly relative to parent. - */ - def getIriPath(parent: String, path: String): String = IRI.create(parent).resolve(path).getIRIString - - /** - * Load an ontology either from an IRI. - */ - private [ontology] def loadOntologyFromIri(ontologyManager: OWLOntologyManager, cache: Option[Cache[IRI, OWLOntology]] = ontologyCache)(schemaIri: String): ErrorOr[OWLOntology] = { - validate { - val iri = IRI.create(schemaIri) - cache.flatMap(_.getIfPresent(iri) |> Option.apply) match { - case Some(ontology) => - ontologyManager.copyOntology(ontology, OntologyCopy.DEEP) - case _ => - logger.info(s"Loading ${iri.toURI.toString}") - val ontology = loadOntologyFromIri(ontologyManager, iri) - cache.foreach(_.put(iri, ontology)) - ontology - } - } - } - - // Loading the ontology can fail transiently, so put retires around it. See https://github.com/protegeproject/webprotege/issues/298 - private [ontology] def loadOntologyFromIri(ontologyManager: OWLOntologyManager, iri: IRI): OWLOntology = { - val load = IO { ontologyManager.loadOntologyFromOntologyDocument(iri) } - IORetry.withRetry[OWLOntology, Unit](load, (), ontologyConfiguration.retries, ontologyConfiguration.backoff).unsafeRunSync() - } - - /** - * Returns true if child is an equivalent of ancestor, and if not then recursively checks if any of the child's - * super classes are an equivalent of ancestor. 
- */ - private def isSubClass(reasoner: OWLReasoner, childClass: OWLClass, ancestorClass: OWLClass): Boolean = { - val equivalent: Set[OWLClass] = reasoner.getEquivalentClasses(childClass).asScala.toSet + childClass - if (equivalent.contains(ancestorClass)) { - true - } else { - val parentClasses: Set[OWLClass] = for { - equivalentClass <- equivalent - parentClass <- OWLAPIStreamUtils - .asSet(reasoner.getSuperClasses(equivalentClass).entities) - .asScala - .toSet[OWLClass] - } yield parentClass - parentClasses.collect({ - case superClass: OWLClass if isSubClass(reasoner, superClass, ancestorClass) => superClass - }).nonEmpty - } - } -} diff --git a/cwl/src/main/scala/cwl/package.scala b/cwl/src/main/scala/cwl/package.scala deleted file mode 100644 index 96796712f1e..00000000000 --- a/cwl/src/main/scala/cwl/package.scala +++ /dev/null @@ -1,154 +0,0 @@ - -import cats.data.ReaderT -import common.Checked -import common.validation.Checked._ -import common.validation.ErrorOr._ -import cwl.CwlType._ -import cwl.ExpressionEvaluator.{ECMAScriptExpression, ECMAScriptFunction} -import cwl.command.ParentName -import cwl.ontology.Schema -import shapeless._ -import wom.executable.Executable -import wom.expression.IoFunctionSet -import wom.types._ -import wom.values.WomEvaluatedCallInputs - -import scala.util.{Failure, Success, Try} - -/** - * This package is intended to parse all CWL files. - * - * It makes heavy use of Circe YAML/Json auto derivation feature and - * Circe modules that support the Scala libraries shapeless and Refined. - * - * The [[https://oss.sonatype.org/service/local/repositories/releases/archive/com/chuusai/shapeless_2.12/2.3.2/shapeless_2.12-2.3.2-javadoc.jar/!/shapeless/Coproduct.html shapeless.coproduct]] feature allows us to specify a large - * number of potential types to be parsed. A.k.a. a "disjunction" or "OR" relationship amongst these types. - * - * The [[https://github.com/fthomas/refined/blob/master/modules/core/shared/src/main/scala/eu/timepit/refined/string.scala MatchesRegex]] "refined type" is used - * to enforce structure upon String values in the CWL Yaml. Shapeless' Witness type - * is used to declare a type containing a String literal. 
- * - * @see CWL Specification - * @see circe - * @see circe-yaml - * @see Refined - * @see Shapeless - */ -package object cwl extends TypeAliases { - - type CwlFile = Array[Cwl] :+: Cwl :+: CNil - type Cwl = Workflow :+: CommandLineTool :+: ExpressionTool :+: CNil - - object Cwl { - object Workflow { def unapply(cwl: Cwl): Option[Workflow] = cwl.select[Workflow] } - object CommandLineTool { def unapply(cwl: Cwl): Option[CommandLineTool] = cwl.select[CommandLineTool] } - object ExpressionTool { def unapply(cwl: Cwl): Option[ExpressionTool] = cwl.select[ExpressionTool] } - } - - def cwlTypeToWomType : CwlType => WomType = { - case CwlType.Any => WomAnyType - case Null => WomNothingType - case Boolean => WomBooleanType - case Int => WomIntegerType - case Long => WomLongType - case Float => WomFloatType - case Double => WomFloatType - case String => WomStringType - case CwlType.File => WomMaybePopulatedFileType - case CwlType.Directory => WomMaybeListedDirectoryType - } - - object StringOrExpression { - object String { - def unapply(soe: StringOrExpression): Option[String] = soe.select[String] - } - object Expression { - def unapply(soe: StringOrExpression): Option[Expression] = soe.select[Expression] - } - object ECMAScriptExpression { - def unapply(soe: StringOrExpression): Option[ECMAScriptExpression] = soe.select[Expression].flatMap(_.select[ECMAScriptExpression]) - } - object ECMAScriptFunction { - def unapply(soe: StringOrExpression): Option[ECMAScriptFunction] = soe.select[Expression].flatMap(_.select[ECMAScriptFunction]) - } - } - - object Expression { - object ECMAScriptExpression { - def unapply(soe: Expression): Option[ECMAScriptExpression] = soe.select[ECMAScriptExpression] - } - object ECMAScriptFunction { - def unapply(soe: Expression): Option[ECMAScriptFunction] = soe.select[ECMAScriptFunction] - } - } - - type WomTypeMap = Map[String, WomType] - - type RequirementsValidator = Requirement => ErrorOr[Requirement] - import cats.syntax.validated._ - val AcceptAllRequirements: RequirementsValidator = _.validNel - - implicit class CwlHelper(val cwl: Cwl) extends AnyVal { - def womExecutable(validator: RequirementsValidator, inputsFile: Option[String], ioFunctions: IoFunctionSet, strictValidation: Boolean): Checked[Executable] = { - def executable = cwl match { - case Cwl.Workflow(w) => w.womExecutable(validator, inputsFile, ioFunctions, strictValidation) - case Cwl.CommandLineTool(clt) => clt.womExecutable(validator, inputsFile, ioFunctions, strictValidation) - case Cwl.ExpressionTool(et) => et.womExecutable(validator, inputsFile, ioFunctions, strictValidation) - case oh => throw new Exception(s"Programmer Error! Unexpected case match: $oh") - } - Try(executable) match { - case Success(s) => s - case Failure(f) => f.getMessage.invalidNelCheck - case oh => throw new Exception(s"Programmer Error! Unexpected case match: $oh") - } - } - - def requiredInputs: Map[String, WomType] = { - implicit val parent = ParentName.empty - - cwl match { - case Cwl.Workflow(w) => selectWomTypeInputs(w.inputs collect { - case i if i.`type`.isDefined => FullyQualifiedName(i.id).id -> i.`type`.get - }) - case Cwl.CommandLineTool(clt) => selectWomTypeInputs(clt.inputs collect { - case i if i.`type`.isDefined => FullyQualifiedName(i.id).id -> i.`type`.get - }) - case Cwl.ExpressionTool(et) => selectWomTypeInputs(et.inputs collect { - case i if i.`type`.isDefined => FullyQualifiedName(i.id).id -> i.`type`.get - }) - case oh => throw new Exception(s"Programmer Error! 
Unexpected case match: $oh") - } - } - - def schemaOption: Option[Schema] = cwl.fold(CwlSchemaOptionPoly) - - private def selectWomTypeInputs(myriadInputMap: Array[(String, MyriadInputType)]): Map[String, WomType] = { - (myriadInputMap collect { - case (key, MyriadInputType.WomType(w)) => key -> w - }).toMap - } - } - - object CwlSchemaOptionPoly extends Poly1 { - implicit val caseWorkflow: Case.Aux[Workflow, Option[Schema]] = at { - workflow => getSchema(workflow.`$schemas`, workflow.`$namespaces`) - } - implicit val caseCommandLineTool: Case.Aux[CommandLineTool, Option[Schema]] = at { - commandLineTool => getSchema(commandLineTool.`$schemas`, commandLineTool.`$namespaces`) - } - implicit val caseExpressionTool: Case.Aux[ExpressionTool, Option[Schema]] = at { - expressionTool => getSchema(expressionTool.`$schemas`, expressionTool.`$namespaces`) - } - - private def getSchema(schemasOption: Option[Array[String]], - namespacesOption: Option[Map[String, String]]): Option[Schema] = { - schemasOption.map(a => Schema(a.toIndexedSeq, namespacesOption getOrElse Map.empty)) - } - } - - type ExpressionLib = Vector[String] - - type Inputs = (RequirementsAndHints, ExpressionLib, WomEvaluatedCallInputs) - - type CommandPartExpression[A] = ReaderT[ErrorOr, Inputs, A] -} diff --git a/cwl/src/main/scala/cwl/preprocessor/CwlCanonicalizer.scala b/cwl/src/main/scala/cwl/preprocessor/CwlCanonicalizer.scala deleted file mode 100644 index b369d404c18..00000000000 --- a/cwl/src/main/scala/cwl/preprocessor/CwlCanonicalizer.scala +++ /dev/null @@ -1,249 +0,0 @@ -package cwl.preprocessor - -import cats.effect.{ContextShift, IO} -import cats.syntax.parallel._ -import cats.instances.list._ -import common.validation.ErrorOr.ErrorOr -import common.validation.IOChecked._ -import common.validation.Validation._ -import cwl.preprocessor.CwlReference.EnhancedCwlId -import cwl.preprocessor.CwlPreProcessor._ -import io.circe.Json -import io.circe.optics.JsonPath._ -import cwl.preprocessor.CwlCanonicalizer._ - -/** - * The real guts of the CWL pre-processor is taking a CWL reference and producing a single, self-contained JSON from it. - */ -private [preprocessor] class CwlCanonicalizer(saladFunction: SaladFunction)(implicit cs: ContextShift[IO]) { - - def getCanonicalCwl(reference: CwlReference, - namespacesJsonOption: Option[Json] = None, - schemasJsonOption: Option[Json] = None): IOChecked[Json] = { - flattenCwlReferenceInner( - reference, - Map.empty, - Map.empty, - Set.empty, - namespacesJsonOption, - schemasJsonOption).map(_.processedJson) - } - - /** - * Flatten the cwl reference given already known processed references. - */ - private def flattenCwlReferenceInner(cwlReference: CwlReference, - unProcessedReferences: UnProcessedReferences, - processedReferences: ProcessedReferences, - breadCrumbs: Set[CwlReference], - namespacesJsonOption: Option[Json], - schemasJsonOption: Option[Json]): IOChecked[ProcessedJsonAndDependencies] = { - /* - * Salad and parse from a CWL reference into a Json object - */ - def saladAndParse(ref: CwlReference): IOChecked[Json] = for { - saladed <- saladFunction(ref) - saladedJson <- parseJson(saladed) - } yield saladedJson - - for { - // parse the file containing the reference - parsed <- saladAndParse(cwlReference) - // Get a Map[CwlReference, Json] from the parsed file. 
If the file is a JSON object and only contains one node, the map will only have 1 element - newUnProcessedReferences = mapIdToContent(parsed).toMap - // The reference json in the file - referenceJson <- newUnProcessedReferences - .collectFirst({ case (ref, json) if ref.pointerWithinFile == cwlReference.pointerWithinFile => json }) - .toIOChecked(s"Cannot find a tool or workflow with ID '${cwlReference.pointerWithinFile}' in file ${cwlReference.pathAsString}'s set: [${newUnProcessedReferences.keySet.mkString(", ")}]") - // Process the reference json - processed <- flattenJson( - referenceJson, - newUnProcessedReferences ++ unProcessedReferences, - processedReferences, - breadCrumbs + cwlReference, - namespacesJsonOption, - schemasJsonOption - ) - } yield processed - } - - /** - * Given a Json representing a tool or workflow, flattens it and return the other processed references that were generated. - * - * NB: Flatten here means two things: - * - Find references within the CWL and convert them into 'local' links - * - Create a map of canonical links to JSON CWL content - * - * @param saladedJson json to process - * @param unProcessedReferences references that have been parsed and saladed (we have the json), but not flattened yet - * @param processedReferences references that are fully flattened - * @param breadCrumbs list of references that brought us here - * @param namespacesJsonOption Namespaces from the original json - * @param schemasJsonOption Schemas from the original json - */ - private def flattenJson(saladedJson: Json, - unProcessedReferences: UnProcessedReferences, - processedReferences: ProcessedReferences, - breadCrumbs: Set[CwlReference], - namespacesJsonOption: Option[Json], - schemasJsonOption: Option[Json]): IOChecked[ProcessedJsonAndDependencies] = { - /* - * Given a reference from a step's run field, flattens it and return it - * @param unProcessedReferences references that have been parsed and saladed (we have the json), but not flattened yet. - * @param checkedProcessedReferences references that are fully processed - * @param cwlReference reference being processed - * @return a new ProcessedReferences Map including this cwlReference processed along with all the dependencies - * that might have been processed recursively. 
- */ - def processCwlRunReference(checkedProcessedReferences: IOChecked[ProcessedReferences], - cwlReference: CwlReference): IOChecked[ProcessedReferences] = { - def processReference(processedReferences: ProcessedReferences) = { - - val result: IOChecked[ProcessedJsonAndDependencies] = unProcessedReferences.get(cwlReference) match { - case Some(unProcessedReferenceJson) => - // Found the json in the unprocessed map, no need to reparse the file, just flatten this json - flattenJson( - unProcessedReferenceJson, - unProcessedReferences, - processedReferences, - breadCrumbs, - namespacesJsonOption, - schemasJsonOption - ) - case None => - // This is the first time we're seeing this reference, we need to parse its file and flatten it - flattenCwlReferenceInner( - cwlReference, - unProcessedReferences, - processedReferences, - breadCrumbs, - namespacesJsonOption, - schemasJsonOption - ) - } - - result map { - // Return everything we've got (the previously known "processedReferences" + our new processed reference + everything that was processed to get to it) - case ProcessedJsonAndDependencies(processed, newReferences) => processedReferences ++ newReferences + (cwlReference -> processed) - } - } - - def processIfNeeded(processedReferences: ProcessedReferences): IOChecked[ProcessedReferences] = { - // If the reference has already been processed, no need to do anything - if (processedReferences.contains(cwlReference)) processedReferences.validIOChecked - // If the reference is in the bread crumbs it means we circled back to it: fail the pre-processing - else if (breadCrumbs.contains(cwlReference)) s"Found a circular dependency on $cwlReference".invalidIOChecked - // Otherwise let's see if we already have the json for it or if we need to process the file - else processReference(processedReferences) - } - - for { - processedReferences <- checkedProcessedReferences - newReferences <- processIfNeeded(processedReferences) - } yield newReferences - } - - def addJsonKeyValue(originalJson: Json, key: String, valueOption: Option[Json]): Json = { - valueOption match { - case Some(value) => originalJson.mapObject(_.add(key, value)) - case None => originalJson - } - } - - val namespacesJson = addJsonKeyValue(saladedJson, JsonKeyNamespaces, namespacesJsonOption) - val schemasJson = addJsonKeyValue(namespacesJson, JsonKeySchemas, schemasJsonOption) - - import cats.syntax.apply._ - - // Take the processed runs and inject them in the json - def inlineProcessedJsons(newKnownReferences: ProcessedReferences, inlinedRunWorkflows: Map[String, ProcessedJsonAndDependencies]) = { - // Provide a function to swap the run reference with its json content - val lookupFunction: Json => Json = { - json: Json => { - val fromRunReferenceMap = for { - asString <- json.asString - reference <- asString.asReference - embeddedJson <- newKnownReferences.get(reference) - } yield embeddedJson - - val fromInlinedWorkflow = for { - asObject <- json.asObject - id <- asObject.kleisli("id") - idAsString <- id.asString - embeddedJson <- inlinedRunWorkflows.get(idAsString) - } yield embeddedJson.processedJson - - fromRunReferenceMap.orElse(fromInlinedWorkflow).getOrElse(json) - } - } - - val flattenedJson = root.steps.each.run.json.modify(lookupFunction)(schemasJson) - - ProcessedJsonAndDependencies(flattenedJson, newKnownReferences ++ inlinedRunWorkflows.values.flatMap(_.processedDependencies)) - } - - /* - * Given a json, collects all "steps.run" values that are JSON Strings, and convert them to CwlReferences. - * A saladed JSON is assumed. 
- */ - def findRunReferences(json: Json): List[CwlReference] = { - json.asArray match { - case Some(cwls) => cwls.toList.flatMap(findRunReferences) - case _ => root.steps.each.run.string.getAll(json).flatMap(_.asReference).distinct - } - } - - /* - * Given a json, collects all "steps.run" values that are JSON Objects representing a workflow. - * A saladed JSON is assumed. - * @return a Map[String, Json], where the key is the cwl id of the workflow, and the value its content - */ - def findRunInlinedWorkflows(json: Json): ErrorOr[Map[String, Json]] = { - import cats.syntax.traverse._ - - json.asArray match { - case Some(cwls) => cwls.toList - .flatTraverse(findRunInlinedWorkflows(_).map(_.toList)) - .map(_.toMap) - case _ => - // Look for all the "run" steps that are json objects - root.steps.each.run.obj.getAll(json) - .map(Json.fromJsonObject) - // Only keep the workflows (CommandLineTools don't have steps so no need to process them) - .filter(root.`class`.string.exist(_.equalsIgnoreCase("Workflow"))) - .traverse[ErrorOr, (String, Json)]( obj => - // Find the id of the workflow - root.id.string.getOption(obj) - .toErrorOr("Programmer error: Workflow did not contain an id. Make sure the cwl has been saladed") - .map(_ -> obj) - ).map(_.toMap) - } - } - - // Recursively process the run references (where run is a string pointing to another Workflow / Tool) - // TODO: it would be nice to accumulate failures here somehow (while still folding and be able to re-use - // successfully processed references, so I don't know if ErrorOr would work) - val processedRunReferences: IOChecked[ProcessedReferences] = findRunReferences(schemasJson).foldLeft(processedReferences.validIOChecked)(processCwlRunReference) - - // Recursively process the inlined run workflows (where run is a Json object representing a workflow) - val processedInlineReferences: IOChecked[Map[String, ProcessedJsonAndDependencies]] = (for { - inlineWorkflowReferences <- findRunInlinedWorkflows(saladedJson).toIOChecked - flattenedWorkflows <- inlineWorkflowReferences.toList.parTraverse[IOChecked, (String, ProcessedJsonAndDependencies)]({ - case (id, value) => flattenJson(value, unProcessedReferences, processedReferences, breadCrumbs, namespacesJsonOption, schemasJsonOption).map(id -> _) - }) - } yield flattenedWorkflows).map(_.toMap) - - // Replace the unprocessed runs with their processed value - (processedRunReferences, processedInlineReferences).tupled.map(Function.tupled(inlineProcessedJsons)) - } -} - -private object CwlCanonicalizer { - /** - * A Cwl json that has been processed (saladed and flattened), as well as its processed dependencies. 
- */ - final case class ProcessedJsonAndDependencies(processedJson: Json, processedDependencies: ProcessedReferences) - - final type UnProcessedReferences = Map[CwlReference, Json] - final type ProcessedReferences = Map[CwlReference, Json] -} diff --git a/cwl/src/main/scala/cwl/preprocessor/CwlPreProcessor.scala b/cwl/src/main/scala/cwl/preprocessor/CwlPreProcessor.scala deleted file mode 100644 index 9855ee73953..00000000000 --- a/cwl/src/main/scala/cwl/preprocessor/CwlPreProcessor.scala +++ /dev/null @@ -1,236 +0,0 @@ -package cwl.preprocessor - -import java.util.concurrent.Executors - -import cats.data.NonEmptyList -import cats.effect.{ContextShift, IO} -import cats.syntax.either._ -import common.validation.IOChecked._ -import cwl.CwlDecoder -import cwl.ontology.Schema -import cwl.preprocessor.CwlPreProcessor._ -import cwl.preprocessor.CwlReference.EnhancedCwlId -import io.circe.optics.JsonPath._ -import io.circe.{Json, JsonNumber, JsonObject} -import mouse.all._ -import org.slf4j.LoggerFactory -import wom.util.YamlUtils - -import scala.concurrent.ExecutionContext - -/** - * Class to create a standalone version of a CWL file. - * - * NB: Want to use the pre-processor? Use preProcessCwl(ref: CwlReference) - * - * @param saladFunction function that takes a file and produce a saladed version of the content - */ -class CwlPreProcessor(saladFunction: SaladFunction = saladCwlFile) { - - private val ec: ExecutionContext = ExecutionContext.fromExecutor(Executors.newFixedThreadPool(5)) - private implicit val cs = IO.contextShift(ec) - - /** - * This is THE main entry point into the CWL pre-processor. Takes a CWL reference and - * returns a canonical JSON version with all references resolved. - * - * @param ref The reference to the CWL to pre-process - * @return A canonical JSON representation of the CWL with all internal references expanded in-place - */ - def preProcessCwl(ref: CwlReference): IOChecked[Json] = ref match { - case file: CwlFileReference => preProcessCwlFile(file) - case other => preProcessRemoteCwl(other) - } - - /** - * Convenience method to get the processed workflow as a string. - */ - def preProcessCwlToString(cwlReference: CwlReference): IOChecked[String] = preProcessCwl(cwlReference).map(_.printCompact) - - def preProcessInputFiles(inputContent: String, mappingFunction: String => String): IOChecked[String] = for { - parsed <- parseYaml(inputContent) - mapped = parsed |> mapFilesAndDirectories(mappingFunction) |> mapNumbers - } yield mapped.printCompact - - /** - * Pre-process a CWL file and create a standalone, runnable (given proper inputs), inlined version of its content. - * - * The general idea is to work on CwlReferences, starting from the one coming to this function in the form of file and optional root. - * The goal is to look at the steps in this workflow that point to other references, and recursively flatten them until we can replace the step with - * its flat version. - * - * There are 3 pieces of information that are carried around during this process: - * 1) ProcessedReferences: A Map[CwlReference, Json] of CwlReference for which we have the fully processed (saladed AND flattened) Json value. - * - * 2) UnProcessedReferences: A Map[CwlReference, Json] of CwlReference for which we have the saladed but NOT flattened Json value. - * This can happen because a file can contain multiple tools / workflows. When we salad / parse this file, we get (CwlReference, Json) pairs - * for all the workflow / tools in the file, but they are not flattened yet. 
- * We keep this to avoid having to re-salad / re-parse files unnecessarily. - * - * 3) BreadCrumb: A Set[CwlReference] used to follow the trail of CwlReferences that we are processing as we recurse down. - * This is used to be able to detect circular dependencies (if the cwlReference being processed is in that set, then we have a circular dependency) . - * - */ - private def preProcessCwlFile(reference: CwlFileReference): IOChecked[Json] = { - - def absoluteSchemaPaths(json: Json): Json = { - json mapArray { - _ map absoluteSchemaPaths - } mapString { - Schema.getIriPath(reference.fullReference, _) - } - } - - // NB the JSON here is only used to decide whether or not to flatten. If we do decide to flatten we throw away the - // json and request a canonical version from the CwlCanonicalizer. - def flattenOrByPass(json: Json): IOChecked[Json] = { - def flatten(json: Json): IOChecked[Json] = { - val cwlReferenceFlattener = new CwlCanonicalizer(saladFunction) - val namespacesJsonOption: Option[Json] = json.asObject.flatMap(_.kleisli(JsonKeyNamespaces)) - val schemasJsonOption: Option[Json] = json.asObject.flatMap(_.kleisli(JsonKeySchemas)).map(absoluteSchemaPaths) - - cwlReferenceFlattener.getCanonicalCwl( - reference, - namespacesJsonOption, - schemasJsonOption - ) - } - - def bypass(alreadyCanonicalJson: Json): IOChecked[Json] = alreadyCanonicalJson.validIOChecked - - val fileContentReference = for { - asObject <- json.asObject - fileContentId <- asObject.kleisli("id") - stringId <- fileContentId.asString - fileContentReference <- CwlReference.fromString(stringId) - } yield fileContentReference - - fileContentReference match { - // This by passes the pre-processing if the file already has an id for which the file part doesn't match the path of the file - // passed to this function, as this would indicate that it has already been saladed and pre-processed. - case Some(CwlFileReference(file, _)) if !file.equals(reference.file) => bypass(json) - case _ => flatten(json) - } - } - - for { - original <- parseYaml(reference.file.contentAsString) - flattened <- flattenOrByPass(original) - } yield flattened - } - - // Like 'File', except that we don't read any contents before passing the path over to cwltool to canonicalize. 
- private def preProcessRemoteCwl(reference: CwlReference)(implicit cs: ContextShift[IO]): IOChecked[Json] = { - val cwlCanonicalizer = new CwlCanonicalizer(saladFunction) - cwlCanonicalizer.getCanonicalCwl(reference) - } -} - -object CwlPreProcessor { - private val Log = LoggerFactory.getLogger("CwlPreProcessor") - - private [preprocessor] type SaladFunction = CwlReference => IOChecked[String] - private [preprocessor] val JsonKeyNamespaces = s"$$namespaces" - private [preprocessor] val JsonKeySchemas = s"$$schemas" - - private def saladSpinner(doLogging: Boolean): SaladFunction = ref => { - if (doLogging) { - Log.info(s"Pre-Processing ${ref.pathAsString}") - } - - CwlDecoder.saladCwlFile(ref) - } - - private [preprocessor] val saladCwlFile: SaladFunction = saladSpinner(true) - private val saladCwlFileWithoutLogging: SaladFunction = saladSpinner(false) - - implicit class PrintableJson(val json: Json) extends AnyVal { - def printCompact = io.circe.Printer.noSpaces.print(json) - } - - def noLogging = new CwlPreProcessor(saladCwlFileWithoutLogging) - - // Fold over a json recursively and prefix all files - def mapFilesAndDirectories(mappingFunction: String => String)(json: Json): Json = { - // Function to check if the given json has the provided key / value pair - def hasKeyValue(key: String, value: String): Json => Boolean = { - root.selectDynamic(key).string.exist(_.equalsIgnoreCase(value)) - } - - // Return true if the given json object represents a File - def isFile(obj: JsonObject) = hasKeyValue("class", "File")(Json.fromJsonObject(obj)) - - // Return true if the given json object represents a Directory - def isDirectory(obj: JsonObject) = hasKeyValue("class", "Directory")(Json.fromJsonObject(obj)) - - // Modify the string at "key" using the mappingFunction - def mapStringValue(key: String, mappingFunction: String => String): Json => Json = root.selectDynamic(key).string.modify(mappingFunction) - - // Map "location" and "default" - def prefix(mappingFunction: String => String): Json => Json = mapStringValue("location", mappingFunction).compose(mapStringValue("path", mappingFunction)) - - // Prefix the location or path in the json object if it's a file or directory, otherwise recurse over its fields - def prefixObject(mappingFunction: String => String)(obj: JsonObject): Json = { - // If the object is file or a directory, prefix it with the gcs prefix - if (isFile(obj) || isDirectory(obj)) { - prefix(mappingFunction)(Json.fromJsonObject(obj)) - // Even if it's a file it may have secondary files. So keep recursing on its fields - .mapObject(_.mapValues(mapFilesAndDirectories(mappingFunction))) - } - // Otherwise recursively process its fields - else Json.fromJsonObject(obj.mapValues(mapFilesAndDirectories(mappingFunction))) - } - - json.fold( - jsonNull = json, - jsonBoolean = _ => json, - jsonNumber = _ => json, - jsonString = _ => json, - jsonObject = prefixObject(mappingFunction), - jsonArray = arr => Json.arr(arr.map(mapFilesAndDirectories(mappingFunction)): _*) - ) - } - - private [preprocessor] def mapNumbers(json: Json): Json = { - // Circumvent Circe's scientific format for numbers: convert to a JSON String without exponential notation. 
- def nonScientificNumberFormatting(jsonNumber: JsonNumber): Json = { - val conversions = LazyList[JsonNumber => Option[Any]]( - _.toBigInt.map(_.longValue), - _.toBigDecimal.map(_.doubleValue), - Function.const(Option("null"))) - - // The `get` is safe because `Option("null")` guarantees a match even if the other two Stream elements - // do not satisfy the predicate. - conversions.map(_.apply(jsonNumber)).find(_.isDefined).flatten.get.toString |> Json.fromString - } - - json.fold( - jsonNull = json, - jsonBoolean = _ => json, - jsonNumber = nonScientificNumberFormatting, - jsonString = _ => json, - jsonObject = _.mapValues(mapNumbers) |> Json.fromJsonObject, - jsonArray = _.map(mapNumbers) |> Json.fromValues - ) - } - - private [preprocessor] def parseJson(in: String): IOChecked[Json] = { - io.circe.parser.parse(in).leftMap(error => NonEmptyList.one(error.message)).toIOChecked - } - - private [preprocessor] def parseYaml(in: String): IOChecked[Json] = { - val yaml = YamlUtils.parse(in) - yaml.leftMap(error => NonEmptyList.one(error.message)).toIOChecked - } - - /** - * Given a json, collect all tools or workflows and map them with their reference id. - * A saladed JSON is assumed. - */ - private [preprocessor] def mapIdToContent(json: Json): List[(CwlReference, Json)] = { - json.asArray match { - case Some(cwls) => cwls.toList.flatMap(mapIdToContent) - case None => root.id.string.getOption(json).flatMap(_.asReference).map(_ -> json).toList - } - } -} diff --git a/cwl/src/main/scala/cwl/preprocessor/CwlReference.scala b/cwl/src/main/scala/cwl/preprocessor/CwlReference.scala deleted file mode 100644 index badddf028b4..00000000000 --- a/cwl/src/main/scala/cwl/preprocessor/CwlReference.scala +++ /dev/null @@ -1,82 +0,0 @@ -package cwl.preprocessor - -import better.files.{File => BFile} -import cwl.preprocessor.CwlReference._ -import cwl.{FileAndId, FullyQualifiedName} -import cwl.command.ParentName - -sealed trait CwlReference { - def pathAsString: String - def pointerWithinFile: Option[String] - def changePointer(to: Option[String]): CwlReference - - private def pointerWithHash: String = pointerWithinFile.map(p => s"#$p").getOrElse("") - lazy val fullReference: String = s"$pathAsString$pointerWithHash" - - override def toString: String = fullReference -} - -/** - * Saladed CWLs reference other local CWL "node" (workflow or tool) using a URI as follow: - * file:///path/to/file/containing/node.cwl[#pointer_to_node] - * #pointer_to_node to node is optional, and will specify which workflow or tool is being targeted in the file. - * - * e.g: - * { - * "class": "Workflow", - * "id": "file:///path/to/workflow/workflow.cwl", - * ... - * "steps": [ - * { - * "run": "file:///path/to/workflow/multi_tools.cwl#my_tool", - * ... 
- * } - * ] - * } - * - * This snippet contains 2 references, one that is the ID of this workflow, the other one is the run step pointing to "my_tool" in "/path/to/workflow/multi_tools.cwl" - * - */ -final case class CwlFileReference(file: BFile, pointerWithinFile: Option[String]) extends CwlReference { - override val pathAsString: String = s"$LocalScheme${file.toString}" - override def changePointer(to: Option[String]): CwlReference = this.copy(pointerWithinFile = to) -} - -final case class CwlHttpReference(pathAsString: String, pointerWithinFile: Option[String]) extends CwlReference { - override def changePointer(to: Option[String]): CwlReference = this.copy(pointerWithinFile = to) -} - -object CwlReference { - val LocalScheme = "file://" - val HttpScheme = "http://" - val HttpsScheme = "https://" - - implicit class EnhancedCwlId(val id: String) extends AnyVal { - def asReference: Option[CwlReference] = CwlReference.fromString(id) - def stripFilePrefix = id.stripPrefix(LocalScheme) - } - - val ReferenceRegex = "(.*://)?([^#]*)(#(.*))?".r - - def fromString(in: String): Option[CwlReference] = { - in match { - case ReferenceRegex(scheme, path, _, pointerWithinFile) => - if (scheme == LocalScheme) { - FullyQualifiedName.maybeApply(in)(ParentName.empty) match { - case Some(FileAndId(file, _, _)) => Option(CwlFileReference(BFile(file.stripFilePrefix), Option(pointerWithinFile))) - case _ => Option(CwlFileReference(BFile(in.stripFilePrefix), Option(pointerWithinFile))) - } - } else if (scheme == HttpScheme || scheme == HttpsScheme) { - Option(CwlHttpReference(s"$scheme$path", Option(pointerWithinFile))) - } else { - None - } - } - } -} - -object CwlFileReference { - def apply(file: BFile, pointer: Option[String]) = { - new CwlFileReference(file, pointer) - } -} diff --git a/cwl/src/main/scala/cwl/requirement/RequirementToAttributeMap.scala b/cwl/src/main/scala/cwl/requirement/RequirementToAttributeMap.scala deleted file mode 100644 index 1a727837172..00000000000 --- a/cwl/src/main/scala/cwl/requirement/RequirementToAttributeMap.scala +++ /dev/null @@ -1,79 +0,0 @@ -package cwl.requirement - -import cwl._ -import shapeless.Poly1 -import wom.RuntimeAttributesKeys._ -import wom.expression.{ValueAsAnExpression, WomExpression} -import wom.values.{WomLong, WomString} - -object RequirementToAttributeMap extends Poly1 { - type ResourcesToExpressionMap = (Set[String], ExpressionLib) => Map[String, WomExpression] - implicit def fromJs: Case.Aux[InlineJavascriptRequirement, ResourcesToExpressionMap] = at[InlineJavascriptRequirement] { - _ => (_,_) => Map.empty - } - - implicit def fromSchemaDef: Case.Aux[SchemaDefRequirement, ResourcesToExpressionMap] = at[SchemaDefRequirement] { - _ => (_,_) => Map.empty - } - - implicit def fromDocker: Case.Aux[DockerRequirement, ResourcesToExpressionMap] = at[DockerRequirement] { - docker => (_,_) => docker.dockerPull.orElse(docker.dockerImageId).map({ pull => - DockerKey -> ValueAsAnExpression(WomString(pull)) - }).toMap - } - - implicit def fromSoftware: Case.Aux[SoftwareRequirement, ResourcesToExpressionMap] = at[SoftwareRequirement] { - _ => (_,_) => Map.empty - } - - implicit def fromInitialWorkDir: Case.Aux[InitialWorkDirRequirement, ResourcesToExpressionMap] = at[InitialWorkDirRequirement] { - _ => (_,_) => Map.empty - } - - implicit def fromEnvVar: Case.Aux[EnvVarRequirement, ResourcesToExpressionMap] = at[EnvVarRequirement] { - _ => (_,_) => Map.empty - } - - implicit def fromShellCommand: Case.Aux[ShellCommandRequirement, ResourcesToExpressionMap] = 
at[ShellCommandRequirement] { - _ => (_,_) => Map.empty - } - - implicit def fromResource: Case.Aux[ResourceRequirement, ResourcesToExpressionMap] = at[ResourceRequirement] { - resource => (inputNames, expressionLib) => - def toExpression(resourceRequirement: ResourceRequirementType) = - resourceRequirement.fold(ResourceRequirementToWomExpression).apply(inputNames, expressionLib) - - List( - // Map cpuMin to both cpuMin and cpu keys - resource.effectiveCoreMin.toList.map(toExpression).flatMap(min => List(CpuMinKey -> min, CpuKey -> min)), - resource.effectiveCoreMax.toList.map(toExpression).map(CpuMaxKey -> _), - // Map ramMin to both memoryMin and memory keys - resource.effectiveRamMin.toList.map(toExpression).flatMap(min => List(MemoryMinKey -> min, MemoryKey -> min)), - resource.effectiveRamMax.toList.map(toExpression).map(MemoryMaxKey -> _), - resource.effectiveTmpdirMin.toList.map(toExpression).map(TmpDirMinKey -> _), - resource.effectiveTmpdirMax.toList.map(toExpression).map(TmpDirMaxKey -> _), - resource.effectiveOutdirMin.toList.map(toExpression).map(OutDirMinKey -> _), - resource.effectiveOutdirMax.toList.map(toExpression).map(OutDirMaxKey -> _) - ).flatten.toMap - } - - implicit def fromInputResourceRequirement: Case.Aux[DnaNexusInputResourceRequirement, ResourcesToExpressionMap] = at[DnaNexusInputResourceRequirement] { - case DnaNexusInputResourceRequirement(_, indirMin) => (_, _) => indirMin.map(value => ValueAsAnExpression(WomLong(value))).map(DnaNexusInputDirMinKey -> _).toMap - } - - implicit def fromSubWorkflow: Case.Aux[SubworkflowFeatureRequirement, ResourcesToExpressionMap] = at[SubworkflowFeatureRequirement] { - _ => (_,_) => Map.empty - } - - implicit def fromScatter: Case.Aux[ScatterFeatureRequirement, ResourcesToExpressionMap] = at[ScatterFeatureRequirement] { - _ => (_,_) => Map.empty - } - - implicit def fromMultipleInput: Case.Aux[MultipleInputFeatureRequirement, ResourcesToExpressionMap] = at[MultipleInputFeatureRequirement] { - _ => (_,_) => Map.empty - } - - implicit def fromStepInput: Case.Aux[StepInputExpressionRequirement, ResourcesToExpressionMap] = at[StepInputExpressionRequirement] { - _ => (_,_) => Map.empty - } -} diff --git a/cwl/src/main/scala/cwl/requirement/ResourceRequirementToWomExpression.scala b/cwl/src/main/scala/cwl/requirement/ResourceRequirementToWomExpression.scala deleted file mode 100644 index 74727947657..00000000000 --- a/cwl/src/main/scala/cwl/requirement/ResourceRequirementToWomExpression.scala +++ /dev/null @@ -1,15 +0,0 @@ -package cwl.requirement - -import cwl.{Expression, ExpressionLib} -import shapeless.Poly1 -import wom.expression.{ValueAsAnExpression, WomExpression} -import wom.values.{WomLong, WomString} - -object ResourceRequirementToWomExpression extends Poly1 { - type ResourceRequirementStringSetToWomExpression = (Set[String], ExpressionLib) => WomExpression - implicit def fromLong: Case.Aux[Long, ResourceRequirementStringSetToWomExpression] = at[Long] { l => (_, _) => ValueAsAnExpression(WomLong(l)) } - implicit def fromString: Case.Aux[String, ResourceRequirementStringSetToWomExpression] = at[String] { s => (_, _) => ValueAsAnExpression(WomString(s)) } - implicit def fromExpression: Case.Aux[Expression, ResourceRequirementStringSetToWomExpression] = at[Expression] { e => (inputs, expressionLib) => - cwl.ECMAScriptWomExpression(e, inputs, expressionLib) - } -} diff --git a/cwl/src/test/resources/1st-tool.cwl b/cwl/src/test/resources/1st-tool.cwl deleted file mode 100755 index af0c4de297d..00000000000 --- 
a/cwl/src/test/resources/1st-tool.cwl +++ /dev/null @@ -1,9 +0,0 @@ -cwlVersion: v1.0 -class: CommandLineTool -baseCommand: echo -inputs: - message: - type: string - inputBinding: - position: 1 -outputs: [] diff --git a/cwl/src/test/resources/1st-workflow.cwl b/cwl/src/test/resources/1st-workflow.cwl deleted file mode 100644 index ce4b4b0bd71..00000000000 --- a/cwl/src/test/resources/1st-workflow.cwl +++ /dev/null @@ -1,23 +0,0 @@ -cwlVersion: v1.0 -class: Workflow -inputs: - inp: File - ex: string -outputs: - classout: - type: File - outputSource: compile/classfile - -steps: - untar: - run: tar-param.cwl - in: - tarfile: inp - extractfile: ex - out: [example_out] - - compile: - run: arguments.cwl - in: - src: untar/example_out - out: [classfile] diff --git a/cwl/src/test/resources/application.conf b/cwl/src/test/resources/application.conf deleted file mode 100644 index 8d12b670cd3..00000000000 --- a/cwl/src/test/resources/application.conf +++ /dev/null @@ -1,14 +0,0 @@ -akka { - log-dead-letters = "off" - loggers = ["akka.event.slf4j.Slf4jLogger"] -} - -ontology { - # Uncomment to enable caching of ontologies. Improves performance when loading ontologies from remote IRIs. - #cache { - # max-size = 20 - #} - retries = 3 - pool-size = 3 - backoff-time = 2 seconds -} diff --git a/cwl/src/test/resources/arguments.cwl b/cwl/src/test/resources/arguments.cwl deleted file mode 100644 index 730030e7437..00000000000 --- a/cwl/src/test/resources/arguments.cwl +++ /dev/null @@ -1,22 +0,0 @@ -cwlVersion: "v1.0" -class: "CommandLineTool" -label: "Example trivial wrapper for Java 7 compiler" -hints: - - dockerPull: "java:7-jdk" - class: "DockerRequirement" -baseCommand: "javac" -arguments: - - "-d" - - "$(runtime.outdir)" -inputs: - - type: "File" - inputBinding: - position: 1 - id: "file:///home/dan/wdl4s/arguments.cwl#src" -outputs: - - type: "File" - outputBinding: - glob: "*.class" - id: "file:///home/dan/wdl4s/arguments.cwl#classfile" -id: "file:///home/dan/wdl4s/arguments.cwl" -name: "file:///home/dan/wdl4s/arguments.cwl" diff --git a/cwl/src/test/resources/bad.cwl b/cwl/src/test/resources/bad.cwl deleted file mode 100644 index d38ed298731..00000000000 --- a/cwl/src/test/resources/bad.cwl +++ /dev/null @@ -1 +0,0 @@ -gibberish diff --git a/cwl/src/test/resources/bad2.cwl b/cwl/src/test/resources/bad2.cwl deleted file mode 100644 index d38ed298731..00000000000 --- a/cwl/src/test/resources/bad2.cwl +++ /dev/null @@ -1 +0,0 @@ -gibberish diff --git a/cwl/src/test/resources/brokenlinks.cwl b/cwl/src/test/resources/brokenlinks.cwl deleted file mode 100644 index c710358ce61..00000000000 --- a/cwl/src/test/resources/brokenlinks.cwl +++ /dev/null @@ -1,23 +0,0 @@ -cwlVersion: v1.0 -class: Workflow -inputs: - inp: File - ex: string -outputs: - classout: - type: File - outputSource: compile/classfile - -steps: - untar: - run: wrong.cwl - in: - tarfile: inp - extractfile: ex - out: [example_out] - - compile: - run: wrong2.cwl - in: - src: untar/example_out - out: [classfile] diff --git a/cwl/src/test/resources/cwl/lodash.js b/cwl/src/test/resources/cwl/lodash.js deleted file mode 100644 index 9b95dfefe87..00000000000 --- a/cwl/src/test/resources/cwl/lodash.js +++ /dev/null @@ -1,17112 +0,0 @@ -/** - * @license - * Lodash - * Copyright OpenJS Foundation and other contributors - * Released under MIT license - * Based on Underscore.js 1.8.3 - * Copyright Jeremy Ashkenas, DocumentCloud and Investigative Reporters & Editors - */ -;(function() { - - /** Used as a safe reference for `undefined` in pre-ES5 
environments. */ - var undefined; - - /** Used as the semantic version number. */ - var VERSION = '4.17.15'; - - /** Used as the size to enable large array optimizations. */ - var LARGE_ARRAY_SIZE = 200; - - /** Error message constants. */ - var CORE_ERROR_TEXT = 'Unsupported core-js use. Try https://npms.io/search?q=ponyfill.', - FUNC_ERROR_TEXT = 'Expected a function'; - - /** Used to stand-in for `undefined` hash values. */ - var HASH_UNDEFINED = '__lodash_hash_undefined__'; - - /** Used as the maximum memoize cache size. */ - var MAX_MEMOIZE_SIZE = 500; - - /** Used as the internal argument placeholder. */ - var PLACEHOLDER = '__lodash_placeholder__'; - - /** Used to compose bitmasks for cloning. */ - var CLONE_DEEP_FLAG = 1, - CLONE_FLAT_FLAG = 2, - CLONE_SYMBOLS_FLAG = 4; - - /** Used to compose bitmasks for value comparisons. */ - var COMPARE_PARTIAL_FLAG = 1, - COMPARE_UNORDERED_FLAG = 2; - - /** Used to compose bitmasks for function metadata. */ - var WRAP_BIND_FLAG = 1, - WRAP_BIND_KEY_FLAG = 2, - WRAP_CURRY_BOUND_FLAG = 4, - WRAP_CURRY_FLAG = 8, - WRAP_CURRY_RIGHT_FLAG = 16, - WRAP_PARTIAL_FLAG = 32, - WRAP_PARTIAL_RIGHT_FLAG = 64, - WRAP_ARY_FLAG = 128, - WRAP_REARG_FLAG = 256, - WRAP_FLIP_FLAG = 512; - - /** Used as default options for `_.truncate`. */ - var DEFAULT_TRUNC_LENGTH = 30, - DEFAULT_TRUNC_OMISSION = '...'; - - /** Used to detect hot functions by number of calls within a span of milliseconds. */ - var HOT_COUNT = 800, - HOT_SPAN = 16; - - /** Used to indicate the type of lazy iteratees. */ - var LAZY_FILTER_FLAG = 1, - LAZY_MAP_FLAG = 2, - LAZY_WHILE_FLAG = 3; - - /** Used as references for various `Number` constants. */ - var INFINITY = 1 / 0, - MAX_SAFE_INTEGER = 9007199254740991, - MAX_INTEGER = 1.7976931348623157e+308, - NAN = 0 / 0; - - /** Used as references for the maximum length and index of an array. */ - var MAX_ARRAY_LENGTH = 4294967295, - MAX_ARRAY_INDEX = MAX_ARRAY_LENGTH - 1, - HALF_MAX_ARRAY_LENGTH = MAX_ARRAY_LENGTH >>> 1; - - /** Used to associate wrap methods with their bit flags. */ - var wrapFlags = [ - ['ary', WRAP_ARY_FLAG], - ['bind', WRAP_BIND_FLAG], - ['bindKey', WRAP_BIND_KEY_FLAG], - ['curry', WRAP_CURRY_FLAG], - ['curryRight', WRAP_CURRY_RIGHT_FLAG], - ['flip', WRAP_FLIP_FLAG], - ['partial', WRAP_PARTIAL_FLAG], - ['partialRight', WRAP_PARTIAL_RIGHT_FLAG], - ['rearg', WRAP_REARG_FLAG] - ]; - - /** `Object#toString` result references. 
*/ - var argsTag = '[object Arguments]', - arrayTag = '[object Array]', - asyncTag = '[object AsyncFunction]', - boolTag = '[object Boolean]', - dateTag = '[object Date]', - domExcTag = '[object DOMException]', - errorTag = '[object Error]', - funcTag = '[object Function]', - genTag = '[object GeneratorFunction]', - mapTag = '[object Map]', - numberTag = '[object Number]', - nullTag = '[object Null]', - objectTag = '[object Object]', - promiseTag = '[object Promise]', - proxyTag = '[object Proxy]', - regexpTag = '[object RegExp]', - setTag = '[object Set]', - stringTag = '[object String]', - symbolTag = '[object Symbol]', - undefinedTag = '[object Undefined]', - weakMapTag = '[object WeakMap]', - weakSetTag = '[object WeakSet]'; - - var arrayBufferTag = '[object ArrayBuffer]', - dataViewTag = '[object DataView]', - float32Tag = '[object Float32Array]', - float64Tag = '[object Float64Array]', - int8Tag = '[object Int8Array]', - int16Tag = '[object Int16Array]', - int32Tag = '[object Int32Array]', - uint8Tag = '[object Uint8Array]', - uint8ClampedTag = '[object Uint8ClampedArray]', - uint16Tag = '[object Uint16Array]', - uint32Tag = '[object Uint32Array]'; - - /** Used to match empty string literals in compiled template source. */ - var reEmptyStringLeading = /\b__p \+= '';/g, - reEmptyStringMiddle = /\b(__p \+=) '' \+/g, - reEmptyStringTrailing = /(__e\(.*?\)|\b__t\)) \+\n'';/g; - - /** Used to match HTML entities and HTML characters. */ - var reEscapedHtml = /&(?:amp|lt|gt|quot|#39);/g, - reUnescapedHtml = /[&<>"']/g, - reHasEscapedHtml = RegExp(reEscapedHtml.source), - reHasUnescapedHtml = RegExp(reUnescapedHtml.source); - - /** Used to match template delimiters. */ - var reEscape = /<%-([\s\S]+?)%>/g, - reEvaluate = /<%([\s\S]+?)%>/g, - reInterpolate = /<%=([\s\S]+?)%>/g; - - /** Used to match property names within property paths. */ - var reIsDeepProp = /\.|\[(?:[^[\]]*|(["'])(?:(?!\1)[^\\]|\\.)*?\1)\]/, - reIsPlainProp = /^\w*$/, - rePropName = /[^.[\]]+|\[(?:(-?\d+(?:\.\d+)?)|(["'])((?:(?!\2)[^\\]|\\.)*?)\2)\]|(?=(?:\.|\[\])(?:\.|\[\]|$))/g; - - /** - * Used to match `RegExp` - * [syntax characters](http://ecma-international.org/ecma-262/7.0/#sec-patterns). - */ - var reRegExpChar = /[\\^$.*+?()[\]{}|]/g, - reHasRegExpChar = RegExp(reRegExpChar.source); - - /** Used to match leading and trailing whitespace. */ - var reTrim = /^\s+|\s+$/g, - reTrimStart = /^\s+/, - reTrimEnd = /\s+$/; - - /** Used to match wrap detail comments. */ - var reWrapComment = /\{(?:\n\/\* \[wrapped with .+\] \*\/)?\n?/, - reWrapDetails = /\{\n\/\* \[wrapped with (.+)\] \*/, - reSplitDetails = /,? & /; - - /** Used to match words composed of alphanumeric characters. */ - var reAsciiWord = /[^\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\x7f]+/g; - - /** Used to match backslashes in property paths. */ - var reEscapeChar = /\\(\\)?/g; - - /** - * Used to match - * [ES template delimiters](http://ecma-international.org/ecma-262/7.0/#sec-template-literal-lexical-components). - */ - var reEsTemplate = /\$\{([^\\}]*(?:\\.[^\\}]*)*)\}/g; - - /** Used to match `RegExp` flags from their coerced string values. */ - var reFlags = /\w*$/; - - /** Used to detect bad signed hexadecimal string values. */ - var reIsBadHex = /^[-+]0x[0-9a-f]+$/i; - - /** Used to detect binary string values. */ - var reIsBinary = /^0b[01]+$/i; - - /** Used to detect host constructors (Safari). */ - var reIsHostCtor = /^\[object .+?Constructor\]$/; - - /** Used to detect octal string values. 
*/ - var reIsOctal = /^0o[0-7]+$/i; - - /** Used to detect unsigned integer values. */ - var reIsUint = /^(?:0|[1-9]\d*)$/; - - /** Used to match Latin Unicode letters (excluding mathematical operators). */ - var reLatin = /[\xc0-\xd6\xd8-\xf6\xf8-\xff\u0100-\u017f]/g; - - /** Used to ensure capturing order of template delimiters. */ - var reNoMatch = /($^)/; - - /** Used to match unescaped characters in compiled string literals. */ - var reUnescapedString = /['\n\r\u2028\u2029\\]/g; - - /** Used to compose unicode character classes. */ - var rsAstralRange = '\\ud800-\\udfff', - rsComboMarksRange = '\\u0300-\\u036f', - reComboHalfMarksRange = '\\ufe20-\\ufe2f', - rsComboSymbolsRange = '\\u20d0-\\u20ff', - rsComboRange = rsComboMarksRange + reComboHalfMarksRange + rsComboSymbolsRange, - rsDingbatRange = '\\u2700-\\u27bf', - rsLowerRange = 'a-z\\xdf-\\xf6\\xf8-\\xff', - rsMathOpRange = '\\xac\\xb1\\xd7\\xf7', - rsNonCharRange = '\\x00-\\x2f\\x3a-\\x40\\x5b-\\x60\\x7b-\\xbf', - rsPunctuationRange = '\\u2000-\\u206f', - rsSpaceRange = ' \\t\\x0b\\f\\xa0\\ufeff\\n\\r\\u2028\\u2029\\u1680\\u180e\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\u2007\\u2008\\u2009\\u200a\\u202f\\u205f\\u3000', - rsUpperRange = 'A-Z\\xc0-\\xd6\\xd8-\\xde', - rsVarRange = '\\ufe0e\\ufe0f', - rsBreakRange = rsMathOpRange + rsNonCharRange + rsPunctuationRange + rsSpaceRange; - - /** Used to compose unicode capture groups. */ - var rsApos = "['\u2019]", - rsAstral = '[' + rsAstralRange + ']', - rsBreak = '[' + rsBreakRange + ']', - rsCombo = '[' + rsComboRange + ']', - rsDigits = '\\d+', - rsDingbat = '[' + rsDingbatRange + ']', - rsLower = '[' + rsLowerRange + ']', - rsMisc = '[^' + rsAstralRange + rsBreakRange + rsDigits + rsDingbatRange + rsLowerRange + rsUpperRange + ']', - rsFitz = '\\ud83c[\\udffb-\\udfff]', - rsModifier = '(?:' + rsCombo + '|' + rsFitz + ')', - rsNonAstral = '[^' + rsAstralRange + ']', - rsRegional = '(?:\\ud83c[\\udde6-\\uddff]){2}', - rsSurrPair = '[\\ud800-\\udbff][\\udc00-\\udfff]', - rsUpper = '[' + rsUpperRange + ']', - rsZWJ = '\\u200d'; - - /** Used to compose unicode regexes. */ - var rsMiscLower = '(?:' + rsLower + '|' + rsMisc + ')', - rsMiscUpper = '(?:' + rsUpper + '|' + rsMisc + ')', - rsOptContrLower = '(?:' + rsApos + '(?:d|ll|m|re|s|t|ve))?', - rsOptContrUpper = '(?:' + rsApos + '(?:D|LL|M|RE|S|T|VE))?', - reOptMod = rsModifier + '?', - rsOptVar = '[' + rsVarRange + ']?', - rsOptJoin = '(?:' + rsZWJ + '(?:' + [rsNonAstral, rsRegional, rsSurrPair].join('|') + ')' + rsOptVar + reOptMod + ')*', - rsOrdLower = '\\d*(?:1st|2nd|3rd|(?![123])\\dth)(?=\\b|[A-Z_])', - rsOrdUpper = '\\d*(?:1ST|2ND|3RD|(?![123])\\dTH)(?=\\b|[a-z_])', - rsSeq = rsOptVar + reOptMod + rsOptJoin, - rsEmoji = '(?:' + [rsDingbat, rsRegional, rsSurrPair].join('|') + ')' + rsSeq, - rsSymbol = '(?:' + [rsNonAstral + rsCombo + '?', rsCombo, rsRegional, rsSurrPair, rsAstral].join('|') + ')'; - - /** Used to match apostrophes. */ - var reApos = RegExp(rsApos, 'g'); - - /** - * Used to match [combining diacritical marks](https://en.wikipedia.org/wiki/Combining_Diacritical_Marks) and - * [combining diacritical marks for symbols](https://en.wikipedia.org/wiki/Combining_Diacritical_Marks_for_Symbols). - */ - var reComboMark = RegExp(rsCombo, 'g'); - - /** Used to match [string symbols](https://mathiasbynens.be/notes/javascript-unicode). */ - var reUnicode = RegExp(rsFitz + '(?=' + rsFitz + ')|' + rsSymbol + rsSeq, 'g'); - - /** Used to match complex or compound words. */ - var reUnicodeWord = RegExp([ - rsUpper + '?' 
+ rsLower + '+' + rsOptContrLower + '(?=' + [rsBreak, rsUpper, '$'].join('|') + ')', - rsMiscUpper + '+' + rsOptContrUpper + '(?=' + [rsBreak, rsUpper + rsMiscLower, '$'].join('|') + ')', - rsUpper + '?' + rsMiscLower + '+' + rsOptContrLower, - rsUpper + '+' + rsOptContrUpper, - rsOrdUpper, - rsOrdLower, - rsDigits, - rsEmoji - ].join('|'), 'g'); - - /** Used to detect strings with [zero-width joiners or code points from the astral planes](http://eev.ee/blog/2015/09/12/dark-corners-of-unicode/). */ - var reHasUnicode = RegExp('[' + rsZWJ + rsAstralRange + rsComboRange + rsVarRange + ']'); - - /** Used to detect strings that need a more robust regexp to match words. */ - var reHasUnicodeWord = /[a-z][A-Z]|[A-Z]{2}[a-z]|[0-9][a-zA-Z]|[a-zA-Z][0-9]|[^a-zA-Z0-9 ]/; - - /** Used to assign default `context` object properties. */ - var contextProps = [ - 'Array', 'Buffer', 'DataView', 'Date', 'Error', 'Float32Array', 'Float64Array', - 'Function', 'Int8Array', 'Int16Array', 'Int32Array', 'Map', 'Math', 'Object', - 'Promise', 'RegExp', 'Set', 'String', 'Symbol', 'TypeError', 'Uint8Array', - 'Uint8ClampedArray', 'Uint16Array', 'Uint32Array', 'WeakMap', - '_', 'clearTimeout', 'isFinite', 'parseInt', 'setTimeout' - ]; - - /** Used to make template sourceURLs easier to identify. */ - var templateCounter = -1; - - /** Used to identify `toStringTag` values of typed arrays. */ - var typedArrayTags = {}; - typedArrayTags[float32Tag] = typedArrayTags[float64Tag] = - typedArrayTags[int8Tag] = typedArrayTags[int16Tag] = - typedArrayTags[int32Tag] = typedArrayTags[uint8Tag] = - typedArrayTags[uint8ClampedTag] = typedArrayTags[uint16Tag] = - typedArrayTags[uint32Tag] = true; - typedArrayTags[argsTag] = typedArrayTags[arrayTag] = - typedArrayTags[arrayBufferTag] = typedArrayTags[boolTag] = - typedArrayTags[dataViewTag] = typedArrayTags[dateTag] = - typedArrayTags[errorTag] = typedArrayTags[funcTag] = - typedArrayTags[mapTag] = typedArrayTags[numberTag] = - typedArrayTags[objectTag] = typedArrayTags[regexpTag] = - typedArrayTags[setTag] = typedArrayTags[stringTag] = - typedArrayTags[weakMapTag] = false; - - /** Used to identify `toStringTag` values supported by `_.clone`. */ - var cloneableTags = {}; - cloneableTags[argsTag] = cloneableTags[arrayTag] = - cloneableTags[arrayBufferTag] = cloneableTags[dataViewTag] = - cloneableTags[boolTag] = cloneableTags[dateTag] = - cloneableTags[float32Tag] = cloneableTags[float64Tag] = - cloneableTags[int8Tag] = cloneableTags[int16Tag] = - cloneableTags[int32Tag] = cloneableTags[mapTag] = - cloneableTags[numberTag] = cloneableTags[objectTag] = - cloneableTags[regexpTag] = cloneableTags[setTag] = - cloneableTags[stringTag] = cloneableTags[symbolTag] = - cloneableTags[uint8Tag] = cloneableTags[uint8ClampedTag] = - cloneableTags[uint16Tag] = cloneableTags[uint32Tag] = true; - cloneableTags[errorTag] = cloneableTags[funcTag] = - cloneableTags[weakMapTag] = false; - - /** Used to map Latin Unicode letters to basic Latin letters. */ - var deburredLetters = { - // Latin-1 Supplement block. 
-    '\xc0': 'A', '\xc1': 'A', '\xc2': 'A', '\xc3': 'A', '\xc4': 'A', '\xc5': 'A',
-    '\xe0': 'a', '\xe1': 'a', '\xe2': 'a', '\xe3': 'a', '\xe4': 'a', '\xe5': 'a',
-    '\xc7': 'C', '\xe7': 'c',
-    '\xd0': 'D', '\xf0': 'd',
-    '\xc8': 'E', '\xc9': 'E', '\xca': 'E', '\xcb': 'E',
-    '\xe8': 'e', '\xe9': 'e', '\xea': 'e', '\xeb': 'e',
-    '\xcc': 'I', '\xcd': 'I', '\xce': 'I', '\xcf': 'I',
-    '\xec': 'i', '\xed': 'i', '\xee': 'i', '\xef': 'i',
-    '\xd1': 'N', '\xf1': 'n',
-    '\xd2': 'O', '\xd3': 'O', '\xd4': 'O', '\xd5': 'O', '\xd6': 'O', '\xd8': 'O',
-    '\xf2': 'o', '\xf3': 'o', '\xf4': 'o', '\xf5': 'o', '\xf6': 'o', '\xf8': 'o',
-    '\xd9': 'U', '\xda': 'U', '\xdb': 'U', '\xdc': 'U',
-    '\xf9': 'u', '\xfa': 'u', '\xfb': 'u', '\xfc': 'u',
-    '\xdd': 'Y', '\xfd': 'y', '\xff': 'y',
-    '\xc6': 'Ae', '\xe6': 'ae',
-    '\xde': 'Th', '\xfe': 'th',
-    '\xdf': 'ss',
-    // Latin Extended-A block.
-    '\u0100': 'A', '\u0102': 'A', '\u0104': 'A',
-    '\u0101': 'a', '\u0103': 'a', '\u0105': 'a',
-    '\u0106': 'C', '\u0108': 'C', '\u010a': 'C', '\u010c': 'C',
-    '\u0107': 'c', '\u0109': 'c', '\u010b': 'c', '\u010d': 'c',
-    '\u010e': 'D', '\u0110': 'D', '\u010f': 'd', '\u0111': 'd',
-    '\u0112': 'E', '\u0114': 'E', '\u0116': 'E', '\u0118': 'E', '\u011a': 'E',
-    '\u0113': 'e', '\u0115': 'e', '\u0117': 'e', '\u0119': 'e', '\u011b': 'e',
-    '\u011c': 'G', '\u011e': 'G', '\u0120': 'G', '\u0122': 'G',
-    '\u011d': 'g', '\u011f': 'g', '\u0121': 'g', '\u0123': 'g',
-    '\u0124': 'H', '\u0126': 'H', '\u0125': 'h', '\u0127': 'h',
-    '\u0128': 'I', '\u012a': 'I', '\u012c': 'I', '\u012e': 'I', '\u0130': 'I',
-    '\u0129': 'i', '\u012b': 'i', '\u012d': 'i', '\u012f': 'i', '\u0131': 'i',
-    '\u0134': 'J', '\u0135': 'j',
-    '\u0136': 'K', '\u0137': 'k', '\u0138': 'k',
-    '\u0139': 'L', '\u013b': 'L', '\u013d': 'L', '\u013f': 'L', '\u0141': 'L',
-    '\u013a': 'l', '\u013c': 'l', '\u013e': 'l', '\u0140': 'l', '\u0142': 'l',
-    '\u0143': 'N', '\u0145': 'N', '\u0147': 'N', '\u014a': 'N',
-    '\u0144': 'n', '\u0146': 'n', '\u0148': 'n', '\u014b': 'n',
-    '\u014c': 'O', '\u014e': 'O', '\u0150': 'O',
-    '\u014d': 'o', '\u014f': 'o', '\u0151': 'o',
-    '\u0154': 'R', '\u0156': 'R', '\u0158': 'R',
-    '\u0155': 'r', '\u0157': 'r', '\u0159': 'r',
-    '\u015a': 'S', '\u015c': 'S', '\u015e': 'S', '\u0160': 'S',
-    '\u015b': 's', '\u015d': 's', '\u015f': 's', '\u0161': 's',
-    '\u0162': 'T', '\u0164': 'T', '\u0166': 'T',
-    '\u0163': 't', '\u0165': 't', '\u0167': 't',
-    '\u0168': 'U', '\u016a': 'U', '\u016c': 'U', '\u016e': 'U', '\u0170': 'U', '\u0172': 'U',
-    '\u0169': 'u', '\u016b': 'u', '\u016d': 'u', '\u016f': 'u', '\u0171': 'u', '\u0173': 'u',
-    '\u0174': 'W', '\u0175': 'w',
-    '\u0176': 'Y', '\u0177': 'y', '\u0178': 'Y',
-    '\u0179': 'Z', '\u017b': 'Z', '\u017d': 'Z',
-    '\u017a': 'z', '\u017c': 'z', '\u017e': 'z',
-    '\u0132': 'IJ', '\u0133': 'ij',
-    '\u0152': 'Oe', '\u0153': 'oe',
-    '\u0149': "'n", '\u017f': 's'
-  };
-
-  /** Used to map characters to HTML entities. */
-  var htmlEscapes = {
-    '&': '&amp;',
-    '<': '&lt;',
-    '>': '&gt;',
-    '"': '&quot;',
-    "'": '&#39;'
-  };
-
-  /** Used to map HTML entities to characters. */
-  var htmlUnescapes = {
-    '&amp;': '&',
-    '&lt;': '<',
-    '&gt;': '>',
-    '&quot;': '"',
-    '&#39;': "'"
-  };
-
-  /** Used to escape characters for inclusion in compiled string literals. */
-  var stringEscapes = {
-    '\\': '\\',
-    "'": "'",
-    '\n': 'n',
-    '\r': 'r',
-    '\u2028': 'u2028',
-    '\u2029': 'u2029'
-  };
-
-  /** Built-in method references without a dependency on `root`. */
-  var freeParseFloat = parseFloat,
-      freeParseInt = parseInt;
-
-  /** Detect free variable `global` from Node.js.
*/ - var freeGlobal = typeof global == 'object' && global && global.Object === Object && global; - - /** Detect free variable `self`. */ - var freeSelf = typeof self == 'object' && self && self.Object === Object && self; - - /** Used as a reference to the global object. */ - var root = freeGlobal || freeSelf || Function('return this')(); - - /** Detect free variable `exports`. */ - var freeExports = typeof exports == 'object' && exports && !exports.nodeType && exports; - - /** Detect free variable `module`. */ - var freeModule = freeExports && typeof module == 'object' && module && !module.nodeType && module; - - /** Detect the popular CommonJS extension `module.exports`. */ - var moduleExports = freeModule && freeModule.exports === freeExports; - - /** Detect free variable `process` from Node.js. */ - var freeProcess = moduleExports && freeGlobal.process; - - /** Used to access faster Node.js helpers. */ - var nodeUtil = (function() { - try { - // Use `util.types` for Node.js 10+. - var types = freeModule && freeModule.require && freeModule.require('util').types; - - if (types) { - return types; - } - - // Legacy `process.binding('util')` for Node.js < 10. - return freeProcess && freeProcess.binding && freeProcess.binding('util'); - } catch (e) {} - }()); - - /* Node.js helper references. */ - var nodeIsArrayBuffer = nodeUtil && nodeUtil.isArrayBuffer, - nodeIsDate = nodeUtil && nodeUtil.isDate, - nodeIsMap = nodeUtil && nodeUtil.isMap, - nodeIsRegExp = nodeUtil && nodeUtil.isRegExp, - nodeIsSet = nodeUtil && nodeUtil.isSet, - nodeIsTypedArray = nodeUtil && nodeUtil.isTypedArray; - - /*--------------------------------------------------------------------------*/ - - /** - * A faster alternative to `Function#apply`, this function invokes `func` - * with the `this` binding of `thisArg` and the arguments of `args`. - * - * @private - * @param {Function} func The function to invoke. - * @param {*} thisArg The `this` binding of `func`. - * @param {Array} args The arguments to invoke `func` with. - * @returns {*} Returns the result of `func`. - */ - function apply(func, thisArg, args) { - switch (args.length) { - case 0: return func.call(thisArg); - case 1: return func.call(thisArg, args[0]); - case 2: return func.call(thisArg, args[0], args[1]); - case 3: return func.call(thisArg, args[0], args[1], args[2]); - } - return func.apply(thisArg, args); - } - - /** - * A specialized version of `baseAggregator` for arrays. - * - * @private - * @param {Array} [array] The array to iterate over. - * @param {Function} setter The function to set `accumulator` values. - * @param {Function} iteratee The iteratee to transform keys. - * @param {Object} accumulator The initial aggregated object. - * @returns {Function} Returns `accumulator`. - */ - function arrayAggregator(array, setter, iteratee, accumulator) { - var index = -1, - length = array == null ? 0 : array.length; - - while (++index < length) { - var value = array[index]; - setter(accumulator, value, iteratee(value), array); - } - return accumulator; - } - - /** - * A specialized version of `_.forEach` for arrays without support for - * iteratee shorthands. - * - * @private - * @param {Array} [array] The array to iterate over. - * @param {Function} iteratee The function invoked per iteration. - * @returns {Array} Returns `array`. - */ - function arrayEach(array, iteratee) { - var index = -1, - length = array == null ? 
0 : array.length; - - while (++index < length) { - if (iteratee(array[index], index, array) === false) { - break; - } - } - return array; - } - - /** - * A specialized version of `_.forEachRight` for arrays without support for - * iteratee shorthands. - * - * @private - * @param {Array} [array] The array to iterate over. - * @param {Function} iteratee The function invoked per iteration. - * @returns {Array} Returns `array`. - */ - function arrayEachRight(array, iteratee) { - var length = array == null ? 0 : array.length; - - while (length--) { - if (iteratee(array[length], length, array) === false) { - break; - } - } - return array; - } - - /** - * A specialized version of `_.every` for arrays without support for - * iteratee shorthands. - * - * @private - * @param {Array} [array] The array to iterate over. - * @param {Function} predicate The function invoked per iteration. - * @returns {boolean} Returns `true` if all elements pass the predicate check, - * else `false`. - */ - function arrayEvery(array, predicate) { - var index = -1, - length = array == null ? 0 : array.length; - - while (++index < length) { - if (!predicate(array[index], index, array)) { - return false; - } - } - return true; - } - - /** - * A specialized version of `_.filter` for arrays without support for - * iteratee shorthands. - * - * @private - * @param {Array} [array] The array to iterate over. - * @param {Function} predicate The function invoked per iteration. - * @returns {Array} Returns the new filtered array. - */ - function arrayFilter(array, predicate) { - var index = -1, - length = array == null ? 0 : array.length, - resIndex = 0, - result = []; - - while (++index < length) { - var value = array[index]; - if (predicate(value, index, array)) { - result[resIndex++] = value; - } - } - return result; - } - - /** - * A specialized version of `_.includes` for arrays without support for - * specifying an index to search from. - * - * @private - * @param {Array} [array] The array to inspect. - * @param {*} target The value to search for. - * @returns {boolean} Returns `true` if `target` is found, else `false`. - */ - function arrayIncludes(array, value) { - var length = array == null ? 0 : array.length; - return !!length && baseIndexOf(array, value, 0) > -1; - } - - /** - * This function is like `arrayIncludes` except that it accepts a comparator. - * - * @private - * @param {Array} [array] The array to inspect. - * @param {*} target The value to search for. - * @param {Function} comparator The comparator invoked per element. - * @returns {boolean} Returns `true` if `target` is found, else `false`. - */ - function arrayIncludesWith(array, value, comparator) { - var index = -1, - length = array == null ? 0 : array.length; - - while (++index < length) { - if (comparator(value, array[index])) { - return true; - } - } - return false; - } - - /** - * A specialized version of `_.map` for arrays without support for iteratee - * shorthands. - * - * @private - * @param {Array} [array] The array to iterate over. - * @param {Function} iteratee The function invoked per iteration. - * @returns {Array} Returns the new mapped array. - */ - function arrayMap(array, iteratee) { - var index = -1, - length = array == null ? 0 : array.length, - result = Array(length); - - while (++index < length) { - result[index] = iteratee(array[index], index, array); - } - return result; - } - - /** - * Appends the elements of `values` to `array`. - * - * @private - * @param {Array} array The array to modify. 
- * @param {Array} values The values to append. - * @returns {Array} Returns `array`. - */ - function arrayPush(array, values) { - var index = -1, - length = values.length, - offset = array.length; - - while (++index < length) { - array[offset + index] = values[index]; - } - return array; - } - - /** - * A specialized version of `_.reduce` for arrays without support for - * iteratee shorthands. - * - * @private - * @param {Array} [array] The array to iterate over. - * @param {Function} iteratee The function invoked per iteration. - * @param {*} [accumulator] The initial value. - * @param {boolean} [initAccum] Specify using the first element of `array` as - * the initial value. - * @returns {*} Returns the accumulated value. - */ - function arrayReduce(array, iteratee, accumulator, initAccum) { - var index = -1, - length = array == null ? 0 : array.length; - - if (initAccum && length) { - accumulator = array[++index]; - } - while (++index < length) { - accumulator = iteratee(accumulator, array[index], index, array); - } - return accumulator; - } - - /** - * A specialized version of `_.reduceRight` for arrays without support for - * iteratee shorthands. - * - * @private - * @param {Array} [array] The array to iterate over. - * @param {Function} iteratee The function invoked per iteration. - * @param {*} [accumulator] The initial value. - * @param {boolean} [initAccum] Specify using the last element of `array` as - * the initial value. - * @returns {*} Returns the accumulated value. - */ - function arrayReduceRight(array, iteratee, accumulator, initAccum) { - var length = array == null ? 0 : array.length; - if (initAccum && length) { - accumulator = array[--length]; - } - while (length--) { - accumulator = iteratee(accumulator, array[length], length, array); - } - return accumulator; - } - - /** - * A specialized version of `_.some` for arrays without support for iteratee - * shorthands. - * - * @private - * @param {Array} [array] The array to iterate over. - * @param {Function} predicate The function invoked per iteration. - * @returns {boolean} Returns `true` if any element passes the predicate check, - * else `false`. - */ - function arraySome(array, predicate) { - var index = -1, - length = array == null ? 0 : array.length; - - while (++index < length) { - if (predicate(array[index], index, array)) { - return true; - } - } - return false; - } - - /** - * Gets the size of an ASCII `string`. - * - * @private - * @param {string} string The string inspect. - * @returns {number} Returns the string size. - */ - var asciiSize = baseProperty('length'); - - /** - * Converts an ASCII `string` to an array. - * - * @private - * @param {string} string The string to convert. - * @returns {Array} Returns the converted array. - */ - function asciiToArray(string) { - return string.split(''); - } - - /** - * Splits an ASCII `string` into an array of its words. - * - * @private - * @param {string} The string to inspect. - * @returns {Array} Returns the words of `string`. - */ - function asciiWords(string) { - return string.match(reAsciiWord) || []; - } - - /** - * The base implementation of methods like `_.findKey` and `_.findLastKey`, - * without support for iteratee shorthands, which iterates over `collection` - * using `eachFunc`. - * - * @private - * @param {Array|Object} collection The collection to inspect. - * @param {Function} predicate The function invoked per iteration. - * @param {Function} eachFunc The function to iterate over `collection`. 
- * @returns {*} Returns the found element or its key, else `undefined`. - */ - function baseFindKey(collection, predicate, eachFunc) { - var result; - eachFunc(collection, function(value, key, collection) { - if (predicate(value, key, collection)) { - result = key; - return false; - } - }); - return result; - } - - /** - * The base implementation of `_.findIndex` and `_.findLastIndex` without - * support for iteratee shorthands. - * - * @private - * @param {Array} array The array to inspect. - * @param {Function} predicate The function invoked per iteration. - * @param {number} fromIndex The index to search from. - * @param {boolean} [fromRight] Specify iterating from right to left. - * @returns {number} Returns the index of the matched value, else `-1`. - */ - function baseFindIndex(array, predicate, fromIndex, fromRight) { - var length = array.length, - index = fromIndex + (fromRight ? 1 : -1); - - while ((fromRight ? index-- : ++index < length)) { - if (predicate(array[index], index, array)) { - return index; - } - } - return -1; - } - - /** - * The base implementation of `_.indexOf` without `fromIndex` bounds checks. - * - * @private - * @param {Array} array The array to inspect. - * @param {*} value The value to search for. - * @param {number} fromIndex The index to search from. - * @returns {number} Returns the index of the matched value, else `-1`. - */ - function baseIndexOf(array, value, fromIndex) { - return value === value - ? strictIndexOf(array, value, fromIndex) - : baseFindIndex(array, baseIsNaN, fromIndex); - } - - /** - * This function is like `baseIndexOf` except that it accepts a comparator. - * - * @private - * @param {Array} array The array to inspect. - * @param {*} value The value to search for. - * @param {number} fromIndex The index to search from. - * @param {Function} comparator The comparator invoked per element. - * @returns {number} Returns the index of the matched value, else `-1`. - */ - function baseIndexOfWith(array, value, fromIndex, comparator) { - var index = fromIndex - 1, - length = array.length; - - while (++index < length) { - if (comparator(array[index], value)) { - return index; - } - } - return -1; - } - - /** - * The base implementation of `_.isNaN` without support for number objects. - * - * @private - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is `NaN`, else `false`. - */ - function baseIsNaN(value) { - return value !== value; - } - - /** - * The base implementation of `_.mean` and `_.meanBy` without support for - * iteratee shorthands. - * - * @private - * @param {Array} array The array to iterate over. - * @param {Function} iteratee The function invoked per iteration. - * @returns {number} Returns the mean. - */ - function baseMean(array, iteratee) { - var length = array == null ? 0 : array.length; - return length ? (baseSum(array, iteratee) / length) : NAN; - } - - /** - * The base implementation of `_.property` without support for deep paths. - * - * @private - * @param {string} key The key of the property to get. - * @returns {Function} Returns the new accessor function. - */ - function baseProperty(key) { - return function(object) { - return object == null ? undefined : object[key]; - }; - } - - /** - * The base implementation of `_.propertyOf` without support for deep paths. - * - * @private - * @param {Object} object The object to query. - * @returns {Function} Returns the new accessor function. - */ - function basePropertyOf(object) { - return function(key) { - return object == null ? 
undefined : object[key]; - }; - } - - /** - * The base implementation of `_.reduce` and `_.reduceRight`, without support - * for iteratee shorthands, which iterates over `collection` using `eachFunc`. - * - * @private - * @param {Array|Object} collection The collection to iterate over. - * @param {Function} iteratee The function invoked per iteration. - * @param {*} accumulator The initial value. - * @param {boolean} initAccum Specify using the first or last element of - * `collection` as the initial value. - * @param {Function} eachFunc The function to iterate over `collection`. - * @returns {*} Returns the accumulated value. - */ - function baseReduce(collection, iteratee, accumulator, initAccum, eachFunc) { - eachFunc(collection, function(value, index, collection) { - accumulator = initAccum - ? (initAccum = false, value) - : iteratee(accumulator, value, index, collection); - }); - return accumulator; - } - - /** - * The base implementation of `_.sortBy` which uses `comparer` to define the - * sort order of `array` and replaces criteria objects with their corresponding - * values. - * - * @private - * @param {Array} array The array to sort. - * @param {Function} comparer The function to define sort order. - * @returns {Array} Returns `array`. - */ - function baseSortBy(array, comparer) { - var length = array.length; - - array.sort(comparer); - while (length--) { - array[length] = array[length].value; - } - return array; - } - - /** - * The base implementation of `_.sum` and `_.sumBy` without support for - * iteratee shorthands. - * - * @private - * @param {Array} array The array to iterate over. - * @param {Function} iteratee The function invoked per iteration. - * @returns {number} Returns the sum. - */ - function baseSum(array, iteratee) { - var result, - index = -1, - length = array.length; - - while (++index < length) { - var current = iteratee(array[index]); - if (current !== undefined) { - result = result === undefined ? current : (result + current); - } - } - return result; - } - - /** - * The base implementation of `_.times` without support for iteratee shorthands - * or max array length checks. - * - * @private - * @param {number} n The number of times to invoke `iteratee`. - * @param {Function} iteratee The function invoked per iteration. - * @returns {Array} Returns the array of results. - */ - function baseTimes(n, iteratee) { - var index = -1, - result = Array(n); - - while (++index < n) { - result[index] = iteratee(index); - } - return result; - } - - /** - * The base implementation of `_.toPairs` and `_.toPairsIn` which creates an array - * of key-value pairs for `object` corresponding to the property names of `props`. - * - * @private - * @param {Object} object The object to query. - * @param {Array} props The property names to get values for. - * @returns {Object} Returns the key-value pairs. - */ - function baseToPairs(object, props) { - return arrayMap(props, function(key) { - return [key, object[key]]; - }); - } - - /** - * The base implementation of `_.unary` without support for storing metadata. - * - * @private - * @param {Function} func The function to cap arguments for. - * @returns {Function} Returns the new capped function. - */ - function baseUnary(func) { - return function(value) { - return func(value); - }; - } - - /** - * The base implementation of `_.values` and `_.valuesIn` which creates an - * array of `object` property values corresponding to the property names - * of `props`. - * - * @private - * @param {Object} object The object to query. 
- * @param {Array} props The property names to get values for. - * @returns {Object} Returns the array of property values. - */ - function baseValues(object, props) { - return arrayMap(props, function(key) { - return object[key]; - }); - } - - /** - * Checks if a `cache` value for `key` exists. - * - * @private - * @param {Object} cache The cache to query. - * @param {string} key The key of the entry to check. - * @returns {boolean} Returns `true` if an entry for `key` exists, else `false`. - */ - function cacheHas(cache, key) { - return cache.has(key); - } - - /** - * Used by `_.trim` and `_.trimStart` to get the index of the first string symbol - * that is not found in the character symbols. - * - * @private - * @param {Array} strSymbols The string symbols to inspect. - * @param {Array} chrSymbols The character symbols to find. - * @returns {number} Returns the index of the first unmatched string symbol. - */ - function charsStartIndex(strSymbols, chrSymbols) { - var index = -1, - length = strSymbols.length; - - while (++index < length && baseIndexOf(chrSymbols, strSymbols[index], 0) > -1) {} - return index; - } - - /** - * Used by `_.trim` and `_.trimEnd` to get the index of the last string symbol - * that is not found in the character symbols. - * - * @private - * @param {Array} strSymbols The string symbols to inspect. - * @param {Array} chrSymbols The character symbols to find. - * @returns {number} Returns the index of the last unmatched string symbol. - */ - function charsEndIndex(strSymbols, chrSymbols) { - var index = strSymbols.length; - - while (index-- && baseIndexOf(chrSymbols, strSymbols[index], 0) > -1) {} - return index; - } - - /** - * Gets the number of `placeholder` occurrences in `array`. - * - * @private - * @param {Array} array The array to inspect. - * @param {*} placeholder The placeholder to search for. - * @returns {number} Returns the placeholder count. - */ - function countHolders(array, placeholder) { - var length = array.length, - result = 0; - - while (length--) { - if (array[length] === placeholder) { - ++result; - } - } - return result; - } - - /** - * Used by `_.deburr` to convert Latin-1 Supplement and Latin Extended-A - * letters to basic Latin letters. - * - * @private - * @param {string} letter The matched letter to deburr. - * @returns {string} Returns the deburred letter. - */ - var deburrLetter = basePropertyOf(deburredLetters); - - /** - * Used by `_.escape` to convert characters to HTML entities. - * - * @private - * @param {string} chr The matched character to escape. - * @returns {string} Returns the escaped character. - */ - var escapeHtmlChar = basePropertyOf(htmlEscapes); - - /** - * Used by `_.template` to escape characters for inclusion in compiled string literals. - * - * @private - * @param {string} chr The matched character to escape. - * @returns {string} Returns the escaped character. - */ - function escapeStringChar(chr) { - return '\\' + stringEscapes[chr]; - } - - /** - * Gets the value at `key` of `object`. - * - * @private - * @param {Object} [object] The object to query. - * @param {string} key The key of the property to get. - * @returns {*} Returns the property value. - */ - function getValue(object, key) { - return object == null ? undefined : object[key]; - } - - /** - * Checks if `string` contains Unicode symbols. - * - * @private - * @param {string} string The string to inspect. - * @returns {boolean} Returns `true` if a symbol is found, else `false`. 
- */ - function hasUnicode(string) { - return reHasUnicode.test(string); - } - - /** - * Checks if `string` contains a word composed of Unicode symbols. - * - * @private - * @param {string} string The string to inspect. - * @returns {boolean} Returns `true` if a word is found, else `false`. - */ - function hasUnicodeWord(string) { - return reHasUnicodeWord.test(string); - } - - /** - * Converts `iterator` to an array. - * - * @private - * @param {Object} iterator The iterator to convert. - * @returns {Array} Returns the converted array. - */ - function iteratorToArray(iterator) { - var data, - result = []; - - while (!(data = iterator.next()).done) { - result.push(data.value); - } - return result; - } - - /** - * Converts `map` to its key-value pairs. - * - * @private - * @param {Object} map The map to convert. - * @returns {Array} Returns the key-value pairs. - */ - function mapToArray(map) { - var index = -1, - result = Array(map.size); - - map.forEach(function(value, key) { - result[++index] = [key, value]; - }); - return result; - } - - /** - * Creates a unary function that invokes `func` with its argument transformed. - * - * @private - * @param {Function} func The function to wrap. - * @param {Function} transform The argument transform. - * @returns {Function} Returns the new function. - */ - function overArg(func, transform) { - return function(arg) { - return func(transform(arg)); - }; - } - - /** - * Replaces all `placeholder` elements in `array` with an internal placeholder - * and returns an array of their indexes. - * - * @private - * @param {Array} array The array to modify. - * @param {*} placeholder The placeholder to replace. - * @returns {Array} Returns the new array of placeholder indexes. - */ - function replaceHolders(array, placeholder) { - var index = -1, - length = array.length, - resIndex = 0, - result = []; - - while (++index < length) { - var value = array[index]; - if (value === placeholder || value === PLACEHOLDER) { - array[index] = PLACEHOLDER; - result[resIndex++] = index; - } - } - return result; - } - - /** - * Converts `set` to an array of its values. - * - * @private - * @param {Object} set The set to convert. - * @returns {Array} Returns the values. - */ - function setToArray(set) { - var index = -1, - result = Array(set.size); - - set.forEach(function(value) { - result[++index] = value; - }); - return result; - } - - /** - * Converts `set` to its value-value pairs. - * - * @private - * @param {Object} set The set to convert. - * @returns {Array} Returns the value-value pairs. - */ - function setToPairs(set) { - var index = -1, - result = Array(set.size); - - set.forEach(function(value) { - result[++index] = [value, value]; - }); - return result; - } - - /** - * A specialized version of `_.indexOf` which performs strict equality - * comparisons of values, i.e. `===`. - * - * @private - * @param {Array} array The array to inspect. - * @param {*} value The value to search for. - * @param {number} fromIndex The index to search from. - * @returns {number} Returns the index of the matched value, else `-1`. - */ - function strictIndexOf(array, value, fromIndex) { - var index = fromIndex - 1, - length = array.length; - - while (++index < length) { - if (array[index] === value) { - return index; - } - } - return -1; - } - - /** - * A specialized version of `_.lastIndexOf` which performs strict equality - * comparisons of values, i.e. `===`. - * - * @private - * @param {Array} array The array to inspect. - * @param {*} value The value to search for. 
- * @param {number} fromIndex The index to search from. - * @returns {number} Returns the index of the matched value, else `-1`. - */ - function strictLastIndexOf(array, value, fromIndex) { - var index = fromIndex + 1; - while (index--) { - if (array[index] === value) { - return index; - } - } - return index; - } - - /** - * Gets the number of symbols in `string`. - * - * @private - * @param {string} string The string to inspect. - * @returns {number} Returns the string size. - */ - function stringSize(string) { - return hasUnicode(string) - ? unicodeSize(string) - : asciiSize(string); - } - - /** - * Converts `string` to an array. - * - * @private - * @param {string} string The string to convert. - * @returns {Array} Returns the converted array. - */ - function stringToArray(string) { - return hasUnicode(string) - ? unicodeToArray(string) - : asciiToArray(string); - } - - /** - * Used by `_.unescape` to convert HTML entities to characters. - * - * @private - * @param {string} chr The matched character to unescape. - * @returns {string} Returns the unescaped character. - */ - var unescapeHtmlChar = basePropertyOf(htmlUnescapes); - - /** - * Gets the size of a Unicode `string`. - * - * @private - * @param {string} string The string inspect. - * @returns {number} Returns the string size. - */ - function unicodeSize(string) { - var result = reUnicode.lastIndex = 0; - while (reUnicode.test(string)) { - ++result; - } - return result; - } - - /** - * Converts a Unicode `string` to an array. - * - * @private - * @param {string} string The string to convert. - * @returns {Array} Returns the converted array. - */ - function unicodeToArray(string) { - return string.match(reUnicode) || []; - } - - /** - * Splits a Unicode `string` into an array of its words. - * - * @private - * @param {string} The string to inspect. - * @returns {Array} Returns the words of `string`. - */ - function unicodeWords(string) { - return string.match(reUnicodeWord) || []; - } - - /*--------------------------------------------------------------------------*/ - - /** - * Create a new pristine `lodash` function using the `context` object. - * - * @static - * @memberOf _ - * @since 1.1.0 - * @category Util - * @param {Object} [context=root] The context object. - * @returns {Function} Returns a new `lodash` function. - * @example - * - * _.mixin({ 'foo': _.constant('foo') }); - * - * var lodash = _.runInContext(); - * lodash.mixin({ 'bar': lodash.constant('bar') }); - * - * _.isFunction(_.foo); - * // => true - * _.isFunction(_.bar); - * // => false - * - * lodash.isFunction(lodash.foo); - * // => false - * lodash.isFunction(lodash.bar); - * // => true - * - * // Create a suped-up `defer` in Node.js. - * var defer = _.runInContext({ 'setTimeout': setImmediate }).defer; - */ - var runInContext = (function runInContext(context) { - context = context == null ? root : _.defaults(root.Object(), context, _.pick(root, contextProps)); - - /** Built-in constructor references. */ - var Array = context.Array, - Date = context.Date, - Error = context.Error, - Function = context.Function, - Math = context.Math, - Object = context.Object, - RegExp = context.RegExp, - String = context.String, - TypeError = context.TypeError; - - /** Used for built-in method references. */ - var arrayProto = Array.prototype, - funcProto = Function.prototype, - objectProto = Object.prototype; - - /** Used to detect overreaching core-js shims. */ - var coreJsData = context['__core-js_shared__']; - - /** Used to resolve the decompiled source of functions. 
*/ - var funcToString = funcProto.toString; - - /** Used to check objects for own properties. */ - var hasOwnProperty = objectProto.hasOwnProperty; - - /** Used to generate unique IDs. */ - var idCounter = 0; - - /** Used to detect methods masquerading as native. */ - var maskSrcKey = (function() { - var uid = /[^.]+$/.exec(coreJsData && coreJsData.keys && coreJsData.keys.IE_PROTO || ''); - return uid ? ('Symbol(src)_1.' + uid) : ''; - }()); - - /** - * Used to resolve the - * [`toStringTag`](http://ecma-international.org/ecma-262/7.0/#sec-object.prototype.tostring) - * of values. - */ - var nativeObjectToString = objectProto.toString; - - /** Used to infer the `Object` constructor. */ - var objectCtorString = funcToString.call(Object); - - /** Used to restore the original `_` reference in `_.noConflict`. */ - var oldDash = root._; - - /** Used to detect if a method is native. */ - var reIsNative = RegExp('^' + - funcToString.call(hasOwnProperty).replace(reRegExpChar, '\\$&') - .replace(/hasOwnProperty|(function).*?(?=\\\()| for .+?(?=\\\])/g, '$1.*?') + '$' - ); - - /** Built-in value references. */ - var Buffer = moduleExports ? context.Buffer : undefined, - Symbol = context.Symbol, - Uint8Array = context.Uint8Array, - allocUnsafe = Buffer ? Buffer.allocUnsafe : undefined, - getPrototype = overArg(Object.getPrototypeOf, Object), - objectCreate = Object.create, - propertyIsEnumerable = objectProto.propertyIsEnumerable, - splice = arrayProto.splice, - spreadableSymbol = Symbol ? Symbol.isConcatSpreadable : undefined, - symIterator = Symbol ? Symbol.iterator : undefined, - symToStringTag = Symbol ? Symbol.toStringTag : undefined; - - var defineProperty = (function() { - try { - var func = getNative(Object, 'defineProperty'); - func({}, '', {}); - return func; - } catch (e) {} - }()); - - /** Mocked built-ins. */ - var ctxClearTimeout = context.clearTimeout !== root.clearTimeout && context.clearTimeout, - ctxNow = Date && Date.now !== root.Date.now && Date.now, - ctxSetTimeout = context.setTimeout !== root.setTimeout && context.setTimeout; - - /* Built-in method references for those with the same name as other `lodash` methods. */ - var nativeCeil = Math.ceil, - nativeFloor = Math.floor, - nativeGetSymbols = Object.getOwnPropertySymbols, - nativeIsBuffer = Buffer ? Buffer.isBuffer : undefined, - nativeIsFinite = context.isFinite, - nativeJoin = arrayProto.join, - nativeKeys = overArg(Object.keys, Object), - nativeMax = Math.max, - nativeMin = Math.min, - nativeNow = Date.now, - nativeParseInt = context.parseInt, - nativeRandom = Math.random, - nativeReverse = arrayProto.reverse; - - /* Built-in method references that are verified to be native. */ - var DataView = getNative(context, 'DataView'), - Map = getNative(context, 'Map'), - Promise = getNative(context, 'Promise'), - Set = getNative(context, 'Set'), - WeakMap = getNative(context, 'WeakMap'), - nativeCreate = getNative(Object, 'create'); - - /** Used to store function metadata. */ - var metaMap = WeakMap && new WeakMap; - - /** Used to lookup unminified function names. */ - var realNames = {}; - - /** Used to detect maps, sets, and weakmaps. */ - var dataViewCtorString = toSource(DataView), - mapCtorString = toSource(Map), - promiseCtorString = toSource(Promise), - setCtorString = toSource(Set), - weakMapCtorString = toSource(WeakMap); - - /** Used to convert symbols to primitives and strings. */ - var symbolProto = Symbol ? Symbol.prototype : undefined, - symbolValueOf = symbolProto ? 
symbolProto.valueOf : undefined, - symbolToString = symbolProto ? symbolProto.toString : undefined; - - /*------------------------------------------------------------------------*/ - - /** - * Creates a `lodash` object which wraps `value` to enable implicit method - * chain sequences. Methods that operate on and return arrays, collections, - * and functions can be chained together. Methods that retrieve a single value - * or may return a primitive value will automatically end the chain sequence - * and return the unwrapped value. Otherwise, the value must be unwrapped - * with `_#value`. - * - * Explicit chain sequences, which must be unwrapped with `_#value`, may be - * enabled using `_.chain`. - * - * The execution of chained methods is lazy, that is, it's deferred until - * `_#value` is implicitly or explicitly called. - * - * Lazy evaluation allows several methods to support shortcut fusion. - * Shortcut fusion is an optimization to merge iteratee calls; this avoids - * the creation of intermediate arrays and can greatly reduce the number of - * iteratee executions. Sections of a chain sequence qualify for shortcut - * fusion if the section is applied to an array and iteratees accept only - * one argument. The heuristic for whether a section qualifies for shortcut - * fusion is subject to change. - * - * Chaining is supported in custom builds as long as the `_#value` method is - * directly or indirectly included in the build. - * - * In addition to lodash methods, wrappers have `Array` and `String` methods. - * - * The wrapper `Array` methods are: - * `concat`, `join`, `pop`, `push`, `shift`, `sort`, `splice`, and `unshift` - * - * The wrapper `String` methods are: - * `replace` and `split` - * - * The wrapper methods that support shortcut fusion are: - * `at`, `compact`, `drop`, `dropRight`, `dropWhile`, `filter`, `find`, - * `findLast`, `head`, `initial`, `last`, `map`, `reject`, `reverse`, `slice`, - * `tail`, `take`, `takeRight`, `takeRightWhile`, `takeWhile`, and `toArray` - * - * The chainable wrapper methods are: - * `after`, `ary`, `assign`, `assignIn`, `assignInWith`, `assignWith`, `at`, - * `before`, `bind`, `bindAll`, `bindKey`, `castArray`, `chain`, `chunk`, - * `commit`, `compact`, `concat`, `conforms`, `constant`, `countBy`, `create`, - * `curry`, `debounce`, `defaults`, `defaultsDeep`, `defer`, `delay`, - * `difference`, `differenceBy`, `differenceWith`, `drop`, `dropRight`, - * `dropRightWhile`, `dropWhile`, `extend`, `extendWith`, `fill`, `filter`, - * `flatMap`, `flatMapDeep`, `flatMapDepth`, `flatten`, `flattenDeep`, - * `flattenDepth`, `flip`, `flow`, `flowRight`, `fromPairs`, `functions`, - * `functionsIn`, `groupBy`, `initial`, `intersection`, `intersectionBy`, - * `intersectionWith`, `invert`, `invertBy`, `invokeMap`, `iteratee`, `keyBy`, - * `keys`, `keysIn`, `map`, `mapKeys`, `mapValues`, `matches`, `matchesProperty`, - * `memoize`, `merge`, `mergeWith`, `method`, `methodOf`, `mixin`, `negate`, - * `nthArg`, `omit`, `omitBy`, `once`, `orderBy`, `over`, `overArgs`, - * `overEvery`, `overSome`, `partial`, `partialRight`, `partition`, `pick`, - * `pickBy`, `plant`, `property`, `propertyOf`, `pull`, `pullAll`, `pullAllBy`, - * `pullAllWith`, `pullAt`, `push`, `range`, `rangeRight`, `rearg`, `reject`, - * `remove`, `rest`, `reverse`, `sampleSize`, `set`, `setWith`, `shuffle`, - * `slice`, `sort`, `sortBy`, `splice`, `spread`, `tail`, `take`, `takeRight`, - * `takeRightWhile`, `takeWhile`, `tap`, `throttle`, `thru`, `toArray`, - * `toPairs`, `toPairsIn`, `toPath`, 
`toPlainObject`, `transform`, `unary`, - * `union`, `unionBy`, `unionWith`, `uniq`, `uniqBy`, `uniqWith`, `unset`, - * `unshift`, `unzip`, `unzipWith`, `update`, `updateWith`, `values`, - * `valuesIn`, `without`, `wrap`, `xor`, `xorBy`, `xorWith`, `zip`, - * `zipObject`, `zipObjectDeep`, and `zipWith` - * - * The wrapper methods that are **not** chainable by default are: - * `add`, `attempt`, `camelCase`, `capitalize`, `ceil`, `clamp`, `clone`, - * `cloneDeep`, `cloneDeepWith`, `cloneWith`, `conformsTo`, `deburr`, - * `defaultTo`, `divide`, `each`, `eachRight`, `endsWith`, `eq`, `escape`, - * `escapeRegExp`, `every`, `find`, `findIndex`, `findKey`, `findLast`, - * `findLastIndex`, `findLastKey`, `first`, `floor`, `forEach`, `forEachRight`, - * `forIn`, `forInRight`, `forOwn`, `forOwnRight`, `get`, `gt`, `gte`, `has`, - * `hasIn`, `head`, `identity`, `includes`, `indexOf`, `inRange`, `invoke`, - * `isArguments`, `isArray`, `isArrayBuffer`, `isArrayLike`, `isArrayLikeObject`, - * `isBoolean`, `isBuffer`, `isDate`, `isElement`, `isEmpty`, `isEqual`, - * `isEqualWith`, `isError`, `isFinite`, `isFunction`, `isInteger`, `isLength`, - * `isMap`, `isMatch`, `isMatchWith`, `isNaN`, `isNative`, `isNil`, `isNull`, - * `isNumber`, `isObject`, `isObjectLike`, `isPlainObject`, `isRegExp`, - * `isSafeInteger`, `isSet`, `isString`, `isUndefined`, `isTypedArray`, - * `isWeakMap`, `isWeakSet`, `join`, `kebabCase`, `last`, `lastIndexOf`, - * `lowerCase`, `lowerFirst`, `lt`, `lte`, `max`, `maxBy`, `mean`, `meanBy`, - * `min`, `minBy`, `multiply`, `noConflict`, `noop`, `now`, `nth`, `pad`, - * `padEnd`, `padStart`, `parseInt`, `pop`, `random`, `reduce`, `reduceRight`, - * `repeat`, `result`, `round`, `runInContext`, `sample`, `shift`, `size`, - * `snakeCase`, `some`, `sortedIndex`, `sortedIndexBy`, `sortedLastIndex`, - * `sortedLastIndexBy`, `startCase`, `startsWith`, `stubArray`, `stubFalse`, - * `stubObject`, `stubString`, `stubTrue`, `subtract`, `sum`, `sumBy`, - * `template`, `times`, `toFinite`, `toInteger`, `toJSON`, `toLength`, - * `toLower`, `toNumber`, `toSafeInteger`, `toString`, `toUpper`, `trim`, - * `trimEnd`, `trimStart`, `truncate`, `unescape`, `uniqueId`, `upperCase`, - * `upperFirst`, `value`, and `words` - * - * @name _ - * @constructor - * @category Seq - * @param {*} value The value to wrap in a `lodash` instance. - * @returns {Object} Returns the new `lodash` wrapper instance. - * @example - * - * function square(n) { - * return n * n; - * } - * - * var wrapped = _([1, 2, 3]); - * - * // Returns an unwrapped value. - * wrapped.reduce(_.add); - * // => 6 - * - * // Returns a wrapped value. - * var squares = wrapped.map(square); - * - * _.isArray(squares); - * // => false - * - * _.isArray(squares.value()); - * // => true - */ - function lodash(value) { - if (isObjectLike(value) && !isArray(value) && !(value instanceof LazyWrapper)) { - if (value instanceof LodashWrapper) { - return value; - } - if (hasOwnProperty.call(value, '__wrapped__')) { - return wrapperClone(value); - } - } - return new LodashWrapper(value); - } - - /** - * The base implementation of `_.create` without support for assigning - * properties to the created object. - * - * @private - * @param {Object} proto The object to inherit from. - * @returns {Object} Returns the new object. 
- */ - var baseCreate = (function() { - function object() {} - return function(proto) { - if (!isObject(proto)) { - return {}; - } - if (objectCreate) { - return objectCreate(proto); - } - object.prototype = proto; - var result = new object; - object.prototype = undefined; - return result; - }; - }()); - - /** - * The function whose prototype chain sequence wrappers inherit from. - * - * @private - */ - function baseLodash() { - // No operation performed. - } - - /** - * The base constructor for creating `lodash` wrapper objects. - * - * @private - * @param {*} value The value to wrap. - * @param {boolean} [chainAll] Enable explicit method chain sequences. - */ - function LodashWrapper(value, chainAll) { - this.__wrapped__ = value; - this.__actions__ = []; - this.__chain__ = !!chainAll; - this.__index__ = 0; - this.__values__ = undefined; - } - - /** - * By default, the template delimiters used by lodash are like those in - * embedded Ruby (ERB) as well as ES2015 template strings. Change the - * following template settings to use alternative delimiters. - * - * @static - * @memberOf _ - * @type {Object} - */ - lodash.templateSettings = { - - /** - * Used to detect `data` property values to be HTML-escaped. - * - * @memberOf _.templateSettings - * @type {RegExp} - */ - 'escape': reEscape, - - /** - * Used to detect code to be evaluated. - * - * @memberOf _.templateSettings - * @type {RegExp} - */ - 'evaluate': reEvaluate, - - /** - * Used to detect `data` property values to inject. - * - * @memberOf _.templateSettings - * @type {RegExp} - */ - 'interpolate': reInterpolate, - - /** - * Used to reference the data object in the template text. - * - * @memberOf _.templateSettings - * @type {string} - */ - 'variable': '', - - /** - * Used to import variables into the compiled template. - * - * @memberOf _.templateSettings - * @type {Object} - */ - 'imports': { - - /** - * A reference to the `lodash` function. - * - * @memberOf _.templateSettings.imports - * @type {Function} - */ - '_': lodash - } - }; - - // Ensure wrappers are instances of `baseLodash`. - lodash.prototype = baseLodash.prototype; - lodash.prototype.constructor = lodash; - - LodashWrapper.prototype = baseCreate(baseLodash.prototype); - LodashWrapper.prototype.constructor = LodashWrapper; - - /*------------------------------------------------------------------------*/ - - /** - * Creates a lazy wrapper object which wraps `value` to enable lazy evaluation. - * - * @private - * @constructor - * @param {*} value The value to wrap. - */ - function LazyWrapper(value) { - this.__wrapped__ = value; - this.__actions__ = []; - this.__dir__ = 1; - this.__filtered__ = false; - this.__iteratees__ = []; - this.__takeCount__ = MAX_ARRAY_LENGTH; - this.__views__ = []; - } - - /** - * Creates a clone of the lazy wrapper object. - * - * @private - * @name clone - * @memberOf LazyWrapper - * @returns {Object} Returns the cloned `LazyWrapper` object. - */ - function lazyClone() { - var result = new LazyWrapper(this.__wrapped__); - result.__actions__ = copyArray(this.__actions__); - result.__dir__ = this.__dir__; - result.__filtered__ = this.__filtered__; - result.__iteratees__ = copyArray(this.__iteratees__); - result.__takeCount__ = this.__takeCount__; - result.__views__ = copyArray(this.__views__); - return result; - } - - /** - * Reverses the direction of lazy iteration. - * - * @private - * @name reverse - * @memberOf LazyWrapper - * @returns {Object} Returns the new reversed `LazyWrapper` object. 
- */ - function lazyReverse() { - if (this.__filtered__) { - var result = new LazyWrapper(this); - result.__dir__ = -1; - result.__filtered__ = true; - } else { - result = this.clone(); - result.__dir__ *= -1; - } - return result; - } - - /** - * Extracts the unwrapped value from its lazy wrapper. - * - * @private - * @name value - * @memberOf LazyWrapper - * @returns {*} Returns the unwrapped value. - */ - function lazyValue() { - var array = this.__wrapped__.value(), - dir = this.__dir__, - isArr = isArray(array), - isRight = dir < 0, - arrLength = isArr ? array.length : 0, - view = getView(0, arrLength, this.__views__), - start = view.start, - end = view.end, - length = end - start, - index = isRight ? end : (start - 1), - iteratees = this.__iteratees__, - iterLength = iteratees.length, - resIndex = 0, - takeCount = nativeMin(length, this.__takeCount__); - - if (!isArr || (!isRight && arrLength == length && takeCount == length)) { - return baseWrapperValue(array, this.__actions__); - } - var result = []; - - outer: - while (length-- && resIndex < takeCount) { - index += dir; - - var iterIndex = -1, - value = array[index]; - - while (++iterIndex < iterLength) { - var data = iteratees[iterIndex], - iteratee = data.iteratee, - type = data.type, - computed = iteratee(value); - - if (type == LAZY_MAP_FLAG) { - value = computed; - } else if (!computed) { - if (type == LAZY_FILTER_FLAG) { - continue outer; - } else { - break outer; - } - } - } - result[resIndex++] = value; - } - return result; - } - - // Ensure `LazyWrapper` is an instance of `baseLodash`. - LazyWrapper.prototype = baseCreate(baseLodash.prototype); - LazyWrapper.prototype.constructor = LazyWrapper; - - /*------------------------------------------------------------------------*/ - - /** - * Creates a hash object. - * - * @private - * @constructor - * @param {Array} [entries] The key-value pairs to cache. - */ - function Hash(entries) { - var index = -1, - length = entries == null ? 0 : entries.length; - - this.clear(); - while (++index < length) { - var entry = entries[index]; - this.set(entry[0], entry[1]); - } - } - - /** - * Removes all key-value entries from the hash. - * - * @private - * @name clear - * @memberOf Hash - */ - function hashClear() { - this.__data__ = nativeCreate ? nativeCreate(null) : {}; - this.size = 0; - } - - /** - * Removes `key` and its value from the hash. - * - * @private - * @name delete - * @memberOf Hash - * @param {Object} hash The hash to modify. - * @param {string} key The key of the value to remove. - * @returns {boolean} Returns `true` if the entry was removed, else `false`. - */ - function hashDelete(key) { - var result = this.has(key) && delete this.__data__[key]; - this.size -= result ? 1 : 0; - return result; - } - - /** - * Gets the hash value for `key`. - * - * @private - * @name get - * @memberOf Hash - * @param {string} key The key of the value to get. - * @returns {*} Returns the entry value. - */ - function hashGet(key) { - var data = this.__data__; - if (nativeCreate) { - var result = data[key]; - return result === HASH_UNDEFINED ? undefined : result; - } - return hasOwnProperty.call(data, key) ? data[key] : undefined; - } - - /** - * Checks if a hash value for `key` exists. - * - * @private - * @name has - * @memberOf Hash - * @param {string} key The key of the entry to check. - * @returns {boolean} Returns `true` if an entry for `key` exists, else `false`. - */ - function hashHas(key) { - var data = this.__data__; - return nativeCreate ? 
(data[key] !== undefined) : hasOwnProperty.call(data, key); - } - - /** - * Sets the hash `key` to `value`. - * - * @private - * @name set - * @memberOf Hash - * @param {string} key The key of the value to set. - * @param {*} value The value to set. - * @returns {Object} Returns the hash instance. - */ - function hashSet(key, value) { - var data = this.__data__; - this.size += this.has(key) ? 0 : 1; - data[key] = (nativeCreate && value === undefined) ? HASH_UNDEFINED : value; - return this; - } - - // Add methods to `Hash`. - Hash.prototype.clear = hashClear; - Hash.prototype['delete'] = hashDelete; - Hash.prototype.get = hashGet; - Hash.prototype.has = hashHas; - Hash.prototype.set = hashSet; - - /*------------------------------------------------------------------------*/ - - /** - * Creates an list cache object. - * - * @private - * @constructor - * @param {Array} [entries] The key-value pairs to cache. - */ - function ListCache(entries) { - var index = -1, - length = entries == null ? 0 : entries.length; - - this.clear(); - while (++index < length) { - var entry = entries[index]; - this.set(entry[0], entry[1]); - } - } - - /** - * Removes all key-value entries from the list cache. - * - * @private - * @name clear - * @memberOf ListCache - */ - function listCacheClear() { - this.__data__ = []; - this.size = 0; - } - - /** - * Removes `key` and its value from the list cache. - * - * @private - * @name delete - * @memberOf ListCache - * @param {string} key The key of the value to remove. - * @returns {boolean} Returns `true` if the entry was removed, else `false`. - */ - function listCacheDelete(key) { - var data = this.__data__, - index = assocIndexOf(data, key); - - if (index < 0) { - return false; - } - var lastIndex = data.length - 1; - if (index == lastIndex) { - data.pop(); - } else { - splice.call(data, index, 1); - } - --this.size; - return true; - } - - /** - * Gets the list cache value for `key`. - * - * @private - * @name get - * @memberOf ListCache - * @param {string} key The key of the value to get. - * @returns {*} Returns the entry value. - */ - function listCacheGet(key) { - var data = this.__data__, - index = assocIndexOf(data, key); - - return index < 0 ? undefined : data[index][1]; - } - - /** - * Checks if a list cache value for `key` exists. - * - * @private - * @name has - * @memberOf ListCache - * @param {string} key The key of the entry to check. - * @returns {boolean} Returns `true` if an entry for `key` exists, else `false`. - */ - function listCacheHas(key) { - return assocIndexOf(this.__data__, key) > -1; - } - - /** - * Sets the list cache `key` to `value`. - * - * @private - * @name set - * @memberOf ListCache - * @param {string} key The key of the value to set. - * @param {*} value The value to set. - * @returns {Object} Returns the list cache instance. - */ - function listCacheSet(key, value) { - var data = this.__data__, - index = assocIndexOf(data, key); - - if (index < 0) { - ++this.size; - data.push([key, value]); - } else { - data[index][1] = value; - } - return this; - } - - // Add methods to `ListCache`. - ListCache.prototype.clear = listCacheClear; - ListCache.prototype['delete'] = listCacheDelete; - ListCache.prototype.get = listCacheGet; - ListCache.prototype.has = listCacheHas; - ListCache.prototype.set = listCacheSet; - - /*------------------------------------------------------------------------*/ - - /** - * Creates a map cache object to store key-value pairs. 
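A brief sketch of how the ListCache above behaves, assuming the surrounding definitions are in scope; it keeps [key, value] pairs in a plain array and locates them with a linear assocIndexOf scan, which is why it is only used for small collections (the entries below are illustrative):

var cache = new ListCache([['a', 1], ['b', 2]]);
cache.get('b');        // => 2, found by scanning the pair array
cache.set('a', 3);     // overwrites the existing pair in place
cache.has('c');        // => false
cache['delete']('a');  // => true, pair removed via splice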
- * - * @private - * @constructor - * @param {Array} [entries] The key-value pairs to cache. - */ - function MapCache(entries) { - var index = -1, - length = entries == null ? 0 : entries.length; - - this.clear(); - while (++index < length) { - var entry = entries[index]; - this.set(entry[0], entry[1]); - } - } - - /** - * Removes all key-value entries from the map. - * - * @private - * @name clear - * @memberOf MapCache - */ - function mapCacheClear() { - this.size = 0; - this.__data__ = { - 'hash': new Hash, - 'map': new (Map || ListCache), - 'string': new Hash - }; - } - - /** - * Removes `key` and its value from the map. - * - * @private - * @name delete - * @memberOf MapCache - * @param {string} key The key of the value to remove. - * @returns {boolean} Returns `true` if the entry was removed, else `false`. - */ - function mapCacheDelete(key) { - var result = getMapData(this, key)['delete'](key); - this.size -= result ? 1 : 0; - return result; - } - - /** - * Gets the map value for `key`. - * - * @private - * @name get - * @memberOf MapCache - * @param {string} key The key of the value to get. - * @returns {*} Returns the entry value. - */ - function mapCacheGet(key) { - return getMapData(this, key).get(key); - } - - /** - * Checks if a map value for `key` exists. - * - * @private - * @name has - * @memberOf MapCache - * @param {string} key The key of the entry to check. - * @returns {boolean} Returns `true` if an entry for `key` exists, else `false`. - */ - function mapCacheHas(key) { - return getMapData(this, key).has(key); - } - - /** - * Sets the map `key` to `value`. - * - * @private - * @name set - * @memberOf MapCache - * @param {string} key The key of the value to set. - * @param {*} value The value to set. - * @returns {Object} Returns the map cache instance. - */ - function mapCacheSet(key, value) { - var data = getMapData(this, key), - size = data.size; - - data.set(key, value); - this.size += data.size == size ? 0 : 1; - return this; - } - - // Add methods to `MapCache`. - MapCache.prototype.clear = mapCacheClear; - MapCache.prototype['delete'] = mapCacheDelete; - MapCache.prototype.get = mapCacheGet; - MapCache.prototype.has = mapCacheHas; - MapCache.prototype.set = mapCacheSet; - - /*------------------------------------------------------------------------*/ - - /** - * - * Creates an array cache object to store unique values. - * - * @private - * @constructor - * @param {Array} [values] The values to cache. - */ - function SetCache(values) { - var index = -1, - length = values == null ? 0 : values.length; - - this.__data__ = new MapCache; - while (++index < length) { - this.add(values[index]); - } - } - - /** - * Adds `value` to the array cache. - * - * @private - * @name add - * @memberOf SetCache - * @alias push - * @param {*} value The value to cache. - * @returns {Object} Returns the cache instance. - */ - function setCacheAdd(value) { - this.__data__.set(value, HASH_UNDEFINED); - return this; - } - - /** - * Checks if `value` is in the array cache. - * - * @private - * @name has - * @memberOf SetCache - * @param {*} value The value to search for. - * @returns {number} Returns `true` if `value` is found, else `false`. - */ - function setCacheHas(value) { - return this.__data__.has(value); - } - - // Add methods to `SetCache`. 
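A condensed sketch of the key-type dispatch behind MapCache (the real implementation goes through getMapData and isKeyable); pickStore below is an illustrative stand-in, not a lodash function:

// String, number, symbol and boolean keys (plus null, but not '__proto__')
// can be used as own property names, so they go to a Hash store; anything
// else (objects, functions, undefined) falls back to the native Map store.
function pickStore(mapCache, key) {
  var type = typeof key;
  var keyable = (type == 'string' || type == 'number' || type == 'symbol' || type == 'boolean')
    ? key !== '__proto__'
    : key === null;
  var data = mapCache.__data__;
  return keyable
    ? data[type == 'string' ? 'string' : 'hash']
    : data.map;
}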
- SetCache.prototype.add = SetCache.prototype.push = setCacheAdd; - SetCache.prototype.has = setCacheHas; - - /*------------------------------------------------------------------------*/ - - /** - * Creates a stack cache object to store key-value pairs. - * - * @private - * @constructor - * @param {Array} [entries] The key-value pairs to cache. - */ - function Stack(entries) { - var data = this.__data__ = new ListCache(entries); - this.size = data.size; - } - - /** - * Removes all key-value entries from the stack. - * - * @private - * @name clear - * @memberOf Stack - */ - function stackClear() { - this.__data__ = new ListCache; - this.size = 0; - } - - /** - * Removes `key` and its value from the stack. - * - * @private - * @name delete - * @memberOf Stack - * @param {string} key The key of the value to remove. - * @returns {boolean} Returns `true` if the entry was removed, else `false`. - */ - function stackDelete(key) { - var data = this.__data__, - result = data['delete'](key); - - this.size = data.size; - return result; - } - - /** - * Gets the stack value for `key`. - * - * @private - * @name get - * @memberOf Stack - * @param {string} key The key of the value to get. - * @returns {*} Returns the entry value. - */ - function stackGet(key) { - return this.__data__.get(key); - } - - /** - * Checks if a stack value for `key` exists. - * - * @private - * @name has - * @memberOf Stack - * @param {string} key The key of the entry to check. - * @returns {boolean} Returns `true` if an entry for `key` exists, else `false`. - */ - function stackHas(key) { - return this.__data__.has(key); - } - - /** - * Sets the stack `key` to `value`. - * - * @private - * @name set - * @memberOf Stack - * @param {string} key The key of the value to set. - * @param {*} value The value to set. - * @returns {Object} Returns the stack cache instance. - */ - function stackSet(key, value) { - var data = this.__data__; - if (data instanceof ListCache) { - var pairs = data.__data__; - if (!Map || (pairs.length < LARGE_ARRAY_SIZE - 1)) { - pairs.push([key, value]); - this.size = ++data.size; - return this; - } - data = this.__data__ = new MapCache(pairs); - } - data.set(key, value); - this.size = data.size; - return this; - } - - // Add methods to `Stack`. - Stack.prototype.clear = stackClear; - Stack.prototype['delete'] = stackDelete; - Stack.prototype.get = stackGet; - Stack.prototype.has = stackHas; - Stack.prototype.set = stackSet; - - /*------------------------------------------------------------------------*/ - - /** - * Creates an array of the enumerable property names of the array-like `value`. - * - * @private - * @param {*} value The value to query. - * @param {boolean} inherited Specify returning inherited property names. - * @returns {Array} Returns the array of property names. - */ - function arrayLikeKeys(value, inherited) { - var isArr = isArray(value), - isArg = !isArr && isArguments(value), - isBuff = !isArr && !isArg && isBuffer(value), - isType = !isArr && !isArg && !isBuff && isTypedArray(value), - skipIndexes = isArr || isArg || isBuff || isType, - result = skipIndexes ? baseTimes(value.length, String) : [], - length = result.length; - - for (var key in value) { - if ((inherited || hasOwnProperty.call(value, key)) && - !(skipIndexes && ( - // Safari 9 has enumerable `arguments.length` in strict mode. - key == 'length' || - // Node.js 0.10 has enumerable non-index properties on buffers. 
- (isBuff && (key == 'offset' || key == 'parent')) || - // PhantomJS 2 has enumerable non-index properties on typed arrays. - (isType && (key == 'buffer' || key == 'byteLength' || key == 'byteOffset')) || - // Skip index properties. - isIndex(key, length) - ))) { - result.push(key); - } - } - return result; - } - - /** - * A specialized version of `_.sample` for arrays. - * - * @private - * @param {Array} array The array to sample. - * @returns {*} Returns the random element. - */ - function arraySample(array) { - var length = array.length; - return length ? array[baseRandom(0, length - 1)] : undefined; - } - - /** - * A specialized version of `_.sampleSize` for arrays. - * - * @private - * @param {Array} array The array to sample. - * @param {number} n The number of elements to sample. - * @returns {Array} Returns the random elements. - */ - function arraySampleSize(array, n) { - return shuffleSelf(copyArray(array), baseClamp(n, 0, array.length)); - } - - /** - * A specialized version of `_.shuffle` for arrays. - * - * @private - * @param {Array} array The array to shuffle. - * @returns {Array} Returns the new shuffled array. - */ - function arrayShuffle(array) { - return shuffleSelf(copyArray(array)); - } - - /** - * This function is like `assignValue` except that it doesn't assign - * `undefined` values. - * - * @private - * @param {Object} object The object to modify. - * @param {string} key The key of the property to assign. - * @param {*} value The value to assign. - */ - function assignMergeValue(object, key, value) { - if ((value !== undefined && !eq(object[key], value)) || - (value === undefined && !(key in object))) { - baseAssignValue(object, key, value); - } - } - - /** - * Assigns `value` to `key` of `object` if the existing value is not equivalent - * using [`SameValueZero`](http://ecma-international.org/ecma-262/7.0/#sec-samevaluezero) - * for equality comparisons. - * - * @private - * @param {Object} object The object to modify. - * @param {string} key The key of the property to assign. - * @param {*} value The value to assign. - */ - function assignValue(object, key, value) { - var objValue = object[key]; - if (!(hasOwnProperty.call(object, key) && eq(objValue, value)) || - (value === undefined && !(key in object))) { - baseAssignValue(object, key, value); - } - } - - /** - * Gets the index at which the `key` is found in `array` of key-value pairs. - * - * @private - * @param {Array} array The array to inspect. - * @param {*} key The key to search for. - * @returns {number} Returns the index of the matched value, else `-1`. - */ - function assocIndexOf(array, key) { - var length = array.length; - while (length--) { - if (eq(array[length][0], key)) { - return length; - } - } - return -1; - } - - /** - * Aggregates elements of `collection` on `accumulator` with keys transformed - * by `iteratee` and values set by `setter`. - * - * @private - * @param {Array|Object} collection The collection to iterate over. - * @param {Function} setter The function to set `accumulator` values. - * @param {Function} iteratee The iteratee to transform keys. - * @param {Object} accumulator The initial aggregated object. - * @returns {Function} Returns `accumulator`. 
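assignValue and assocIndexOf both compare with eq, i.e. SameValueZero rather than ===; the only observable difference is NaN, as the public _.eq shows:

var _ = require('lodash');

_.eq(NaN, NaN); // => true  (NaN === NaN is false)
_.eq(0, -0);    // => true
_.eq('a', 'a'); // => true
_.eq({}, {});   // => false, different references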
- */ - function baseAggregator(collection, setter, iteratee, accumulator) { - baseEach(collection, function(value, key, collection) { - setter(accumulator, value, iteratee(value), collection); - }); - return accumulator; - } - - /** - * The base implementation of `_.assign` without support for multiple sources - * or `customizer` functions. - * - * @private - * @param {Object} object The destination object. - * @param {Object} source The source object. - * @returns {Object} Returns `object`. - */ - function baseAssign(object, source) { - return object && copyObject(source, keys(source), object); - } - - /** - * The base implementation of `_.assignIn` without support for multiple sources - * or `customizer` functions. - * - * @private - * @param {Object} object The destination object. - * @param {Object} source The source object. - * @returns {Object} Returns `object`. - */ - function baseAssignIn(object, source) { - return object && copyObject(source, keysIn(source), object); - } - - /** - * The base implementation of `assignValue` and `assignMergeValue` without - * value checks. - * - * @private - * @param {Object} object The object to modify. - * @param {string} key The key of the property to assign. - * @param {*} value The value to assign. - */ - function baseAssignValue(object, key, value) { - if (key == '__proto__' && defineProperty) { - defineProperty(object, key, { - 'configurable': true, - 'enumerable': true, - 'value': value, - 'writable': true - }); - } else { - object[key] = value; - } - } - - /** - * The base implementation of `_.at` without support for individual paths. - * - * @private - * @param {Object} object The object to iterate over. - * @param {string[]} paths The property paths to pick. - * @returns {Array} Returns the picked elements. - */ - function baseAt(object, paths) { - var index = -1, - length = paths.length, - result = Array(length), - skip = object == null; - - while (++index < length) { - result[index] = skip ? undefined : get(object, paths[index]); - } - return result; - } - - /** - * The base implementation of `_.clamp` which doesn't coerce arguments. - * - * @private - * @param {number} number The number to clamp. - * @param {number} [lower] The lower bound. - * @param {number} upper The upper bound. - * @returns {number} Returns the clamped number. - */ - function baseClamp(number, lower, upper) { - if (number === number) { - if (upper !== undefined) { - number = number <= upper ? number : upper; - } - if (lower !== undefined) { - number = number >= lower ? number : lower; - } - } - return number; - } - - /** - * The base implementation of `_.clone` and `_.cloneDeep` which tracks - * traversed objects. - * - * @private - * @param {*} value The value to clone. - * @param {boolean} bitmask The bitmask flags. - * 1 - Deep clone - * 2 - Flatten inherited properties - * 4 - Clone symbols - * @param {Function} [customizer] The function to customize cloning. - * @param {string} [key] The key of `value`. - * @param {Object} [object] The parent object of `value`. - * @param {Object} [stack] Tracks traversed objects and their clone counterparts. - * @returns {*} Returns the cloned value. - */ - function baseClone(value, bitmask, customizer, key, object, stack) { - var result, - isDeep = bitmask & CLONE_DEEP_FLAG, - isFlat = bitmask & CLONE_FLAT_FLAG, - isFull = bitmask & CLONE_SYMBOLS_FLAG; - - if (customizer) { - result = object ? 
customizer(value, key, object, stack) : customizer(value); - } - if (result !== undefined) { - return result; - } - if (!isObject(value)) { - return value; - } - var isArr = isArray(value); - if (isArr) { - result = initCloneArray(value); - if (!isDeep) { - return copyArray(value, result); - } - } else { - var tag = getTag(value), - isFunc = tag == funcTag || tag == genTag; - - if (isBuffer(value)) { - return cloneBuffer(value, isDeep); - } - if (tag == objectTag || tag == argsTag || (isFunc && !object)) { - result = (isFlat || isFunc) ? {} : initCloneObject(value); - if (!isDeep) { - return isFlat - ? copySymbolsIn(value, baseAssignIn(result, value)) - : copySymbols(value, baseAssign(result, value)); - } - } else { - if (!cloneableTags[tag]) { - return object ? value : {}; - } - result = initCloneByTag(value, tag, isDeep); - } - } - // Check for circular references and return its corresponding clone. - stack || (stack = new Stack); - var stacked = stack.get(value); - if (stacked) { - return stacked; - } - stack.set(value, result); - - if (isSet(value)) { - value.forEach(function(subValue) { - result.add(baseClone(subValue, bitmask, customizer, subValue, value, stack)); - }); - } else if (isMap(value)) { - value.forEach(function(subValue, key) { - result.set(key, baseClone(subValue, bitmask, customizer, key, value, stack)); - }); - } - - var keysFunc = isFull - ? (isFlat ? getAllKeysIn : getAllKeys) - : (isFlat ? keysIn : keys); - - var props = isArr ? undefined : keysFunc(value); - arrayEach(props || value, function(subValue, key) { - if (props) { - key = subValue; - subValue = value[key]; - } - // Recursively populate clone (susceptible to call stack limits). - assignValue(result, key, baseClone(subValue, bitmask, customizer, key, value, stack)); - }); - return result; - } - - /** - * The base implementation of `_.conforms` which doesn't clone `source`. - * - * @private - * @param {Object} source The object of property predicates to conform to. - * @returns {Function} Returns the new spec function. - */ - function baseConforms(source) { - var props = keys(source); - return function(object) { - return baseConformsTo(object, source, props); - }; - } - - /** - * The base implementation of `_.conformsTo` which accepts `props` to check. - * - * @private - * @param {Object} object The object to inspect. - * @param {Object} source The object of property predicates to conform to. - * @returns {boolean} Returns `true` if `object` conforms, else `false`. - */ - function baseConformsTo(object, source, props) { - var length = props.length; - if (object == null) { - return !length; - } - object = Object(object); - while (length--) { - var key = props[length], - predicate = source[key], - value = object[key]; - - if ((value === undefined && !(key in object)) || !predicate(value)) { - return false; - } - } - return true; - } - - /** - * The base implementation of `_.delay` and `_.defer` which accepts `args` - * to provide to `func`. - * - * @private - * @param {Function} func The function to delay. - * @param {number} wait The number of milliseconds to delay invocation. - * @param {Array} args The arguments to provide to `func`. - * @returns {number|Object} Returns the timer id or timeout object. 
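baseClone records every value it visits in a Stack, which is what lets the public _.cloneDeep reproduce circular references instead of recursing forever; the object shape below is arbitrary:

var _ = require('lodash');

var node = { name: 'root', children: [] };
node.children.push(node); // circular reference

var copy = _.cloneDeep(node);
console.log(copy !== node);             // => true
console.log(copy.children[0] === copy); // => true, the cycle now points at the clone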
- */ - function baseDelay(func, wait, args) { - if (typeof func != 'function') { - throw new TypeError(FUNC_ERROR_TEXT); - } - return setTimeout(function() { func.apply(undefined, args); }, wait); - } - - /** - * The base implementation of methods like `_.difference` without support - * for excluding multiple arrays or iteratee shorthands. - * - * @private - * @param {Array} array The array to inspect. - * @param {Array} values The values to exclude. - * @param {Function} [iteratee] The iteratee invoked per element. - * @param {Function} [comparator] The comparator invoked per element. - * @returns {Array} Returns the new array of filtered values. - */ - function baseDifference(array, values, iteratee, comparator) { - var index = -1, - includes = arrayIncludes, - isCommon = true, - length = array.length, - result = [], - valuesLength = values.length; - - if (!length) { - return result; - } - if (iteratee) { - values = arrayMap(values, baseUnary(iteratee)); - } - if (comparator) { - includes = arrayIncludesWith; - isCommon = false; - } - else if (values.length >= LARGE_ARRAY_SIZE) { - includes = cacheHas; - isCommon = false; - values = new SetCache(values); - } - outer: - while (++index < length) { - var value = array[index], - computed = iteratee == null ? value : iteratee(value); - - value = (comparator || value !== 0) ? value : 0; - if (isCommon && computed === computed) { - var valuesIndex = valuesLength; - while (valuesIndex--) { - if (values[valuesIndex] === computed) { - continue outer; - } - } - result.push(value); - } - else if (!includes(values, computed, comparator)) { - result.push(value); - } - } - return result; - } - - /** - * The base implementation of `_.forEach` without support for iteratee shorthands. - * - * @private - * @param {Array|Object} collection The collection to iterate over. - * @param {Function} iteratee The function invoked per iteration. - * @returns {Array|Object} Returns `collection`. - */ - var baseEach = createBaseEach(baseForOwn); - - /** - * The base implementation of `_.forEachRight` without support for iteratee shorthands. - * - * @private - * @param {Array|Object} collection The collection to iterate over. - * @param {Function} iteratee The function invoked per iteration. - * @returns {Array|Object} Returns `collection`. - */ - var baseEachRight = createBaseEach(baseForOwnRight, true); - - /** - * The base implementation of `_.every` without support for iteratee shorthands. - * - * @private - * @param {Array|Object} collection The collection to iterate over. - * @param {Function} predicate The function invoked per iteration. - * @returns {boolean} Returns `true` if all elements pass the predicate check, - * else `false` - */ - function baseEvery(collection, predicate) { - var result = true; - baseEach(collection, function(value, index, collection) { - result = !!predicate(value, index, collection); - return result; - }); - return result; - } - - /** - * The base implementation of methods like `_.max` and `_.min` which accepts a - * `comparator` to determine the extremum value. - * - * @private - * @param {Array} array The array to iterate over. - * @param {Function} iteratee The iteratee invoked per iteration. - * @param {Function} comparator The comparator used to compare values. - * @returns {*} Returns the extremum value. 
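baseDifference switches from a linear arrayIncludes scan to a SetCache once the exclusion list reaches LARGE_ARRAY_SIZE (200 elements), so membership tests stay cheap; the observable behavior is simply that of _.difference:

var _ = require('lodash');

_.difference([2, 1], [2, 3]); // => [1]

// Same semantics, but the large exclusion array is loaded into a SetCache first.
_.difference([999, 1000, 1001], _.range(1000)); // => [1000, 1001]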
- */ - function baseExtremum(array, iteratee, comparator) { - var index = -1, - length = array.length; - - while (++index < length) { - var value = array[index], - current = iteratee(value); - - if (current != null && (computed === undefined - ? (current === current && !isSymbol(current)) - : comparator(current, computed) - )) { - var computed = current, - result = value; - } - } - return result; - } - - /** - * The base implementation of `_.fill` without an iteratee call guard. - * - * @private - * @param {Array} array The array to fill. - * @param {*} value The value to fill `array` with. - * @param {number} [start=0] The start position. - * @param {number} [end=array.length] The end position. - * @returns {Array} Returns `array`. - */ - function baseFill(array, value, start, end) { - var length = array.length; - - start = toInteger(start); - if (start < 0) { - start = -start > length ? 0 : (length + start); - } - end = (end === undefined || end > length) ? length : toInteger(end); - if (end < 0) { - end += length; - } - end = start > end ? 0 : toLength(end); - while (start < end) { - array[start++] = value; - } - return array; - } - - /** - * The base implementation of `_.filter` without support for iteratee shorthands. - * - * @private - * @param {Array|Object} collection The collection to iterate over. - * @param {Function} predicate The function invoked per iteration. - * @returns {Array} Returns the new filtered array. - */ - function baseFilter(collection, predicate) { - var result = []; - baseEach(collection, function(value, index, collection) { - if (predicate(value, index, collection)) { - result.push(value); - } - }); - return result; - } - - /** - * The base implementation of `_.flatten` with support for restricting flattening. - * - * @private - * @param {Array} array The array to flatten. - * @param {number} depth The maximum recursion depth. - * @param {boolean} [predicate=isFlattenable] The function invoked per iteration. - * @param {boolean} [isStrict] Restrict to values that pass `predicate` checks. - * @param {Array} [result=[]] The initial result value. - * @returns {Array} Returns the new flattened array. - */ - function baseFlatten(array, depth, predicate, isStrict, result) { - var index = -1, - length = array.length; - - predicate || (predicate = isFlattenable); - result || (result = []); - - while (++index < length) { - var value = array[index]; - if (depth > 0 && predicate(value)) { - if (depth > 1) { - // Recursively flatten arrays (susceptible to call stack limits). - baseFlatten(value, depth - 1, predicate, isStrict, result); - } else { - arrayPush(result, value); - } - } else if (!isStrict) { - result[result.length] = value; - } - } - return result; - } - - /** - * The base implementation of `baseForOwn` which iterates over `object` - * properties returned by `keysFunc` and invokes `iteratee` for each property. - * Iteratee functions may exit iteration early by explicitly returning `false`. - * - * @private - * @param {Object} object The object to iterate over. - * @param {Function} iteratee The function invoked per iteration. - * @param {Function} keysFunc The function to get the keys of `object`. - * @returns {Object} Returns `object`. - */ - var baseFor = createBaseFor(); - - /** - * This function is like `baseFor` except that it iterates over properties - * in the opposite order. - * - * @private - * @param {Object} object The object to iterate over. - * @param {Function} iteratee The function invoked per iteration. 
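baseFlatten drives the whole public flatten family through its depth argument; a quick illustration with an arbitrary nested array:

var _ = require('lodash');

var nested = [1, [2, [3, [4]], 5]];

_.flatten(nested);         // => [1, 2, [3, [4]], 5]   (depth 1)
_.flattenDepth(nested, 2); // => [1, 2, 3, [4], 5]
_.flattenDeep(nested);     // => [1, 2, 3, 4, 5]       (depth Infinity)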
- * @param {Function} keysFunc The function to get the keys of `object`. - * @returns {Object} Returns `object`. - */ - var baseForRight = createBaseFor(true); - - /** - * The base implementation of `_.forOwn` without support for iteratee shorthands. - * - * @private - * @param {Object} object The object to iterate over. - * @param {Function} iteratee The function invoked per iteration. - * @returns {Object} Returns `object`. - */ - function baseForOwn(object, iteratee) { - return object && baseFor(object, iteratee, keys); - } - - /** - * The base implementation of `_.forOwnRight` without support for iteratee shorthands. - * - * @private - * @param {Object} object The object to iterate over. - * @param {Function} iteratee The function invoked per iteration. - * @returns {Object} Returns `object`. - */ - function baseForOwnRight(object, iteratee) { - return object && baseForRight(object, iteratee, keys); - } - - /** - * The base implementation of `_.functions` which creates an array of - * `object` function property names filtered from `props`. - * - * @private - * @param {Object} object The object to inspect. - * @param {Array} props The property names to filter. - * @returns {Array} Returns the function names. - */ - function baseFunctions(object, props) { - return arrayFilter(props, function(key) { - return isFunction(object[key]); - }); - } - - /** - * The base implementation of `_.get` without support for default values. - * - * @private - * @param {Object} object The object to query. - * @param {Array|string} path The path of the property to get. - * @returns {*} Returns the resolved value. - */ - function baseGet(object, path) { - path = castPath(path, object); - - var index = 0, - length = path.length; - - while (object != null && index < length) { - object = object[toKey(path[index++])]; - } - return (index && index == length) ? object : undefined; - } - - /** - * The base implementation of `getAllKeys` and `getAllKeysIn` which uses - * `keysFunc` and `symbolsFunc` to get the enumerable property names and - * symbols of `object`. - * - * @private - * @param {Object} object The object to query. - * @param {Function} keysFunc The function to get the keys of `object`. - * @param {Function} symbolsFunc The function to get the symbols of `object`. - * @returns {Array} Returns the array of property names and symbols. - */ - function baseGetAllKeys(object, keysFunc, symbolsFunc) { - var result = keysFunc(object); - return isArray(object) ? result : arrayPush(result, symbolsFunc(object)); - } - - /** - * The base implementation of `getTag` without fallbacks for buggy environments. - * - * @private - * @param {*} value The value to query. - * @returns {string} Returns the `toStringTag`. - */ - function baseGetTag(value) { - if (value == null) { - return value === undefined ? undefinedTag : nullTag; - } - return (symToStringTag && symToStringTag in Object(value)) - ? getRawTag(value) - : objectToString(value); - } - - /** - * The base implementation of `_.gt` which doesn't coerce arguments. - * - * @private - * @param {*} value The value to compare. - * @param {*} other The other value to compare. - * @returns {boolean} Returns `true` if `value` is greater than `other`, - * else `false`. - */ - function baseGt(value, other) { - return value > other; - } - - /** - * The base implementation of `_.has` without support for deep paths. - * - * @private - * @param {Object} [object] The object to query. - * @param {Array|string} key The key to check. 
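baseGet walks the cast path one key at a time and simply stops when it hits a nullish value, which is what gives the public _.get its tolerant deep access; the sample object and default are illustrative:

var _ = require('lodash');

var object = { a: [{ b: { c: 3 } }] };

_.get(object, 'a[0].b.c');           // => 3
_.get(object, ['a', '0', 'b', 'c']); // => 3, path supplied as an array
_.get(object, 'a.b.c', 'default');   // => 'default', missing paths never throw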
- * @returns {boolean} Returns `true` if `key` exists, else `false`. - */ - function baseHas(object, key) { - return object != null && hasOwnProperty.call(object, key); - } - - /** - * The base implementation of `_.hasIn` without support for deep paths. - * - * @private - * @param {Object} [object] The object to query. - * @param {Array|string} key The key to check. - * @returns {boolean} Returns `true` if `key` exists, else `false`. - */ - function baseHasIn(object, key) { - return object != null && key in Object(object); - } - - /** - * The base implementation of `_.inRange` which doesn't coerce arguments. - * - * @private - * @param {number} number The number to check. - * @param {number} start The start of the range. - * @param {number} end The end of the range. - * @returns {boolean} Returns `true` if `number` is in the range, else `false`. - */ - function baseInRange(number, start, end) { - return number >= nativeMin(start, end) && number < nativeMax(start, end); - } - - /** - * The base implementation of methods like `_.intersection`, without support - * for iteratee shorthands, that accepts an array of arrays to inspect. - * - * @private - * @param {Array} arrays The arrays to inspect. - * @param {Function} [iteratee] The iteratee invoked per element. - * @param {Function} [comparator] The comparator invoked per element. - * @returns {Array} Returns the new array of shared values. - */ - function baseIntersection(arrays, iteratee, comparator) { - var includes = comparator ? arrayIncludesWith : arrayIncludes, - length = arrays[0].length, - othLength = arrays.length, - othIndex = othLength, - caches = Array(othLength), - maxLength = Infinity, - result = []; - - while (othIndex--) { - var array = arrays[othIndex]; - if (othIndex && iteratee) { - array = arrayMap(array, baseUnary(iteratee)); - } - maxLength = nativeMin(array.length, maxLength); - caches[othIndex] = !comparator && (iteratee || (length >= 120 && array.length >= 120)) - ? new SetCache(othIndex && array) - : undefined; - } - array = arrays[0]; - - var index = -1, - seen = caches[0]; - - outer: - while (++index < length && result.length < maxLength) { - var value = array[index], - computed = iteratee ? iteratee(value) : value; - - value = (comparator || value !== 0) ? value : 0; - if (!(seen - ? cacheHas(seen, computed) - : includes(result, computed, comparator) - )) { - othIndex = othLength; - while (--othIndex) { - var cache = caches[othIndex]; - if (!(cache - ? cacheHas(cache, computed) - : includes(arrays[othIndex], computed, comparator)) - ) { - continue outer; - } - } - if (seen) { - seen.push(computed); - } - result.push(value); - } - } - return result; - } - - /** - * The base implementation of `_.invert` and `_.invertBy` which inverts - * `object` with values transformed by `iteratee` and set by `setter`. - * - * @private - * @param {Object} object The object to iterate over. - * @param {Function} setter The function to set `accumulator` values. - * @param {Function} iteratee The iteratee to transform values. - * @param {Object} accumulator The initial inverted object. - * @returns {Function} Returns `accumulator`. - */ - function baseInverter(object, setter, iteratee, accumulator) { - baseForOwn(object, function(value, key, object) { - setter(accumulator, iteratee(value), key, object); - }); - return accumulator; - } - - /** - * The base implementation of `_.invoke` without support for individual - * method arguments. - * - * @private - * @param {Object} object The object to query. 
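baseIntersection caches the candidate arrays (again with SetCache when that pays off) and keeps only the values present in every input; through the public API:

var _ = require('lodash');

_.intersection([2, 1], [2, 3]);                       // => [2]
_.intersectionBy([2.1, 1.2], [2.3, 3.4], Math.floor); // => [2.1]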
- * @param {Array|string} path The path of the method to invoke. - * @param {Array} args The arguments to invoke the method with. - * @returns {*} Returns the result of the invoked method. - */ - function baseInvoke(object, path, args) { - path = castPath(path, object); - object = parent(object, path); - var func = object == null ? object : object[toKey(last(path))]; - return func == null ? undefined : apply(func, object, args); - } - - /** - * The base implementation of `_.isArguments`. - * - * @private - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is an `arguments` object, - */ - function baseIsArguments(value) { - return isObjectLike(value) && baseGetTag(value) == argsTag; - } - - /** - * The base implementation of `_.isArrayBuffer` without Node.js optimizations. - * - * @private - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is an array buffer, else `false`. - */ - function baseIsArrayBuffer(value) { - return isObjectLike(value) && baseGetTag(value) == arrayBufferTag; - } - - /** - * The base implementation of `_.isDate` without Node.js optimizations. - * - * @private - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is a date object, else `false`. - */ - function baseIsDate(value) { - return isObjectLike(value) && baseGetTag(value) == dateTag; - } - - /** - * The base implementation of `_.isEqual` which supports partial comparisons - * and tracks traversed objects. - * - * @private - * @param {*} value The value to compare. - * @param {*} other The other value to compare. - * @param {boolean} bitmask The bitmask flags. - * 1 - Unordered comparison - * 2 - Partial comparison - * @param {Function} [customizer] The function to customize comparisons. - * @param {Object} [stack] Tracks traversed `value` and `other` objects. - * @returns {boolean} Returns `true` if the values are equivalent, else `false`. - */ - function baseIsEqual(value, other, bitmask, customizer, stack) { - if (value === other) { - return true; - } - if (value == null || other == null || (!isObjectLike(value) && !isObjectLike(other))) { - return value !== value && other !== other; - } - return baseIsEqualDeep(value, other, bitmask, customizer, baseIsEqual, stack); - } - - /** - * A specialized version of `baseIsEqual` for arrays and objects which performs - * deep comparisons and tracks traversed objects enabling objects with circular - * references to be compared. - * - * @private - * @param {Object} object The object to compare. - * @param {Object} other The other object to compare. - * @param {number} bitmask The bitmask flags. See `baseIsEqual` for more details. - * @param {Function} customizer The function to customize comparisons. - * @param {Function} equalFunc The function to determine equivalents of values. - * @param {Object} [stack] Tracks traversed `object` and `other` objects. - * @returns {boolean} Returns `true` if the objects are equivalent, else `false`. - */ - function baseIsEqualDeep(object, other, bitmask, customizer, equalFunc, stack) { - var objIsArr = isArray(object), - othIsArr = isArray(other), - objTag = objIsArr ? arrayTag : getTag(object), - othTag = othIsArr ? arrayTag : getTag(other); - - objTag = objTag == argsTag ? objectTag : objTag; - othTag = othTag == argsTag ? 
objectTag : othTag; - - var objIsObj = objTag == objectTag, - othIsObj = othTag == objectTag, - isSameTag = objTag == othTag; - - if (isSameTag && isBuffer(object)) { - if (!isBuffer(other)) { - return false; - } - objIsArr = true; - objIsObj = false; - } - if (isSameTag && !objIsObj) { - stack || (stack = new Stack); - return (objIsArr || isTypedArray(object)) - ? equalArrays(object, other, bitmask, customizer, equalFunc, stack) - : equalByTag(object, other, objTag, bitmask, customizer, equalFunc, stack); - } - if (!(bitmask & COMPARE_PARTIAL_FLAG)) { - var objIsWrapped = objIsObj && hasOwnProperty.call(object, '__wrapped__'), - othIsWrapped = othIsObj && hasOwnProperty.call(other, '__wrapped__'); - - if (objIsWrapped || othIsWrapped) { - var objUnwrapped = objIsWrapped ? object.value() : object, - othUnwrapped = othIsWrapped ? other.value() : other; - - stack || (stack = new Stack); - return equalFunc(objUnwrapped, othUnwrapped, bitmask, customizer, stack); - } - } - if (!isSameTag) { - return false; - } - stack || (stack = new Stack); - return equalObjects(object, other, bitmask, customizer, equalFunc, stack); - } - - /** - * The base implementation of `_.isMap` without Node.js optimizations. - * - * @private - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is a map, else `false`. - */ - function baseIsMap(value) { - return isObjectLike(value) && getTag(value) == mapTag; - } - - /** - * The base implementation of `_.isMatch` without support for iteratee shorthands. - * - * @private - * @param {Object} object The object to inspect. - * @param {Object} source The object of property values to match. - * @param {Array} matchData The property names, values, and compare flags to match. - * @param {Function} [customizer] The function to customize comparisons. - * @returns {boolean} Returns `true` if `object` is a match, else `false`. - */ - function baseIsMatch(object, source, matchData, customizer) { - var index = matchData.length, - length = index, - noCustomizer = !customizer; - - if (object == null) { - return !length; - } - object = Object(object); - while (index--) { - var data = matchData[index]; - if ((noCustomizer && data[2]) - ? data[1] !== object[data[0]] - : !(data[0] in object) - ) { - return false; - } - } - while (++index < length) { - data = matchData[index]; - var key = data[0], - objValue = object[key], - srcValue = data[1]; - - if (noCustomizer && data[2]) { - if (objValue === undefined && !(key in object)) { - return false; - } - } else { - var stack = new Stack; - if (customizer) { - var result = customizer(objValue, srcValue, key, object, source, stack); - } - if (!(result === undefined - ? baseIsEqual(srcValue, objValue, COMPARE_PARTIAL_FLAG | COMPARE_UNORDERED_FLAG, customizer, stack) - : result - )) { - return false; - } - } - } - return true; - } - - /** - * The base implementation of `_.isNative` without bad shim checks. - * - * @private - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is a native function, - * else `false`. - */ - function baseIsNative(value) { - if (!isObject(value) || isMasked(value)) { - return false; - } - var pattern = isFunction(value) ? reIsNative : reIsHostCtor; - return pattern.test(toSource(value)); - } - - /** - * The base implementation of `_.isRegExp` without Node.js optimizations. - * - * @private - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is a regexp, else `false`. 
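baseIsMatch performs the partial, unordered comparison behind _.isMatch and _.matches: only the keys present in the source object are checked; the sample data below is illustrative:

var _ = require('lodash');

var object = { a: 1, b: 2, c: 3 };

_.isMatch(object, { b: 2 });             // => true, extra keys in `object` are ignored
_.isMatch(object, { b: 1 });             // => false
_.filter([object], _.matches({ c: 3 })); // => [object]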
- */ - function baseIsRegExp(value) { - return isObjectLike(value) && baseGetTag(value) == regexpTag; - } - - /** - * The base implementation of `_.isSet` without Node.js optimizations. - * - * @private - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is a set, else `false`. - */ - function baseIsSet(value) { - return isObjectLike(value) && getTag(value) == setTag; - } - - /** - * The base implementation of `_.isTypedArray` without Node.js optimizations. - * - * @private - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is a typed array, else `false`. - */ - function baseIsTypedArray(value) { - return isObjectLike(value) && - isLength(value.length) && !!typedArrayTags[baseGetTag(value)]; - } - - /** - * The base implementation of `_.iteratee`. - * - * @private - * @param {*} [value=_.identity] The value to convert to an iteratee. - * @returns {Function} Returns the iteratee. - */ - function baseIteratee(value) { - // Don't store the `typeof` result in a variable to avoid a JIT bug in Safari 9. - // See https://bugs.webkit.org/show_bug.cgi?id=156034 for more details. - if (typeof value == 'function') { - return value; - } - if (value == null) { - return identity; - } - if (typeof value == 'object') { - return isArray(value) - ? baseMatchesProperty(value[0], value[1]) - : baseMatches(value); - } - return property(value); - } - - /** - * The base implementation of `_.keys` which doesn't treat sparse arrays as dense. - * - * @private - * @param {Object} object The object to query. - * @returns {Array} Returns the array of property names. - */ - function baseKeys(object) { - if (!isPrototype(object)) { - return nativeKeys(object); - } - var result = []; - for (var key in Object(object)) { - if (hasOwnProperty.call(object, key) && key != 'constructor') { - result.push(key); - } - } - return result; - } - - /** - * The base implementation of `_.keysIn` which doesn't treat sparse arrays as dense. - * - * @private - * @param {Object} object The object to query. - * @returns {Array} Returns the array of property names. - */ - function baseKeysIn(object) { - if (!isObject(object)) { - return nativeKeysIn(object); - } - var isProto = isPrototype(object), - result = []; - - for (var key in object) { - if (!(key == 'constructor' && (isProto || !hasOwnProperty.call(object, key)))) { - result.push(key); - } - } - return result; - } - - /** - * The base implementation of `_.lt` which doesn't coerce arguments. - * - * @private - * @param {*} value The value to compare. - * @param {*} other The other value to compare. - * @returns {boolean} Returns `true` if `value` is less than `other`, - * else `false`. - */ - function baseLt(value, other) { - return value < other; - } - - /** - * The base implementation of `_.map` without support for iteratee shorthands. - * - * @private - * @param {Array|Object} collection The collection to iterate over. - * @param {Function} iteratee The function invoked per iteration. - * @returns {Array} Returns the new mapped array. - */ - function baseMap(collection, iteratee) { - var index = -1, - result = isArrayLike(collection) ? Array(collection.length) : []; - - baseEach(collection, function(value, key, collection) { - result[++index] = iteratee(value, key, collection); - }); - return result; - } - - /** - * The base implementation of `_.matches` which doesn't clone `source`. - * - * @private - * @param {Object} source The object of property values to match. 
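baseIteratee is what turns the documented iteratee shorthands (property path, [path, value] pair, partial object, or plain function) into callables; a quick tour with illustrative data:

var _ = require('lodash');

var users = [
  { user: 'barney', age: 36, active: true },
  { user: 'fred',   age: 40, active: false }
];

_.map(users, 'user');               // => ['barney', 'fred']   (property shorthand)
_.filter(users, { active: false }); // => [the 'fred' object]  (matches shorthand)
_.filter(users, ['active', false]); // => [the 'fred' object]  (matchesProperty shorthand)
_.filter(users, function(o) { return o.age < 40; }); // => [the 'barney' object]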
- * @returns {Function} Returns the new spec function. - */ - function baseMatches(source) { - var matchData = getMatchData(source); - if (matchData.length == 1 && matchData[0][2]) { - return matchesStrictComparable(matchData[0][0], matchData[0][1]); - } - return function(object) { - return object === source || baseIsMatch(object, source, matchData); - }; - } - - /** - * The base implementation of `_.matchesProperty` which doesn't clone `srcValue`. - * - * @private - * @param {string} path The path of the property to get. - * @param {*} srcValue The value to match. - * @returns {Function} Returns the new spec function. - */ - function baseMatchesProperty(path, srcValue) { - if (isKey(path) && isStrictComparable(srcValue)) { - return matchesStrictComparable(toKey(path), srcValue); - } - return function(object) { - var objValue = get(object, path); - return (objValue === undefined && objValue === srcValue) - ? hasIn(object, path) - : baseIsEqual(srcValue, objValue, COMPARE_PARTIAL_FLAG | COMPARE_UNORDERED_FLAG); - }; - } - - /** - * The base implementation of `_.merge` without support for multiple sources. - * - * @private - * @param {Object} object The destination object. - * @param {Object} source The source object. - * @param {number} srcIndex The index of `source`. - * @param {Function} [customizer] The function to customize merged values. - * @param {Object} [stack] Tracks traversed source values and their merged - * counterparts. - */ - function baseMerge(object, source, srcIndex, customizer, stack) { - if (object === source) { - return; - } - baseFor(source, function(srcValue, key) { - stack || (stack = new Stack); - if (isObject(srcValue)) { - baseMergeDeep(object, source, key, srcIndex, baseMerge, customizer, stack); - } - else { - var newValue = customizer - ? customizer(safeGet(object, key), srcValue, (key + ''), object, source, stack) - : undefined; - - if (newValue === undefined) { - newValue = srcValue; - } - assignMergeValue(object, key, newValue); - } - }, keysIn); - } - - /** - * A specialized version of `baseMerge` for arrays and objects which performs - * deep merges and tracks traversed objects enabling objects with circular - * references to be merged. - * - * @private - * @param {Object} object The destination object. - * @param {Object} source The source object. - * @param {string} key The key of the value to merge. - * @param {number} srcIndex The index of `source`. - * @param {Function} mergeFunc The function to merge values. - * @param {Function} [customizer] The function to customize assigned values. - * @param {Object} [stack] Tracks traversed source values and their merged - * counterparts. - */ - function baseMergeDeep(object, source, key, srcIndex, mergeFunc, customizer, stack) { - var objValue = safeGet(object, key), - srcValue = safeGet(source, key), - stacked = stack.get(srcValue); - - if (stacked) { - assignMergeValue(object, key, stacked); - return; - } - var newValue = customizer - ? 
customizer(objValue, srcValue, (key + ''), object, source, stack) - : undefined; - - var isCommon = newValue === undefined; - - if (isCommon) { - var isArr = isArray(srcValue), - isBuff = !isArr && isBuffer(srcValue), - isTyped = !isArr && !isBuff && isTypedArray(srcValue); - - newValue = srcValue; - if (isArr || isBuff || isTyped) { - if (isArray(objValue)) { - newValue = objValue; - } - else if (isArrayLikeObject(objValue)) { - newValue = copyArray(objValue); - } - else if (isBuff) { - isCommon = false; - newValue = cloneBuffer(srcValue, true); - } - else if (isTyped) { - isCommon = false; - newValue = cloneTypedArray(srcValue, true); - } - else { - newValue = []; - } - } - else if (isPlainObject(srcValue) || isArguments(srcValue)) { - newValue = objValue; - if (isArguments(objValue)) { - newValue = toPlainObject(objValue); - } - else if (!isObject(objValue) || isFunction(objValue)) { - newValue = initCloneObject(srcValue); - } - } - else { - isCommon = false; - } - } - if (isCommon) { - // Recursively merge objects and arrays (susceptible to call stack limits). - stack.set(srcValue, newValue); - mergeFunc(newValue, srcValue, srcIndex, customizer, stack); - stack['delete'](srcValue); - } - assignMergeValue(object, key, newValue); - } - - /** - * The base implementation of `_.nth` which doesn't coerce arguments. - * - * @private - * @param {Array} array The array to query. - * @param {number} n The index of the element to return. - * @returns {*} Returns the nth element of `array`. - */ - function baseNth(array, n) { - var length = array.length; - if (!length) { - return; - } - n += n < 0 ? length : 0; - return isIndex(n, length) ? array[n] : undefined; - } - - /** - * The base implementation of `_.orderBy` without param guards. - * - * @private - * @param {Array|Object} collection The collection to iterate over. - * @param {Function[]|Object[]|string[]} iteratees The iteratees to sort by. - * @param {string[]} orders The sort orders of `iteratees`. - * @returns {Array} Returns the new sorted array. - */ - function baseOrderBy(collection, iteratees, orders) { - var index = -1; - iteratees = arrayMap(iteratees.length ? iteratees : [identity], baseUnary(getIteratee())); - - var result = baseMap(collection, function(value, key, collection) { - var criteria = arrayMap(iteratees, function(iteratee) { - return iteratee(value); - }); - return { 'criteria': criteria, 'index': ++index, 'value': value }; - }); - - return baseSortBy(result, function(object, other) { - return compareMultiple(object, other, orders); - }); - } - - /** - * The base implementation of `_.pick` without support for individual - * property identifiers. - * - * @private - * @param {Object} object The source object. - * @param {string[]} paths The property paths to pick. - * @returns {Object} Returns the new object. - */ - function basePick(object, paths) { - return basePickBy(object, paths, function(value, path) { - return hasIn(object, path); - }); - } - - /** - * The base implementation of `_.pickBy` without support for iteratee shorthands. - * - * @private - * @param {Object} object The source object. - * @param {string[]} paths The property paths to pick. - * @param {Function} predicate The function invoked per property. - * @returns {Object} Returns the new object. 
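baseOrderBy maps each element to its sort criteria and then delegates the actual ordering to compareMultiple; through the public _.orderBy with illustrative data:

var _ = require('lodash');

var users = [
  { user: 'fred',   age: 48 },
  { user: 'barney', age: 34 },
  { user: 'fred',   age: 40 },
  { user: 'barney', age: 36 }
];

// Sort by `user` ascending, then by `age` descending.
_.orderBy(users, ['user', 'age'], ['asc', 'desc']);
// => [{barney, 36}, {barney, 34}, {fred, 48}, {fred, 40}]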
- */ - function basePickBy(object, paths, predicate) { - var index = -1, - length = paths.length, - result = {}; - - while (++index < length) { - var path = paths[index], - value = baseGet(object, path); - - if (predicate(value, path)) { - baseSet(result, castPath(path, object), value); - } - } - return result; - } - - /** - * A specialized version of `baseProperty` which supports deep paths. - * - * @private - * @param {Array|string} path The path of the property to get. - * @returns {Function} Returns the new accessor function. - */ - function basePropertyDeep(path) { - return function(object) { - return baseGet(object, path); - }; - } - - /** - * The base implementation of `_.pullAllBy` without support for iteratee - * shorthands. - * - * @private - * @param {Array} array The array to modify. - * @param {Array} values The values to remove. - * @param {Function} [iteratee] The iteratee invoked per element. - * @param {Function} [comparator] The comparator invoked per element. - * @returns {Array} Returns `array`. - */ - function basePullAll(array, values, iteratee, comparator) { - var indexOf = comparator ? baseIndexOfWith : baseIndexOf, - index = -1, - length = values.length, - seen = array; - - if (array === values) { - values = copyArray(values); - } - if (iteratee) { - seen = arrayMap(array, baseUnary(iteratee)); - } - while (++index < length) { - var fromIndex = 0, - value = values[index], - computed = iteratee ? iteratee(value) : value; - - while ((fromIndex = indexOf(seen, computed, fromIndex, comparator)) > -1) { - if (seen !== array) { - splice.call(seen, fromIndex, 1); - } - splice.call(array, fromIndex, 1); - } - } - return array; - } - - /** - * The base implementation of `_.pullAt` without support for individual - * indexes or capturing the removed elements. - * - * @private - * @param {Array} array The array to modify. - * @param {number[]} indexes The indexes of elements to remove. - * @returns {Array} Returns `array`. - */ - function basePullAt(array, indexes) { - var length = array ? indexes.length : 0, - lastIndex = length - 1; - - while (length--) { - var index = indexes[length]; - if (length == lastIndex || index !== previous) { - var previous = index; - if (isIndex(index)) { - splice.call(array, index, 1); - } else { - baseUnset(array, index); - } - } - } - return array; - } - - /** - * The base implementation of `_.random` without support for returning - * floating-point numbers. - * - * @private - * @param {number} lower The lower bound. - * @param {number} upper The upper bound. - * @returns {number} Returns the random number. - */ - function baseRandom(lower, upper) { - return lower + nativeFloor(nativeRandom() * (upper - lower + 1)); - } - - /** - * The base implementation of `_.range` and `_.rangeRight` which doesn't - * coerce arguments. - * - * @private - * @param {number} start The start of the range. - * @param {number} end The end of the range. - * @param {number} step The value to increment or decrement by. - * @param {boolean} [fromRight] Specify iterating from right to left. - * @returns {Array} Returns the range of numbers. - */ - function baseRange(start, end, step, fromRight) { - var index = -1, - length = nativeMax(nativeCeil((end - start) / (step || 1)), 0), - result = Array(length); - - while (length--) { - result[fromRight ? length : ++index] = start; - start += step; - } - return result; - } - - /** - * The base implementation of `_.repeat` which doesn't coerce arguments. - * - * @private - * @param {string} string The string to repeat. 
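baseRange precomputes the result length as ceil((end - start) / step) and fills it in one loop, which is all the public _.range does once its arguments are normalized:

var _ = require('lodash');

_.range(4);         // => [0, 1, 2, 3]
_.range(0, 20, 5);  // => [0, 5, 10, 15]
_.range(0, -4, -1); // => [0, -1, -2, -3]
_.range(0);         // => []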
- * @param {number} n The number of times to repeat the string. - * @returns {string} Returns the repeated string. - */ - function baseRepeat(string, n) { - var result = ''; - if (!string || n < 1 || n > MAX_SAFE_INTEGER) { - return result; - } - // Leverage the exponentiation by squaring algorithm for a faster repeat. - // See https://en.wikipedia.org/wiki/Exponentiation_by_squaring for more details. - do { - if (n % 2) { - result += string; - } - n = nativeFloor(n / 2); - if (n) { - string += string; - } - } while (n); - - return result; - } - - /** - * The base implementation of `_.rest` which doesn't validate or coerce arguments. - * - * @private - * @param {Function} func The function to apply a rest parameter to. - * @param {number} [start=func.length-1] The start position of the rest parameter. - * @returns {Function} Returns the new function. - */ - function baseRest(func, start) { - return setToString(overRest(func, start, identity), func + ''); - } - - /** - * The base implementation of `_.sample`. - * - * @private - * @param {Array|Object} collection The collection to sample. - * @returns {*} Returns the random element. - */ - function baseSample(collection) { - return arraySample(values(collection)); - } - - /** - * The base implementation of `_.sampleSize` without param guards. - * - * @private - * @param {Array|Object} collection The collection to sample. - * @param {number} n The number of elements to sample. - * @returns {Array} Returns the random elements. - */ - function baseSampleSize(collection, n) { - var array = values(collection); - return shuffleSelf(array, baseClamp(n, 0, array.length)); - } - - /** - * The base implementation of `_.set`. - * - * @private - * @param {Object} object The object to modify. - * @param {Array|string} path The path of the property to set. - * @param {*} value The value to set. - * @param {Function} [customizer] The function to customize path creation. - * @returns {Object} Returns `object`. - */ - function baseSet(object, path, value, customizer) { - if (!isObject(object)) { - return object; - } - path = castPath(path, object); - - var index = -1, - length = path.length, - lastIndex = length - 1, - nested = object; - - while (nested != null && ++index < length) { - var key = toKey(path[index]), - newValue = value; - - if (index != lastIndex) { - var objValue = nested[key]; - newValue = customizer ? customizer(objValue, key, nested) : undefined; - if (newValue === undefined) { - newValue = isObject(objValue) - ? objValue - : (isIndex(path[index + 1]) ? [] : {}); - } - } - assignValue(nested, key, newValue); - nested = nested[key]; - } - return object; - } - - /** - * The base implementation of `setData` without support for hot loop shorting. - * - * @private - * @param {Function} func The function to associate metadata with. - * @param {*} data The metadata. - * @returns {Function} Returns `func`. - */ - var baseSetData = !metaMap ? identity : function(func, data) { - metaMap.set(func, data); - return func; - }; - - /** - * The base implementation of `setToString` without support for hot loop shorting. - * - * @private - * @param {Function} func The function to modify. - * @param {Function} string The `toString` result. - * @returns {Function} Returns `func`. - */ - var baseSetToString = !defineProperty ? identity : function(func, string) { - return defineProperty(func, 'toString', { - 'configurable': true, - 'enumerable': false, - 'value': constant(string), - 'writable': true - }); - }; - - /** - * The base implementation of `_.shuffle`. 
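baseSet creates missing intermediate containers as it walks the path, using an array when the next path segment looks like an index and a plain object otherwise; via the public _.set with an illustrative object:

var _ = require('lodash');

var object = { a: [{ b: { c: 3 } }] };

_.set(object, 'a[0].b.c', 4);
console.log(object.a[0].b.c); // => 4

_.set(object, ['x', '0', 'y'], 5);
console.log(object.x);        // => [ { y: 5 } ], '0' looked like an index so an array was created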
- * - * @private - * @param {Array|Object} collection The collection to shuffle. - * @returns {Array} Returns the new shuffled array. - */ - function baseShuffle(collection) { - return shuffleSelf(values(collection)); - } - - /** - * The base implementation of `_.slice` without an iteratee call guard. - * - * @private - * @param {Array} array The array to slice. - * @param {number} [start=0] The start position. - * @param {number} [end=array.length] The end position. - * @returns {Array} Returns the slice of `array`. - */ - function baseSlice(array, start, end) { - var index = -1, - length = array.length; - - if (start < 0) { - start = -start > length ? 0 : (length + start); - } - end = end > length ? length : end; - if (end < 0) { - end += length; - } - length = start > end ? 0 : ((end - start) >>> 0); - start >>>= 0; - - var result = Array(length); - while (++index < length) { - result[index] = array[index + start]; - } - return result; - } - - /** - * The base implementation of `_.some` without support for iteratee shorthands. - * - * @private - * @param {Array|Object} collection The collection to iterate over. - * @param {Function} predicate The function invoked per iteration. - * @returns {boolean} Returns `true` if any element passes the predicate check, - * else `false`. - */ - function baseSome(collection, predicate) { - var result; - - baseEach(collection, function(value, index, collection) { - result = predicate(value, index, collection); - return !result; - }); - return !!result; - } - - /** - * The base implementation of `_.sortedIndex` and `_.sortedLastIndex` which - * performs a binary search of `array` to determine the index at which `value` - * should be inserted into `array` in order to maintain its sort order. - * - * @private - * @param {Array} array The sorted array to inspect. - * @param {*} value The value to evaluate. - * @param {boolean} [retHighest] Specify returning the highest qualified index. - * @returns {number} Returns the index at which `value` should be inserted - * into `array`. - */ - function baseSortedIndex(array, value, retHighest) { - var low = 0, - high = array == null ? low : array.length; - - if (typeof value == 'number' && value === value && high <= HALF_MAX_ARRAY_LENGTH) { - while (low < high) { - var mid = (low + high) >>> 1, - computed = array[mid]; - - if (computed !== null && !isSymbol(computed) && - (retHighest ? (computed <= value) : (computed < value))) { - low = mid + 1; - } else { - high = mid; - } - } - return high; - } - return baseSortedIndexBy(array, value, identity, retHighest); - } - - /** - * The base implementation of `_.sortedIndexBy` and `_.sortedLastIndexBy` - * which invokes `iteratee` for `value` and each element of `array` to compute - * their sort ranking. The iteratee is invoked with one argument; (value). - * - * @private - * @param {Array} array The sorted array to inspect. - * @param {*} value The value to evaluate. - * @param {Function} iteratee The iteratee invoked per element. - * @param {boolean} [retHighest] Specify returning the highest qualified index. - * @returns {number} Returns the index at which `value` should be inserted - * into `array`. - */ - function baseSortedIndexBy(array, value, iteratee, retHighest) { - value = iteratee(value); - - var low = 0, - high = array == null ? 
0 : array.length, - valIsNaN = value !== value, - valIsNull = value === null, - valIsSymbol = isSymbol(value), - valIsUndefined = value === undefined; - - while (low < high) { - var mid = nativeFloor((low + high) / 2), - computed = iteratee(array[mid]), - othIsDefined = computed !== undefined, - othIsNull = computed === null, - othIsReflexive = computed === computed, - othIsSymbol = isSymbol(computed); - - if (valIsNaN) { - var setLow = retHighest || othIsReflexive; - } else if (valIsUndefined) { - setLow = othIsReflexive && (retHighest || othIsDefined); - } else if (valIsNull) { - setLow = othIsReflexive && othIsDefined && (retHighest || !othIsNull); - } else if (valIsSymbol) { - setLow = othIsReflexive && othIsDefined && !othIsNull && (retHighest || !othIsSymbol); - } else if (othIsNull || othIsSymbol) { - setLow = false; - } else { - setLow = retHighest ? (computed <= value) : (computed < value); - } - if (setLow) { - low = mid + 1; - } else { - high = mid; - } - } - return nativeMin(high, MAX_ARRAY_INDEX); - } - - /** - * The base implementation of `_.sortedUniq` and `_.sortedUniqBy` without - * support for iteratee shorthands. - * - * @private - * @param {Array} array The array to inspect. - * @param {Function} [iteratee] The iteratee invoked per element. - * @returns {Array} Returns the new duplicate free array. - */ - function baseSortedUniq(array, iteratee) { - var index = -1, - length = array.length, - resIndex = 0, - result = []; - - while (++index < length) { - var value = array[index], - computed = iteratee ? iteratee(value) : value; - - if (!index || !eq(computed, seen)) { - var seen = computed; - result[resIndex++] = value === 0 ? 0 : value; - } - } - return result; - } - - /** - * The base implementation of `_.toNumber` which doesn't ensure correct - * conversions of binary, hexadecimal, or octal string values. - * - * @private - * @param {*} value The value to process. - * @returns {number} Returns the number. - */ - function baseToNumber(value) { - if (typeof value == 'number') { - return value; - } - if (isSymbol(value)) { - return NAN; - } - return +value; - } - - /** - * The base implementation of `_.toString` which doesn't convert nullish - * values to empty strings. - * - * @private - * @param {*} value The value to process. - * @returns {string} Returns the string. - */ - function baseToString(value) { - // Exit early for strings to avoid a performance hit in some environments. - if (typeof value == 'string') { - return value; - } - if (isArray(value)) { - // Recursively convert values (susceptible to call stack limits). - return arrayMap(value, baseToString) + ''; - } - if (isSymbol(value)) { - return symbolToString ? symbolToString.call(value) : ''; - } - var result = (value + ''); - return (result == '0' && (1 / value) == -INFINITY) ? '-0' : result; - } - - /** - * The base implementation of `_.uniqBy` without support for iteratee shorthands. - * - * @private - * @param {Array} array The array to inspect. - * @param {Function} [iteratee] The iteratee invoked per element. - * @param {Function} [comparator] The comparator invoked per element. - * @returns {Array} Returns the new duplicate free array. - */ - function baseUniq(array, iteratee, comparator) { - var index = -1, - includes = arrayIncludes, - length = array.length, - isCommon = true, - result = [], - seen = result; - - if (comparator) { - isCommon = false; - includes = arrayIncludesWith; - } - else if (length >= LARGE_ARRAY_SIZE) { - var set = iteratee ? 
null : createSet(array); - if (set) { - return setToArray(set); - } - isCommon = false; - includes = cacheHas; - seen = new SetCache; - } - else { - seen = iteratee ? [] : result; - } - outer: - while (++index < length) { - var value = array[index], - computed = iteratee ? iteratee(value) : value; - - value = (comparator || value !== 0) ? value : 0; - if (isCommon && computed === computed) { - var seenIndex = seen.length; - while (seenIndex--) { - if (seen[seenIndex] === computed) { - continue outer; - } - } - if (iteratee) { - seen.push(computed); - } - result.push(value); - } - else if (!includes(seen, computed, comparator)) { - if (seen !== result) { - seen.push(computed); - } - result.push(value); - } - } - return result; - } - - /** - * The base implementation of `_.unset`. - * - * @private - * @param {Object} object The object to modify. - * @param {Array|string} path The property path to unset. - * @returns {boolean} Returns `true` if the property is deleted, else `false`. - */ - function baseUnset(object, path) { - path = castPath(path, object); - object = parent(object, path); - return object == null || delete object[toKey(last(path))]; - } - - /** - * The base implementation of `_.update`. - * - * @private - * @param {Object} object The object to modify. - * @param {Array|string} path The path of the property to update. - * @param {Function} updater The function to produce the updated value. - * @param {Function} [customizer] The function to customize path creation. - * @returns {Object} Returns `object`. - */ - function baseUpdate(object, path, updater, customizer) { - return baseSet(object, path, updater(baseGet(object, path)), customizer); - } - - /** - * The base implementation of methods like `_.dropWhile` and `_.takeWhile` - * without support for iteratee shorthands. - * - * @private - * @param {Array} array The array to query. - * @param {Function} predicate The function invoked per iteration. - * @param {boolean} [isDrop] Specify dropping elements instead of taking them. - * @param {boolean} [fromRight] Specify iterating from right to left. - * @returns {Array} Returns the slice of `array`. - */ - function baseWhile(array, predicate, isDrop, fromRight) { - var length = array.length, - index = fromRight ? length : -1; - - while ((fromRight ? index-- : ++index < length) && - predicate(array[index], index, array)) {} - - return isDrop - ? baseSlice(array, (fromRight ? 0 : index), (fromRight ? index + 1 : length)) - : baseSlice(array, (fromRight ? index + 1 : 0), (fromRight ? length : index)); - } - - /** - * The base implementation of `wrapperValue` which returns the result of - * performing a sequence of actions on the unwrapped `value`, where each - * successive action is supplied the return value of the previous. - * - * @private - * @param {*} value The unwrapped value. - * @param {Array} actions Actions to perform to resolve the unwrapped value. - * @returns {*} Returns the resolved value. - */ - function baseWrapperValue(value, actions) { - var result = value; - if (result instanceof LazyWrapper) { - result = result.value(); - } - return arrayReduce(actions, function(result, action) { - return action.func.apply(action.thisArg, arrayPush([result], action.args)); - }, result); - } - - /** - * The base implementation of methods like `_.xor`, without support for - * iteratee shorthands, that accepts an array of arrays to inspect. - * - * @private - * @param {Array} arrays The arrays to inspect. - * @param {Function} [iteratee] The iteratee invoked per element. 
- * @param {Function} [comparator] The comparator invoked per element. - * @returns {Array} Returns the new array of values. - */ - function baseXor(arrays, iteratee, comparator) { - var length = arrays.length; - if (length < 2) { - return length ? baseUniq(arrays[0]) : []; - } - var index = -1, - result = Array(length); - - while (++index < length) { - var array = arrays[index], - othIndex = -1; - - while (++othIndex < length) { - if (othIndex != index) { - result[index] = baseDifference(result[index] || array, arrays[othIndex], iteratee, comparator); - } - } - } - return baseUniq(baseFlatten(result, 1), iteratee, comparator); - } - - /** - * This base implementation of `_.zipObject` which assigns values using `assignFunc`. - * - * @private - * @param {Array} props The property identifiers. - * @param {Array} values The property values. - * @param {Function} assignFunc The function to assign values. - * @returns {Object} Returns the new object. - */ - function baseZipObject(props, values, assignFunc) { - var index = -1, - length = props.length, - valsLength = values.length, - result = {}; - - while (++index < length) { - var value = index < valsLength ? values[index] : undefined; - assignFunc(result, props[index], value); - } - return result; - } - - /** - * Casts `value` to an empty array if it's not an array like object. - * - * @private - * @param {*} value The value to inspect. - * @returns {Array|Object} Returns the cast array-like object. - */ - function castArrayLikeObject(value) { - return isArrayLikeObject(value) ? value : []; - } - - /** - * Casts `value` to `identity` if it's not a function. - * - * @private - * @param {*} value The value to inspect. - * @returns {Function} Returns cast function. - */ - function castFunction(value) { - return typeof value == 'function' ? value : identity; - } - - /** - * Casts `value` to a path array if it's not one. - * - * @private - * @param {*} value The value to inspect. - * @param {Object} [object] The object to query keys on. - * @returns {Array} Returns the cast property path array. - */ - function castPath(value, object) { - if (isArray(value)) { - return value; - } - return isKey(value, object) ? [value] : stringToPath(toString(value)); - } - - /** - * A `baseRest` alias which can be replaced with `identity` by module - * replacement plugins. - * - * @private - * @type {Function} - * @param {Function} func The function to apply a rest parameter to. - * @returns {Function} Returns the new function. - */ - var castRest = baseRest; - - /** - * Casts `array` to a slice if it's needed. - * - * @private - * @param {Array} array The array to inspect. - * @param {number} start The start position. - * @param {number} [end=array.length] The end position. - * @returns {Array} Returns the cast slice. - */ - function castSlice(array, start, end) { - var length = array.length; - end = end === undefined ? length : end; - return (!start && end >= length) ? array : baseSlice(array, start, end); - } - - /** - * A simple wrapper around the global [`clearTimeout`](https://mdn.io/clearTimeout). - * - * @private - * @param {number|Object} id The timer id or timeout object of the timer to clear. - */ - var clearTimeout = ctxClearTimeout || function(id) { - return root.clearTimeout(id); - }; - - /** - * Creates a clone of `buffer`. - * - * @private - * @param {Buffer} buffer The buffer to clone. - * @param {boolean} [isDeep] Specify a deep clone. - * @returns {Buffer} Returns the cloned buffer. 
- */ - function cloneBuffer(buffer, isDeep) { - if (isDeep) { - return buffer.slice(); - } - var length = buffer.length, - result = allocUnsafe ? allocUnsafe(length) : new buffer.constructor(length); - - buffer.copy(result); - return result; - } - - /** - * Creates a clone of `arrayBuffer`. - * - * @private - * @param {ArrayBuffer} arrayBuffer The array buffer to clone. - * @returns {ArrayBuffer} Returns the cloned array buffer. - */ - function cloneArrayBuffer(arrayBuffer) { - var result = new arrayBuffer.constructor(arrayBuffer.byteLength); - new Uint8Array(result).set(new Uint8Array(arrayBuffer)); - return result; - } - - /** - * Creates a clone of `dataView`. - * - * @private - * @param {Object} dataView The data view to clone. - * @param {boolean} [isDeep] Specify a deep clone. - * @returns {Object} Returns the cloned data view. - */ - function cloneDataView(dataView, isDeep) { - var buffer = isDeep ? cloneArrayBuffer(dataView.buffer) : dataView.buffer; - return new dataView.constructor(buffer, dataView.byteOffset, dataView.byteLength); - } - - /** - * Creates a clone of `regexp`. - * - * @private - * @param {Object} regexp The regexp to clone. - * @returns {Object} Returns the cloned regexp. - */ - function cloneRegExp(regexp) { - var result = new regexp.constructor(regexp.source, reFlags.exec(regexp)); - result.lastIndex = regexp.lastIndex; - return result; - } - - /** - * Creates a clone of the `symbol` object. - * - * @private - * @param {Object} symbol The symbol object to clone. - * @returns {Object} Returns the cloned symbol object. - */ - function cloneSymbol(symbol) { - return symbolValueOf ? Object(symbolValueOf.call(symbol)) : {}; - } - - /** - * Creates a clone of `typedArray`. - * - * @private - * @param {Object} typedArray The typed array to clone. - * @param {boolean} [isDeep] Specify a deep clone. - * @returns {Object} Returns the cloned typed array. - */ - function cloneTypedArray(typedArray, isDeep) { - var buffer = isDeep ? cloneArrayBuffer(typedArray.buffer) : typedArray.buffer; - return new typedArray.constructor(buffer, typedArray.byteOffset, typedArray.length); - } - - /** - * Compares values to sort them in ascending order. - * - * @private - * @param {*} value The value to compare. - * @param {*} other The other value to compare. - * @returns {number} Returns the sort order indicator for `value`. - */ - function compareAscending(value, other) { - if (value !== other) { - var valIsDefined = value !== undefined, - valIsNull = value === null, - valIsReflexive = value === value, - valIsSymbol = isSymbol(value); - - var othIsDefined = other !== undefined, - othIsNull = other === null, - othIsReflexive = other === other, - othIsSymbol = isSymbol(other); - - if ((!othIsNull && !othIsSymbol && !valIsSymbol && value > other) || - (valIsSymbol && othIsDefined && othIsReflexive && !othIsNull && !othIsSymbol) || - (valIsNull && othIsDefined && othIsReflexive) || - (!valIsDefined && othIsReflexive) || - !valIsReflexive) { - return 1; - } - if ((!valIsNull && !valIsSymbol && !othIsSymbol && value < other) || - (othIsSymbol && valIsDefined && valIsReflexive && !valIsNull && !valIsSymbol) || - (othIsNull && valIsDefined && valIsReflexive) || - (!othIsDefined && valIsReflexive) || - !othIsReflexive) { - return -1; - } - } - return 0; - } - - /** - * Used by `_.orderBy` to compare multiple properties of a value to another - * and stable sort them. - * - * If `orders` is unspecified, all values are sorted in ascending order. 
Otherwise, - * specify an order of "desc" for descending or "asc" for ascending sort order - * of corresponding values. - * - * @private - * @param {Object} object The object to compare. - * @param {Object} other The other object to compare. - * @param {boolean[]|string[]} orders The order to sort by for each property. - * @returns {number} Returns the sort order indicator for `object`. - */ - function compareMultiple(object, other, orders) { - var index = -1, - objCriteria = object.criteria, - othCriteria = other.criteria, - length = objCriteria.length, - ordersLength = orders.length; - - while (++index < length) { - var result = compareAscending(objCriteria[index], othCriteria[index]); - if (result) { - if (index >= ordersLength) { - return result; - } - var order = orders[index]; - return result * (order == 'desc' ? -1 : 1); - } - } - // Fixes an `Array#sort` bug in the JS engine embedded in Adobe applications - // that causes it, under certain circumstances, to provide the same value for - // `object` and `other`. See https://github.com/jashkenas/underscore/pull/1247 - // for more details. - // - // This also ensures a stable sort in V8 and other engines. - // See https://bugs.chromium.org/p/v8/issues/detail?id=90 for more details. - return object.index - other.index; - } - - /** - * Creates an array that is the composition of partially applied arguments, - * placeholders, and provided arguments into a single array of arguments. - * - * @private - * @param {Array} args The provided arguments. - * @param {Array} partials The arguments to prepend to those provided. - * @param {Array} holders The `partials` placeholder indexes. - * @params {boolean} [isCurried] Specify composing for a curried function. - * @returns {Array} Returns the new array of composed arguments. - */ - function composeArgs(args, partials, holders, isCurried) { - var argsIndex = -1, - argsLength = args.length, - holdersLength = holders.length, - leftIndex = -1, - leftLength = partials.length, - rangeLength = nativeMax(argsLength - holdersLength, 0), - result = Array(leftLength + rangeLength), - isUncurried = !isCurried; - - while (++leftIndex < leftLength) { - result[leftIndex] = partials[leftIndex]; - } - while (++argsIndex < holdersLength) { - if (isUncurried || argsIndex < argsLength) { - result[holders[argsIndex]] = args[argsIndex]; - } - } - while (rangeLength--) { - result[leftIndex++] = args[argsIndex++]; - } - return result; - } - - /** - * This function is like `composeArgs` except that the arguments composition - * is tailored for `_.partialRight`. - * - * @private - * @param {Array} args The provided arguments. - * @param {Array} partials The arguments to append to those provided. - * @param {Array} holders The `partials` placeholder indexes. - * @params {boolean} [isCurried] Specify composing for a curried function. - * @returns {Array} Returns the new array of composed arguments. 
- */ - function composeArgsRight(args, partials, holders, isCurried) { - var argsIndex = -1, - argsLength = args.length, - holdersIndex = -1, - holdersLength = holders.length, - rightIndex = -1, - rightLength = partials.length, - rangeLength = nativeMax(argsLength - holdersLength, 0), - result = Array(rangeLength + rightLength), - isUncurried = !isCurried; - - while (++argsIndex < rangeLength) { - result[argsIndex] = args[argsIndex]; - } - var offset = argsIndex; - while (++rightIndex < rightLength) { - result[offset + rightIndex] = partials[rightIndex]; - } - while (++holdersIndex < holdersLength) { - if (isUncurried || argsIndex < argsLength) { - result[offset + holders[holdersIndex]] = args[argsIndex++]; - } - } - return result; - } - - /** - * Copies the values of `source` to `array`. - * - * @private - * @param {Array} source The array to copy values from. - * @param {Array} [array=[]] The array to copy values to. - * @returns {Array} Returns `array`. - */ - function copyArray(source, array) { - var index = -1, - length = source.length; - - array || (array = Array(length)); - while (++index < length) { - array[index] = source[index]; - } - return array; - } - - /** - * Copies properties of `source` to `object`. - * - * @private - * @param {Object} source The object to copy properties from. - * @param {Array} props The property identifiers to copy. - * @param {Object} [object={}] The object to copy properties to. - * @param {Function} [customizer] The function to customize copied values. - * @returns {Object} Returns `object`. - */ - function copyObject(source, props, object, customizer) { - var isNew = !object; - object || (object = {}); - - var index = -1, - length = props.length; - - while (++index < length) { - var key = props[index]; - - var newValue = customizer - ? customizer(object[key], source[key], key, object, source) - : undefined; - - if (newValue === undefined) { - newValue = source[key]; - } - if (isNew) { - baseAssignValue(object, key, newValue); - } else { - assignValue(object, key, newValue); - } - } - return object; - } - - /** - * Copies own symbols of `source` to `object`. - * - * @private - * @param {Object} source The object to copy symbols from. - * @param {Object} [object={}] The object to copy symbols to. - * @returns {Object} Returns `object`. - */ - function copySymbols(source, object) { - return copyObject(source, getSymbols(source), object); - } - - /** - * Copies own and inherited symbols of `source` to `object`. - * - * @private - * @param {Object} source The object to copy symbols from. - * @param {Object} [object={}] The object to copy symbols to. - * @returns {Object} Returns `object`. - */ - function copySymbolsIn(source, object) { - return copyObject(source, getSymbolsIn(source), object); - } - - /** - * Creates a function like `_.groupBy`. - * - * @private - * @param {Function} setter The function to set accumulator values. - * @param {Function} [initializer] The accumulator object initializer. - * @returns {Function} Returns the new aggregator function. - */ - function createAggregator(setter, initializer) { - return function(collection, iteratee) { - var func = isArray(collection) ? arrayAggregator : baseAggregator, - accumulator = initializer ? initializer() : {}; - - return func(collection, setter, getIteratee(iteratee, 2), accumulator); - }; - } - - /** - * Creates a function like `_.assign`. - * - * @private - * @param {Function} assigner The function to assign values. - * @returns {Function} Returns the new assigner function. 
- */ - function createAssigner(assigner) { - return baseRest(function(object, sources) { - var index = -1, - length = sources.length, - customizer = length > 1 ? sources[length - 1] : undefined, - guard = length > 2 ? sources[2] : undefined; - - customizer = (assigner.length > 3 && typeof customizer == 'function') - ? (length--, customizer) - : undefined; - - if (guard && isIterateeCall(sources[0], sources[1], guard)) { - customizer = length < 3 ? undefined : customizer; - length = 1; - } - object = Object(object); - while (++index < length) { - var source = sources[index]; - if (source) { - assigner(object, source, index, customizer); - } - } - return object; - }); - } - - /** - * Creates a `baseEach` or `baseEachRight` function. - * - * @private - * @param {Function} eachFunc The function to iterate over a collection. - * @param {boolean} [fromRight] Specify iterating from right to left. - * @returns {Function} Returns the new base function. - */ - function createBaseEach(eachFunc, fromRight) { - return function(collection, iteratee) { - if (collection == null) { - return collection; - } - if (!isArrayLike(collection)) { - return eachFunc(collection, iteratee); - } - var length = collection.length, - index = fromRight ? length : -1, - iterable = Object(collection); - - while ((fromRight ? index-- : ++index < length)) { - if (iteratee(iterable[index], index, iterable) === false) { - break; - } - } - return collection; - }; - } - - /** - * Creates a base function for methods like `_.forIn` and `_.forOwn`. - * - * @private - * @param {boolean} [fromRight] Specify iterating from right to left. - * @returns {Function} Returns the new base function. - */ - function createBaseFor(fromRight) { - return function(object, iteratee, keysFunc) { - var index = -1, - iterable = Object(object), - props = keysFunc(object), - length = props.length; - - while (length--) { - var key = props[fromRight ? length : ++index]; - if (iteratee(iterable[key], key, iterable) === false) { - break; - } - } - return object; - }; - } - - /** - * Creates a function that wraps `func` to invoke it with the optional `this` - * binding of `thisArg`. - * - * @private - * @param {Function} func The function to wrap. - * @param {number} bitmask The bitmask flags. See `createWrap` for more details. - * @param {*} [thisArg] The `this` binding of `func`. - * @returns {Function} Returns the new wrapped function. - */ - function createBind(func, bitmask, thisArg) { - var isBind = bitmask & WRAP_BIND_FLAG, - Ctor = createCtor(func); - - function wrapper() { - var fn = (this && this !== root && this instanceof wrapper) ? Ctor : func; - return fn.apply(isBind ? thisArg : this, arguments); - } - return wrapper; - } - - /** - * Creates a function like `_.lowerFirst`. - * - * @private - * @param {string} methodName The name of the `String` case method to use. - * @returns {Function} Returns the new case function. - */ - function createCaseFirst(methodName) { - return function(string) { - string = toString(string); - - var strSymbols = hasUnicode(string) - ? stringToArray(string) - : undefined; - - var chr = strSymbols - ? strSymbols[0] - : string.charAt(0); - - var trailing = strSymbols - ? castSlice(strSymbols, 1).join('') - : string.slice(1); - - return chr[methodName]() + trailing; - }; - } - - /** - * Creates a function like `_.camelCase`. - * - * @private - * @param {Function} callback The function to combine each word. - * @returns {Function} Returns the new compounder function. 
- */ - function createCompounder(callback) { - return function(string) { - return arrayReduce(words(deburr(string).replace(reApos, '')), callback, ''); - }; - } - - /** - * Creates a function that produces an instance of `Ctor` regardless of - * whether it was invoked as part of a `new` expression or by `call` or `apply`. - * - * @private - * @param {Function} Ctor The constructor to wrap. - * @returns {Function} Returns the new wrapped function. - */ - function createCtor(Ctor) { - return function() { - // Use a `switch` statement to work with class constructors. See - // http://ecma-international.org/ecma-262/7.0/#sec-ecmascript-function-objects-call-thisargument-argumentslist - // for more details. - var args = arguments; - switch (args.length) { - case 0: return new Ctor; - case 1: return new Ctor(args[0]); - case 2: return new Ctor(args[0], args[1]); - case 3: return new Ctor(args[0], args[1], args[2]); - case 4: return new Ctor(args[0], args[1], args[2], args[3]); - case 5: return new Ctor(args[0], args[1], args[2], args[3], args[4]); - case 6: return new Ctor(args[0], args[1], args[2], args[3], args[4], args[5]); - case 7: return new Ctor(args[0], args[1], args[2], args[3], args[4], args[5], args[6]); - } - var thisBinding = baseCreate(Ctor.prototype), - result = Ctor.apply(thisBinding, args); - - // Mimic the constructor's `return` behavior. - // See https://es5.github.io/#x13.2.2 for more details. - return isObject(result) ? result : thisBinding; - }; - } - - /** - * Creates a function that wraps `func` to enable currying. - * - * @private - * @param {Function} func The function to wrap. - * @param {number} bitmask The bitmask flags. See `createWrap` for more details. - * @param {number} arity The arity of `func`. - * @returns {Function} Returns the new wrapped function. - */ - function createCurry(func, bitmask, arity) { - var Ctor = createCtor(func); - - function wrapper() { - var length = arguments.length, - args = Array(length), - index = length, - placeholder = getHolder(wrapper); - - while (index--) { - args[index] = arguments[index]; - } - var holders = (length < 3 && args[0] !== placeholder && args[length - 1] !== placeholder) - ? [] - : replaceHolders(args, placeholder); - - length -= holders.length; - if (length < arity) { - return createRecurry( - func, bitmask, createHybrid, wrapper.placeholder, undefined, - args, holders, undefined, undefined, arity - length); - } - var fn = (this && this !== root && this instanceof wrapper) ? Ctor : func; - return apply(fn, this, args); - } - return wrapper; - } - - /** - * Creates a `_.find` or `_.findLast` function. - * - * @private - * @param {Function} findIndexFunc The function to find the collection index. - * @returns {Function} Returns the new find function. - */ - function createFind(findIndexFunc) { - return function(collection, predicate, fromIndex) { - var iterable = Object(collection); - if (!isArrayLike(collection)) { - var iteratee = getIteratee(predicate, 3); - collection = keys(collection); - predicate = function(key) { return iteratee(iterable[key], key, iterable); }; - } - var index = findIndexFunc(collection, predicate, fromIndex); - return index > -1 ? iterable[iteratee ? collection[index] : index] : undefined; - }; - } - - /** - * Creates a `_.flow` or `_.flowRight` function. - * - * @private - * @param {boolean} [fromRight] Specify iterating from right to left. - * @returns {Function} Returns the new flow function. 
- */ - function createFlow(fromRight) { - return flatRest(function(funcs) { - var length = funcs.length, - index = length, - prereq = LodashWrapper.prototype.thru; - - if (fromRight) { - funcs.reverse(); - } - while (index--) { - var func = funcs[index]; - if (typeof func != 'function') { - throw new TypeError(FUNC_ERROR_TEXT); - } - if (prereq && !wrapper && getFuncName(func) == 'wrapper') { - var wrapper = new LodashWrapper([], true); - } - } - index = wrapper ? index : length; - while (++index < length) { - func = funcs[index]; - - var funcName = getFuncName(func), - data = funcName == 'wrapper' ? getData(func) : undefined; - - if (data && isLaziable(data[0]) && - data[1] == (WRAP_ARY_FLAG | WRAP_CURRY_FLAG | WRAP_PARTIAL_FLAG | WRAP_REARG_FLAG) && - !data[4].length && data[9] == 1 - ) { - wrapper = wrapper[getFuncName(data[0])].apply(wrapper, data[3]); - } else { - wrapper = (func.length == 1 && isLaziable(func)) - ? wrapper[funcName]() - : wrapper.thru(func); - } - } - return function() { - var args = arguments, - value = args[0]; - - if (wrapper && args.length == 1 && isArray(value)) { - return wrapper.plant(value).value(); - } - var index = 0, - result = length ? funcs[index].apply(this, args) : value; - - while (++index < length) { - result = funcs[index].call(this, result); - } - return result; - }; - }); - } - - /** - * Creates a function that wraps `func` to invoke it with optional `this` - * binding of `thisArg`, partial application, and currying. - * - * @private - * @param {Function|string} func The function or method name to wrap. - * @param {number} bitmask The bitmask flags. See `createWrap` for more details. - * @param {*} [thisArg] The `this` binding of `func`. - * @param {Array} [partials] The arguments to prepend to those provided to - * the new function. - * @param {Array} [holders] The `partials` placeholder indexes. - * @param {Array} [partialsRight] The arguments to append to those provided - * to the new function. - * @param {Array} [holdersRight] The `partialsRight` placeholder indexes. - * @param {Array} [argPos] The argument positions of the new function. - * @param {number} [ary] The arity cap of `func`. - * @param {number} [arity] The arity of `func`. - * @returns {Function} Returns the new wrapped function. - */ - function createHybrid(func, bitmask, thisArg, partials, holders, partialsRight, holdersRight, argPos, ary, arity) { - var isAry = bitmask & WRAP_ARY_FLAG, - isBind = bitmask & WRAP_BIND_FLAG, - isBindKey = bitmask & WRAP_BIND_KEY_FLAG, - isCurried = bitmask & (WRAP_CURRY_FLAG | WRAP_CURRY_RIGHT_FLAG), - isFlip = bitmask & WRAP_FLIP_FLAG, - Ctor = isBindKey ? undefined : createCtor(func); - - function wrapper() { - var length = arguments.length, - args = Array(length), - index = length; - - while (index--) { - args[index] = arguments[index]; - } - if (isCurried) { - var placeholder = getHolder(wrapper), - holdersCount = countHolders(args, placeholder); - } - if (partials) { - args = composeArgs(args, partials, holders, isCurried); - } - if (partialsRight) { - args = composeArgsRight(args, partialsRight, holdersRight, isCurried); - } - length -= holdersCount; - if (isCurried && length < arity) { - var newHolders = replaceHolders(args, placeholder); - return createRecurry( - func, bitmask, createHybrid, wrapper.placeholder, thisArg, - args, newHolders, argPos, ary, arity - length - ); - } - var thisBinding = isBind ? thisArg : this, - fn = isBindKey ? 
thisBinding[func] : func; - - length = args.length; - if (argPos) { - args = reorder(args, argPos); - } else if (isFlip && length > 1) { - args.reverse(); - } - if (isAry && ary < length) { - args.length = ary; - } - if (this && this !== root && this instanceof wrapper) { - fn = Ctor || createCtor(fn); - } - return fn.apply(thisBinding, args); - } - return wrapper; - } - - /** - * Creates a function like `_.invertBy`. - * - * @private - * @param {Function} setter The function to set accumulator values. - * @param {Function} toIteratee The function to resolve iteratees. - * @returns {Function} Returns the new inverter function. - */ - function createInverter(setter, toIteratee) { - return function(object, iteratee) { - return baseInverter(object, setter, toIteratee(iteratee), {}); - }; - } - - /** - * Creates a function that performs a mathematical operation on two values. - * - * @private - * @param {Function} operator The function to perform the operation. - * @param {number} [defaultValue] The value used for `undefined` arguments. - * @returns {Function} Returns the new mathematical operation function. - */ - function createMathOperation(operator, defaultValue) { - return function(value, other) { - var result; - if (value === undefined && other === undefined) { - return defaultValue; - } - if (value !== undefined) { - result = value; - } - if (other !== undefined) { - if (result === undefined) { - return other; - } - if (typeof value == 'string' || typeof other == 'string') { - value = baseToString(value); - other = baseToString(other); - } else { - value = baseToNumber(value); - other = baseToNumber(other); - } - result = operator(value, other); - } - return result; - }; - } - - /** - * Creates a function like `_.over`. - * - * @private - * @param {Function} arrayFunc The function to iterate over iteratees. - * @returns {Function} Returns the new over function. - */ - function createOver(arrayFunc) { - return flatRest(function(iteratees) { - iteratees = arrayMap(iteratees, baseUnary(getIteratee())); - return baseRest(function(args) { - var thisArg = this; - return arrayFunc(iteratees, function(iteratee) { - return apply(iteratee, thisArg, args); - }); - }); - }); - } - - /** - * Creates the padding for `string` based on `length`. The `chars` string - * is truncated if the number of characters exceeds `length`. - * - * @private - * @param {number} length The padding length. - * @param {string} [chars=' '] The string used as padding. - * @returns {string} Returns the padding for `string`. - */ - function createPadding(length, chars) { - chars = chars === undefined ? ' ' : baseToString(chars); - - var charsLength = chars.length; - if (charsLength < 2) { - return charsLength ? baseRepeat(chars, length) : chars; - } - var result = baseRepeat(chars, nativeCeil(length / stringSize(chars))); - return hasUnicode(chars) - ? castSlice(stringToArray(result), 0, length).join('') - : result.slice(0, length); - } - - /** - * Creates a function that wraps `func` to invoke it with the `this` binding - * of `thisArg` and `partials` prepended to the arguments it receives. - * - * @private - * @param {Function} func The function to wrap. - * @param {number} bitmask The bitmask flags. See `createWrap` for more details. - * @param {*} thisArg The `this` binding of `func`. - * @param {Array} partials The arguments to prepend to those provided to - * the new function. - * @returns {Function} Returns the new wrapped function. 
- */ - function createPartial(func, bitmask, thisArg, partials) { - var isBind = bitmask & WRAP_BIND_FLAG, - Ctor = createCtor(func); - - function wrapper() { - var argsIndex = -1, - argsLength = arguments.length, - leftIndex = -1, - leftLength = partials.length, - args = Array(leftLength + argsLength), - fn = (this && this !== root && this instanceof wrapper) ? Ctor : func; - - while (++leftIndex < leftLength) { - args[leftIndex] = partials[leftIndex]; - } - while (argsLength--) { - args[leftIndex++] = arguments[++argsIndex]; - } - return apply(fn, isBind ? thisArg : this, args); - } - return wrapper; - } - - /** - * Creates a `_.range` or `_.rangeRight` function. - * - * @private - * @param {boolean} [fromRight] Specify iterating from right to left. - * @returns {Function} Returns the new range function. - */ - function createRange(fromRight) { - return function(start, end, step) { - if (step && typeof step != 'number' && isIterateeCall(start, end, step)) { - end = step = undefined; - } - // Ensure the sign of `-0` is preserved. - start = toFinite(start); - if (end === undefined) { - end = start; - start = 0; - } else { - end = toFinite(end); - } - step = step === undefined ? (start < end ? 1 : -1) : toFinite(step); - return baseRange(start, end, step, fromRight); - }; - } - - /** - * Creates a function that performs a relational operation on two values. - * - * @private - * @param {Function} operator The function to perform the operation. - * @returns {Function} Returns the new relational operation function. - */ - function createRelationalOperation(operator) { - return function(value, other) { - if (!(typeof value == 'string' && typeof other == 'string')) { - value = toNumber(value); - other = toNumber(other); - } - return operator(value, other); - }; - } - - /** - * Creates a function that wraps `func` to continue currying. - * - * @private - * @param {Function} func The function to wrap. - * @param {number} bitmask The bitmask flags. See `createWrap` for more details. - * @param {Function} wrapFunc The function to create the `func` wrapper. - * @param {*} placeholder The placeholder value. - * @param {*} [thisArg] The `this` binding of `func`. - * @param {Array} [partials] The arguments to prepend to those provided to - * the new function. - * @param {Array} [holders] The `partials` placeholder indexes. - * @param {Array} [argPos] The argument positions of the new function. - * @param {number} [ary] The arity cap of `func`. - * @param {number} [arity] The arity of `func`. - * @returns {Function} Returns the new wrapped function. - */ - function createRecurry(func, bitmask, wrapFunc, placeholder, thisArg, partials, holders, argPos, ary, arity) { - var isCurry = bitmask & WRAP_CURRY_FLAG, - newHolders = isCurry ? holders : undefined, - newHoldersRight = isCurry ? undefined : holders, - newPartials = isCurry ? partials : undefined, - newPartialsRight = isCurry ? undefined : partials; - - bitmask |= (isCurry ? WRAP_PARTIAL_FLAG : WRAP_PARTIAL_RIGHT_FLAG); - bitmask &= ~(isCurry ? 
WRAP_PARTIAL_RIGHT_FLAG : WRAP_PARTIAL_FLAG); - - if (!(bitmask & WRAP_CURRY_BOUND_FLAG)) { - bitmask &= ~(WRAP_BIND_FLAG | WRAP_BIND_KEY_FLAG); - } - var newData = [ - func, bitmask, thisArg, newPartials, newHolders, newPartialsRight, - newHoldersRight, argPos, ary, arity - ]; - - var result = wrapFunc.apply(undefined, newData); - if (isLaziable(func)) { - setData(result, newData); - } - result.placeholder = placeholder; - return setWrapToString(result, func, bitmask); - } - - /** - * Creates a function like `_.round`. - * - * @private - * @param {string} methodName The name of the `Math` method to use when rounding. - * @returns {Function} Returns the new round function. - */ - function createRound(methodName) { - var func = Math[methodName]; - return function(number, precision) { - number = toNumber(number); - precision = precision == null ? 0 : nativeMin(toInteger(precision), 292); - if (precision && nativeIsFinite(number)) { - // Shift with exponential notation to avoid floating-point issues. - // See [MDN](https://mdn.io/round#Examples) for more details. - var pair = (toString(number) + 'e').split('e'), - value = func(pair[0] + 'e' + (+pair[1] + precision)); - - pair = (toString(value) + 'e').split('e'); - return +(pair[0] + 'e' + (+pair[1] - precision)); - } - return func(number); - }; - } - - /** - * Creates a set object of `values`. - * - * @private - * @param {Array} values The values to add to the set. - * @returns {Object} Returns the new set. - */ - var createSet = !(Set && (1 / setToArray(new Set([,-0]))[1]) == INFINITY) ? noop : function(values) { - return new Set(values); - }; - - /** - * Creates a `_.toPairs` or `_.toPairsIn` function. - * - * @private - * @param {Function} keysFunc The function to get the keys of a given object. - * @returns {Function} Returns the new pairs function. - */ - function createToPairs(keysFunc) { - return function(object) { - var tag = getTag(object); - if (tag == mapTag) { - return mapToArray(object); - } - if (tag == setTag) { - return setToPairs(object); - } - return baseToPairs(object, keysFunc(object)); - }; - } - - /** - * Creates a function that either curries or invokes `func` with optional - * `this` binding and partially applied arguments. - * - * @private - * @param {Function|string} func The function or method name to wrap. - * @param {number} bitmask The bitmask flags. - * 1 - `_.bind` - * 2 - `_.bindKey` - * 4 - `_.curry` or `_.curryRight` of a bound function - * 8 - `_.curry` - * 16 - `_.curryRight` - * 32 - `_.partial` - * 64 - `_.partialRight` - * 128 - `_.rearg` - * 256 - `_.ary` - * 512 - `_.flip` - * @param {*} [thisArg] The `this` binding of `func`. - * @param {Array} [partials] The arguments to be partially applied. - * @param {Array} [holders] The `partials` placeholder indexes. - * @param {Array} [argPos] The argument positions of the new function. - * @param {number} [ary] The arity cap of `func`. - * @param {number} [arity] The arity of `func`. - * @returns {Function} Returns the new wrapped function. - */ - function createWrap(func, bitmask, thisArg, partials, holders, argPos, ary, arity) { - var isBindKey = bitmask & WRAP_BIND_KEY_FLAG; - if (!isBindKey && typeof func != 'function') { - throw new TypeError(FUNC_ERROR_TEXT); - } - var length = partials ? partials.length : 0; - if (!length) { - bitmask &= ~(WRAP_PARTIAL_FLAG | WRAP_PARTIAL_RIGHT_FLAG); - partials = holders = undefined; - } - ary = ary === undefined ? ary : nativeMax(toInteger(ary), 0); - arity = arity === undefined ? 
arity : toInteger(arity); - length -= holders ? holders.length : 0; - - if (bitmask & WRAP_PARTIAL_RIGHT_FLAG) { - var partialsRight = partials, - holdersRight = holders; - - partials = holders = undefined; - } - var data = isBindKey ? undefined : getData(func); - - var newData = [ - func, bitmask, thisArg, partials, holders, partialsRight, holdersRight, - argPos, ary, arity - ]; - - if (data) { - mergeData(newData, data); - } - func = newData[0]; - bitmask = newData[1]; - thisArg = newData[2]; - partials = newData[3]; - holders = newData[4]; - arity = newData[9] = newData[9] === undefined - ? (isBindKey ? 0 : func.length) - : nativeMax(newData[9] - length, 0); - - if (!arity && bitmask & (WRAP_CURRY_FLAG | WRAP_CURRY_RIGHT_FLAG)) { - bitmask &= ~(WRAP_CURRY_FLAG | WRAP_CURRY_RIGHT_FLAG); - } - if (!bitmask || bitmask == WRAP_BIND_FLAG) { - var result = createBind(func, bitmask, thisArg); - } else if (bitmask == WRAP_CURRY_FLAG || bitmask == WRAP_CURRY_RIGHT_FLAG) { - result = createCurry(func, bitmask, arity); - } else if ((bitmask == WRAP_PARTIAL_FLAG || bitmask == (WRAP_BIND_FLAG | WRAP_PARTIAL_FLAG)) && !holders.length) { - result = createPartial(func, bitmask, thisArg, partials); - } else { - result = createHybrid.apply(undefined, newData); - } - var setter = data ? baseSetData : setData; - return setWrapToString(setter(result, newData), func, bitmask); - } - - /** - * Used by `_.defaults` to customize its `_.assignIn` use to assign properties - * of source objects to the destination object for all destination properties - * that resolve to `undefined`. - * - * @private - * @param {*} objValue The destination value. - * @param {*} srcValue The source value. - * @param {string} key The key of the property to assign. - * @param {Object} object The parent object of `objValue`. - * @returns {*} Returns the value to assign. - */ - function customDefaultsAssignIn(objValue, srcValue, key, object) { - if (objValue === undefined || - (eq(objValue, objectProto[key]) && !hasOwnProperty.call(object, key))) { - return srcValue; - } - return objValue; - } - - /** - * Used by `_.defaultsDeep` to customize its `_.merge` use to merge source - * objects into destination objects that are passed thru. - * - * @private - * @param {*} objValue The destination value. - * @param {*} srcValue The source value. - * @param {string} key The key of the property to merge. - * @param {Object} object The parent object of `objValue`. - * @param {Object} source The parent object of `srcValue`. - * @param {Object} [stack] Tracks traversed source values and their merged - * counterparts. - * @returns {*} Returns the value to assign. - */ - function customDefaultsMerge(objValue, srcValue, key, object, source, stack) { - if (isObject(objValue) && isObject(srcValue)) { - // Recursively merge objects and arrays (susceptible to call stack limits). - stack.set(srcValue, objValue); - baseMerge(objValue, srcValue, undefined, customDefaultsMerge, stack); - stack['delete'](srcValue); - } - return objValue; - } - - /** - * Used by `_.omit` to customize its `_.cloneDeep` use to only clone plain - * objects. - * - * @private - * @param {*} value The value to inspect. - * @param {string} key The key of the property to inspect. - * @returns {*} Returns the uncloned value or `undefined` to defer cloning to `_.cloneDeep`. - */ - function customOmitClone(value) { - return isPlainObject(value) ? undefined : value; - } - - /** - * A specialized version of `baseIsEqualDeep` for arrays with support for - * partial deep comparisons. 
- * - * @private - * @param {Array} array The array to compare. - * @param {Array} other The other array to compare. - * @param {number} bitmask The bitmask flags. See `baseIsEqual` for more details. - * @param {Function} customizer The function to customize comparisons. - * @param {Function} equalFunc The function to determine equivalents of values. - * @param {Object} stack Tracks traversed `array` and `other` objects. - * @returns {boolean} Returns `true` if the arrays are equivalent, else `false`. - */ - function equalArrays(array, other, bitmask, customizer, equalFunc, stack) { - var isPartial = bitmask & COMPARE_PARTIAL_FLAG, - arrLength = array.length, - othLength = other.length; - - if (arrLength != othLength && !(isPartial && othLength > arrLength)) { - return false; - } - // Assume cyclic values are equal. - var stacked = stack.get(array); - if (stacked && stack.get(other)) { - return stacked == other; - } - var index = -1, - result = true, - seen = (bitmask & COMPARE_UNORDERED_FLAG) ? new SetCache : undefined; - - stack.set(array, other); - stack.set(other, array); - - // Ignore non-index properties. - while (++index < arrLength) { - var arrValue = array[index], - othValue = other[index]; - - if (customizer) { - var compared = isPartial - ? customizer(othValue, arrValue, index, other, array, stack) - : customizer(arrValue, othValue, index, array, other, stack); - } - if (compared !== undefined) { - if (compared) { - continue; - } - result = false; - break; - } - // Recursively compare arrays (susceptible to call stack limits). - if (seen) { - if (!arraySome(other, function(othValue, othIndex) { - if (!cacheHas(seen, othIndex) && - (arrValue === othValue || equalFunc(arrValue, othValue, bitmask, customizer, stack))) { - return seen.push(othIndex); - } - })) { - result = false; - break; - } - } else if (!( - arrValue === othValue || - equalFunc(arrValue, othValue, bitmask, customizer, stack) - )) { - result = false; - break; - } - } - stack['delete'](array); - stack['delete'](other); - return result; - } - - /** - * A specialized version of `baseIsEqualDeep` for comparing objects of - * the same `toStringTag`. - * - * **Note:** This function only supports comparing values with tags of - * `Boolean`, `Date`, `Error`, `Number`, `RegExp`, or `String`. - * - * @private - * @param {Object} object The object to compare. - * @param {Object} other The other object to compare. - * @param {string} tag The `toStringTag` of the objects to compare. - * @param {number} bitmask The bitmask flags. See `baseIsEqual` for more details. - * @param {Function} customizer The function to customize comparisons. - * @param {Function} equalFunc The function to determine equivalents of values. - * @param {Object} stack Tracks traversed `object` and `other` objects. - * @returns {boolean} Returns `true` if the objects are equivalent, else `false`. - */ - function equalByTag(object, other, tag, bitmask, customizer, equalFunc, stack) { - switch (tag) { - case dataViewTag: - if ((object.byteLength != other.byteLength) || - (object.byteOffset != other.byteOffset)) { - return false; - } - object = object.buffer; - other = other.buffer; - - case arrayBufferTag: - if ((object.byteLength != other.byteLength) || - !equalFunc(new Uint8Array(object), new Uint8Array(other))) { - return false; - } - return true; - - case boolTag: - case dateTag: - case numberTag: - // Coerce booleans to `1` or `0` and dates to milliseconds. - // Invalid dates are coerced to `NaN`. 
- return eq(+object, +other); - - case errorTag: - return object.name == other.name && object.message == other.message; - - case regexpTag: - case stringTag: - // Coerce regexes to strings and treat strings, primitives and objects, - // as equal. See http://www.ecma-international.org/ecma-262/7.0/#sec-regexp.prototype.tostring - // for more details. - return object == (other + ''); - - case mapTag: - var convert = mapToArray; - - case setTag: - var isPartial = bitmask & COMPARE_PARTIAL_FLAG; - convert || (convert = setToArray); - - if (object.size != other.size && !isPartial) { - return false; - } - // Assume cyclic values are equal. - var stacked = stack.get(object); - if (stacked) { - return stacked == other; - } - bitmask |= COMPARE_UNORDERED_FLAG; - - // Recursively compare objects (susceptible to call stack limits). - stack.set(object, other); - var result = equalArrays(convert(object), convert(other), bitmask, customizer, equalFunc, stack); - stack['delete'](object); - return result; - - case symbolTag: - if (symbolValueOf) { - return symbolValueOf.call(object) == symbolValueOf.call(other); - } - } - return false; - } - - /** - * A specialized version of `baseIsEqualDeep` for objects with support for - * partial deep comparisons. - * - * @private - * @param {Object} object The object to compare. - * @param {Object} other The other object to compare. - * @param {number} bitmask The bitmask flags. See `baseIsEqual` for more details. - * @param {Function} customizer The function to customize comparisons. - * @param {Function} equalFunc The function to determine equivalents of values. - * @param {Object} stack Tracks traversed `object` and `other` objects. - * @returns {boolean} Returns `true` if the objects are equivalent, else `false`. - */ - function equalObjects(object, other, bitmask, customizer, equalFunc, stack) { - var isPartial = bitmask & COMPARE_PARTIAL_FLAG, - objProps = getAllKeys(object), - objLength = objProps.length, - othProps = getAllKeys(other), - othLength = othProps.length; - - if (objLength != othLength && !isPartial) { - return false; - } - var index = objLength; - while (index--) { - var key = objProps[index]; - if (!(isPartial ? key in other : hasOwnProperty.call(other, key))) { - return false; - } - } - // Assume cyclic values are equal. - var stacked = stack.get(object); - if (stacked && stack.get(other)) { - return stacked == other; - } - var result = true; - stack.set(object, other); - stack.set(other, object); - - var skipCtor = isPartial; - while (++index < objLength) { - key = objProps[index]; - var objValue = object[key], - othValue = other[key]; - - if (customizer) { - var compared = isPartial - ? customizer(othValue, objValue, key, other, object, stack) - : customizer(objValue, othValue, key, object, other, stack); - } - // Recursively compare objects (susceptible to call stack limits). - if (!(compared === undefined - ? (objValue === othValue || equalFunc(objValue, othValue, bitmask, customizer, stack)) - : compared - )) { - result = false; - break; - } - skipCtor || (skipCtor = key == 'constructor'); - } - if (result && !skipCtor) { - var objCtor = object.constructor, - othCtor = other.constructor; - - // Non `Object` object instances with different constructors are not equal. 
- if (objCtor != othCtor && - ('constructor' in object && 'constructor' in other) && - !(typeof objCtor == 'function' && objCtor instanceof objCtor && - typeof othCtor == 'function' && othCtor instanceof othCtor)) { - result = false; - } - } - stack['delete'](object); - stack['delete'](other); - return result; - } - - /** - * A specialized version of `baseRest` which flattens the rest array. - * - * @private - * @param {Function} func The function to apply a rest parameter to. - * @returns {Function} Returns the new function. - */ - function flatRest(func) { - return setToString(overRest(func, undefined, flatten), func + ''); - } - - /** - * Creates an array of own enumerable property names and symbols of `object`. - * - * @private - * @param {Object} object The object to query. - * @returns {Array} Returns the array of property names and symbols. - */ - function getAllKeys(object) { - return baseGetAllKeys(object, keys, getSymbols); - } - - /** - * Creates an array of own and inherited enumerable property names and - * symbols of `object`. - * - * @private - * @param {Object} object The object to query. - * @returns {Array} Returns the array of property names and symbols. - */ - function getAllKeysIn(object) { - return baseGetAllKeys(object, keysIn, getSymbolsIn); - } - - /** - * Gets metadata for `func`. - * - * @private - * @param {Function} func The function to query. - * @returns {*} Returns the metadata for `func`. - */ - var getData = !metaMap ? noop : function(func) { - return metaMap.get(func); - }; - - /** - * Gets the name of `func`. - * - * @private - * @param {Function} func The function to query. - * @returns {string} Returns the function name. - */ - function getFuncName(func) { - var result = (func.name + ''), - array = realNames[result], - length = hasOwnProperty.call(realNames, result) ? array.length : 0; - - while (length--) { - var data = array[length], - otherFunc = data.func; - if (otherFunc == null || otherFunc == func) { - return data.name; - } - } - return result; - } - - /** - * Gets the argument placeholder value for `func`. - * - * @private - * @param {Function} func The function to inspect. - * @returns {*} Returns the placeholder value. - */ - function getHolder(func) { - var object = hasOwnProperty.call(lodash, 'placeholder') ? lodash : func; - return object.placeholder; - } - - /** - * Gets the appropriate "iteratee" function. If `_.iteratee` is customized, - * this function returns the custom method, otherwise it returns `baseIteratee`. - * If arguments are provided, the chosen function is invoked with them and - * its result is returned. - * - * @private - * @param {*} [value] The value to convert to an iteratee. - * @param {number} [arity] The arity of the created iteratee. - * @returns {Function} Returns the chosen function or its result. - */ - function getIteratee() { - var result = lodash.iteratee || iteratee; - result = result === iteratee ? baseIteratee : result; - return arguments.length ? result(arguments[0], arguments[1]) : result; - } - - /** - * Gets the data for `map`. - * - * @private - * @param {Object} map The map to query. - * @param {string} key The reference key. - * @returns {*} Returns the map data. - */ - function getMapData(map, key) { - var data = map.__data__; - return isKeyable(key) - ? data[typeof key == 'string' ? 'string' : 'hash'] - : data.map; - } - - /** - * Gets the property names, values, and compare flags of `object`. - * - * @private - * @param {Object} object The object to query. 
- * @returns {Array} Returns the match data of `object`. - */ - function getMatchData(object) { - var result = keys(object), - length = result.length; - - while (length--) { - var key = result[length], - value = object[key]; - - result[length] = [key, value, isStrictComparable(value)]; - } - return result; - } - - /** - * Gets the native function at `key` of `object`. - * - * @private - * @param {Object} object The object to query. - * @param {string} key The key of the method to get. - * @returns {*} Returns the function if it's native, else `undefined`. - */ - function getNative(object, key) { - var value = getValue(object, key); - return baseIsNative(value) ? value : undefined; - } - - /** - * A specialized version of `baseGetTag` which ignores `Symbol.toStringTag` values. - * - * @private - * @param {*} value The value to query. - * @returns {string} Returns the raw `toStringTag`. - */ - function getRawTag(value) { - var isOwn = hasOwnProperty.call(value, symToStringTag), - tag = value[symToStringTag]; - - try { - value[symToStringTag] = undefined; - var unmasked = true; - } catch (e) {} - - var result = nativeObjectToString.call(value); - if (unmasked) { - if (isOwn) { - value[symToStringTag] = tag; - } else { - delete value[symToStringTag]; - } - } - return result; - } - - /** - * Creates an array of the own enumerable symbols of `object`. - * - * @private - * @param {Object} object The object to query. - * @returns {Array} Returns the array of symbols. - */ - var getSymbols = !nativeGetSymbols ? stubArray : function(object) { - if (object == null) { - return []; - } - object = Object(object); - return arrayFilter(nativeGetSymbols(object), function(symbol) { - return propertyIsEnumerable.call(object, symbol); - }); - }; - - /** - * Creates an array of the own and inherited enumerable symbols of `object`. - * - * @private - * @param {Object} object The object to query. - * @returns {Array} Returns the array of symbols. - */ - var getSymbolsIn = !nativeGetSymbols ? stubArray : function(object) { - var result = []; - while (object) { - arrayPush(result, getSymbols(object)); - object = getPrototype(object); - } - return result; - }; - - /** - * Gets the `toStringTag` of `value`. - * - * @private - * @param {*} value The value to query. - * @returns {string} Returns the `toStringTag`. - */ - var getTag = baseGetTag; - - // Fallback for data views, maps, sets, and weak maps in IE 11 and promises in Node.js < 6. - if ((DataView && getTag(new DataView(new ArrayBuffer(1))) != dataViewTag) || - (Map && getTag(new Map) != mapTag) || - (Promise && getTag(Promise.resolve()) != promiseTag) || - (Set && getTag(new Set) != setTag) || - (WeakMap && getTag(new WeakMap) != weakMapTag)) { - getTag = function(value) { - var result = baseGetTag(value), - Ctor = result == objectTag ? value.constructor : undefined, - ctorString = Ctor ? toSource(Ctor) : ''; - - if (ctorString) { - switch (ctorString) { - case dataViewCtorString: return dataViewTag; - case mapCtorString: return mapTag; - case promiseCtorString: return promiseTag; - case setCtorString: return setTag; - case weakMapCtorString: return weakMapTag; - } - } - return result; - }; - } - - /** - * Gets the view, applying any `transforms` to the `start` and `end` positions. - * - * @private - * @param {number} start The start of the view. - * @param {number} end The end of the view. - * @param {Array} transforms The transformations to apply to the view. - * @returns {Object} Returns an object containing the `start` and `end` - * positions of the view. 
- */ - function getView(start, end, transforms) { - var index = -1, - length = transforms.length; - - while (++index < length) { - var data = transforms[index], - size = data.size; - - switch (data.type) { - case 'drop': start += size; break; - case 'dropRight': end -= size; break; - case 'take': end = nativeMin(end, start + size); break; - case 'takeRight': start = nativeMax(start, end - size); break; - } - } - return { 'start': start, 'end': end }; - } - - /** - * Extracts wrapper details from the `source` body comment. - * - * @private - * @param {string} source The source to inspect. - * @returns {Array} Returns the wrapper details. - */ - function getWrapDetails(source) { - var match = source.match(reWrapDetails); - return match ? match[1].split(reSplitDetails) : []; - } - - /** - * Checks if `path` exists on `object`. - * - * @private - * @param {Object} object The object to query. - * @param {Array|string} path The path to check. - * @param {Function} hasFunc The function to check properties. - * @returns {boolean} Returns `true` if `path` exists, else `false`. - */ - function hasPath(object, path, hasFunc) { - path = castPath(path, object); - - var index = -1, - length = path.length, - result = false; - - while (++index < length) { - var key = toKey(path[index]); - if (!(result = object != null && hasFunc(object, key))) { - break; - } - object = object[key]; - } - if (result || ++index != length) { - return result; - } - length = object == null ? 0 : object.length; - return !!length && isLength(length) && isIndex(key, length) && - (isArray(object) || isArguments(object)); - } - - /** - * Initializes an array clone. - * - * @private - * @param {Array} array The array to clone. - * @returns {Array} Returns the initialized clone. - */ - function initCloneArray(array) { - var length = array.length, - result = new array.constructor(length); - - // Add properties assigned by `RegExp#exec`. - if (length && typeof array[0] == 'string' && hasOwnProperty.call(array, 'index')) { - result.index = array.index; - result.input = array.input; - } - return result; - } - - /** - * Initializes an object clone. - * - * @private - * @param {Object} object The object to clone. - * @returns {Object} Returns the initialized clone. - */ - function initCloneObject(object) { - return (typeof object.constructor == 'function' && !isPrototype(object)) - ? baseCreate(getPrototype(object)) - : {}; - } - - /** - * Initializes an object clone based on its `toStringTag`. - * - * **Note:** This function only supports cloning values with tags of - * `Boolean`, `Date`, `Error`, `Map`, `Number`, `RegExp`, `Set`, or `String`. - * - * @private - * @param {Object} object The object to clone. - * @param {string} tag The `toStringTag` of the object to clone. - * @param {boolean} [isDeep] Specify a deep clone. - * @returns {Object} Returns the initialized clone. 
- */ - function initCloneByTag(object, tag, isDeep) { - var Ctor = object.constructor; - switch (tag) { - case arrayBufferTag: - return cloneArrayBuffer(object); - - case boolTag: - case dateTag: - return new Ctor(+object); - - case dataViewTag: - return cloneDataView(object, isDeep); - - case float32Tag: case float64Tag: - case int8Tag: case int16Tag: case int32Tag: - case uint8Tag: case uint8ClampedTag: case uint16Tag: case uint32Tag: - return cloneTypedArray(object, isDeep); - - case mapTag: - return new Ctor; - - case numberTag: - case stringTag: - return new Ctor(object); - - case regexpTag: - return cloneRegExp(object); - - case setTag: - return new Ctor; - - case symbolTag: - return cloneSymbol(object); - } - } - - /** - * Inserts wrapper `details` in a comment at the top of the `source` body. - * - * @private - * @param {string} source The source to modify. - * @returns {Array} details The details to insert. - * @returns {string} Returns the modified source. - */ - function insertWrapDetails(source, details) { - var length = details.length; - if (!length) { - return source; - } - var lastIndex = length - 1; - details[lastIndex] = (length > 1 ? '& ' : '') + details[lastIndex]; - details = details.join(length > 2 ? ', ' : ' '); - return source.replace(reWrapComment, '{\n/* [wrapped with ' + details + '] */\n'); - } - - /** - * Checks if `value` is a flattenable `arguments` object or array. - * - * @private - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is flattenable, else `false`. - */ - function isFlattenable(value) { - return isArray(value) || isArguments(value) || - !!(spreadableSymbol && value && value[spreadableSymbol]); - } - - /** - * Checks if `value` is a valid array-like index. - * - * @private - * @param {*} value The value to check. - * @param {number} [length=MAX_SAFE_INTEGER] The upper bounds of a valid index. - * @returns {boolean} Returns `true` if `value` is a valid index, else `false`. - */ - function isIndex(value, length) { - var type = typeof value; - length = length == null ? MAX_SAFE_INTEGER : length; - - return !!length && - (type == 'number' || - (type != 'symbol' && reIsUint.test(value))) && - (value > -1 && value % 1 == 0 && value < length); - } - - /** - * Checks if the given arguments are from an iteratee call. - * - * @private - * @param {*} value The potential iteratee value argument. - * @param {*} index The potential iteratee index or key argument. - * @param {*} object The potential iteratee object argument. - * @returns {boolean} Returns `true` if the arguments are from an iteratee call, - * else `false`. - */ - function isIterateeCall(value, index, object) { - if (!isObject(object)) { - return false; - } - var type = typeof index; - if (type == 'number' - ? (isArrayLike(object) && isIndex(index, object.length)) - : (type == 'string' && index in object) - ) { - return eq(object[index], value); - } - return false; - } - - /** - * Checks if `value` is a property name and not a property path. - * - * @private - * @param {*} value The value to check. - * @param {Object} [object] The object to query keys on. - * @returns {boolean} Returns `true` if `value` is a property name, else `false`. 
- */ - function isKey(value, object) { - if (isArray(value)) { - return false; - } - var type = typeof value; - if (type == 'number' || type == 'symbol' || type == 'boolean' || - value == null || isSymbol(value)) { - return true; - } - return reIsPlainProp.test(value) || !reIsDeepProp.test(value) || - (object != null && value in Object(object)); - } - - /** - * Checks if `value` is suitable for use as unique object key. - * - * @private - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is suitable, else `false`. - */ - function isKeyable(value) { - var type = typeof value; - return (type == 'string' || type == 'number' || type == 'symbol' || type == 'boolean') - ? (value !== '__proto__') - : (value === null); - } - - /** - * Checks if `func` has a lazy counterpart. - * - * @private - * @param {Function} func The function to check. - * @returns {boolean} Returns `true` if `func` has a lazy counterpart, - * else `false`. - */ - function isLaziable(func) { - var funcName = getFuncName(func), - other = lodash[funcName]; - - if (typeof other != 'function' || !(funcName in LazyWrapper.prototype)) { - return false; - } - if (func === other) { - return true; - } - var data = getData(other); - return !!data && func === data[0]; - } - - /** - * Checks if `func` has its source masked. - * - * @private - * @param {Function} func The function to check. - * @returns {boolean} Returns `true` if `func` is masked, else `false`. - */ - function isMasked(func) { - return !!maskSrcKey && (maskSrcKey in func); - } - - /** - * Checks if `func` is capable of being masked. - * - * @private - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `func` is maskable, else `false`. - */ - var isMaskable = coreJsData ? isFunction : stubFalse; - - /** - * Checks if `value` is likely a prototype object. - * - * @private - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is a prototype, else `false`. - */ - function isPrototype(value) { - var Ctor = value && value.constructor, - proto = (typeof Ctor == 'function' && Ctor.prototype) || objectProto; - - return value === proto; - } - - /** - * Checks if `value` is suitable for strict equality comparisons, i.e. `===`. - * - * @private - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` if suitable for strict - * equality comparisons, else `false`. - */ - function isStrictComparable(value) { - return value === value && !isObject(value); - } - - /** - * A specialized version of `matchesProperty` for source values suitable - * for strict equality comparisons, i.e. `===`. - * - * @private - * @param {string} key The key of the property to get. - * @param {*} srcValue The value to match. - * @returns {Function} Returns the new spec function. - */ - function matchesStrictComparable(key, srcValue) { - return function(object) { - if (object == null) { - return false; - } - return object[key] === srcValue && - (srcValue !== undefined || (key in Object(object))); - }; - } - - /** - * A specialized version of `_.memoize` which clears the memoized function's - * cache when it exceeds `MAX_MEMOIZE_SIZE`. - * - * @private - * @param {Function} func The function to have its output memoized. - * @returns {Function} Returns the new memoized function. 
- */ - function memoizeCapped(func) { - var result = memoize(func, function(key) { - if (cache.size === MAX_MEMOIZE_SIZE) { - cache.clear(); - } - return key; - }); - - var cache = result.cache; - return result; - } - - /** - * Merges the function metadata of `source` into `data`. - * - * Merging metadata reduces the number of wrappers used to invoke a function. - * This is possible because methods like `_.bind`, `_.curry`, and `_.partial` - * may be applied regardless of execution order. Methods like `_.ary` and - * `_.rearg` modify function arguments, making the order in which they are - * executed important, preventing the merging of metadata. However, we make - * an exception for a safe combined case where curried functions have `_.ary` - * and or `_.rearg` applied. - * - * @private - * @param {Array} data The destination metadata. - * @param {Array} source The source metadata. - * @returns {Array} Returns `data`. - */ - function mergeData(data, source) { - var bitmask = data[1], - srcBitmask = source[1], - newBitmask = bitmask | srcBitmask, - isCommon = newBitmask < (WRAP_BIND_FLAG | WRAP_BIND_KEY_FLAG | WRAP_ARY_FLAG); - - var isCombo = - ((srcBitmask == WRAP_ARY_FLAG) && (bitmask == WRAP_CURRY_FLAG)) || - ((srcBitmask == WRAP_ARY_FLAG) && (bitmask == WRAP_REARG_FLAG) && (data[7].length <= source[8])) || - ((srcBitmask == (WRAP_ARY_FLAG | WRAP_REARG_FLAG)) && (source[7].length <= source[8]) && (bitmask == WRAP_CURRY_FLAG)); - - // Exit early if metadata can't be merged. - if (!(isCommon || isCombo)) { - return data; - } - // Use source `thisArg` if available. - if (srcBitmask & WRAP_BIND_FLAG) { - data[2] = source[2]; - // Set when currying a bound function. - newBitmask |= bitmask & WRAP_BIND_FLAG ? 0 : WRAP_CURRY_BOUND_FLAG; - } - // Compose partial arguments. - var value = source[3]; - if (value) { - var partials = data[3]; - data[3] = partials ? composeArgs(partials, value, source[4]) : value; - data[4] = partials ? replaceHolders(data[3], PLACEHOLDER) : source[4]; - } - // Compose partial right arguments. - value = source[5]; - if (value) { - partials = data[5]; - data[5] = partials ? composeArgsRight(partials, value, source[6]) : value; - data[6] = partials ? replaceHolders(data[5], PLACEHOLDER) : source[6]; - } - // Use source `argPos` if available. - value = source[7]; - if (value) { - data[7] = value; - } - // Use source `ary` if it's smaller. - if (srcBitmask & WRAP_ARY_FLAG) { - data[8] = data[8] == null ? source[8] : nativeMin(data[8], source[8]); - } - // Use source `arity` if one is not provided. - if (data[9] == null) { - data[9] = source[9]; - } - // Use source `func` and merge bitmasks. - data[0] = source[0]; - data[1] = newBitmask; - - return data; - } - - /** - * This function is like - * [`Object.keys`](http://ecma-international.org/ecma-262/7.0/#sec-object.keys) - * except that it includes inherited enumerable properties. - * - * @private - * @param {Object} object The object to query. - * @returns {Array} Returns the array of property names. - */ - function nativeKeysIn(object) { - var result = []; - if (object != null) { - for (var key in Object(object)) { - result.push(key); - } - } - return result; - } - - /** - * Converts `value` to a string using `Object.prototype.toString`. - * - * @private - * @param {*} value The value to convert. - * @returns {string} Returns the converted string. - */ - function objectToString(value) { - return nativeObjectToString.call(value); - } - - /** - * A specialized version of `baseRest` which transforms the rest array. 
- * - * @private - * @param {Function} func The function to apply a rest parameter to. - * @param {number} [start=func.length-1] The start position of the rest parameter. - * @param {Function} transform The rest array transform. - * @returns {Function} Returns the new function. - */ - function overRest(func, start, transform) { - start = nativeMax(start === undefined ? (func.length - 1) : start, 0); - return function() { - var args = arguments, - index = -1, - length = nativeMax(args.length - start, 0), - array = Array(length); - - while (++index < length) { - array[index] = args[start + index]; - } - index = -1; - var otherArgs = Array(start + 1); - while (++index < start) { - otherArgs[index] = args[index]; - } - otherArgs[start] = transform(array); - return apply(func, this, otherArgs); - }; - } - - /** - * Gets the parent value at `path` of `object`. - * - * @private - * @param {Object} object The object to query. - * @param {Array} path The path to get the parent value of. - * @returns {*} Returns the parent value. - */ - function parent(object, path) { - return path.length < 2 ? object : baseGet(object, baseSlice(path, 0, -1)); - } - - /** - * Reorder `array` according to the specified indexes where the element at - * the first index is assigned as the first element, the element at - * the second index is assigned as the second element, and so on. - * - * @private - * @param {Array} array The array to reorder. - * @param {Array} indexes The arranged array indexes. - * @returns {Array} Returns `array`. - */ - function reorder(array, indexes) { - var arrLength = array.length, - length = nativeMin(indexes.length, arrLength), - oldArray = copyArray(array); - - while (length--) { - var index = indexes[length]; - array[length] = isIndex(index, arrLength) ? oldArray[index] : undefined; - } - return array; - } - - /** - * Gets the value at `key`, unless `key` is "__proto__" or "constructor". - * - * @private - * @param {Object} object The object to query. - * @param {string} key The key of the property to get. - * @returns {*} Returns the property value. - */ - function safeGet(object, key) { - if (key === 'constructor' && typeof object[key] === 'function') { - return; - } - - if (key == '__proto__') { - return; - } - - return object[key]; - } - - /** - * Sets metadata for `func`. - * - * **Note:** If this function becomes hot, i.e. is invoked a lot in a short - * period of time, it will trip its breaker and transition to an identity - * function to avoid garbage collection pauses in V8. See - * [V8 issue 2070](https://bugs.chromium.org/p/v8/issues/detail?id=2070) - * for more details. - * - * @private - * @param {Function} func The function to associate metadata with. - * @param {*} data The metadata. - * @returns {Function} Returns `func`. - */ - var setData = shortOut(baseSetData); - - /** - * A simple wrapper around the global [`setTimeout`](https://mdn.io/setTimeout). - * - * @private - * @param {Function} func The function to delay. - * @param {number} wait The number of milliseconds to delay invocation. - * @returns {number|Object} Returns the timer id or timeout object. - */ - var setTimeout = ctxSetTimeout || function(func, wait) { - return root.setTimeout(func, wait); - }; - - /** - * Sets the `toString` method of `func` to return `string`. - * - * @private - * @param {Function} func The function to modify. - * @param {Function} string The `toString` result. - * @returns {Function} Returns `func`. 
- */ - var setToString = shortOut(baseSetToString); - - /** - * Sets the `toString` method of `wrapper` to mimic the source of `reference` - * with wrapper details in a comment at the top of the source body. - * - * @private - * @param {Function} wrapper The function to modify. - * @param {Function} reference The reference function. - * @param {number} bitmask The bitmask flags. See `createWrap` for more details. - * @returns {Function} Returns `wrapper`. - */ - function setWrapToString(wrapper, reference, bitmask) { - var source = (reference + ''); - return setToString(wrapper, insertWrapDetails(source, updateWrapDetails(getWrapDetails(source), bitmask))); - } - - /** - * Creates a function that'll short out and invoke `identity` instead - * of `func` when it's called `HOT_COUNT` or more times in `HOT_SPAN` - * milliseconds. - * - * @private - * @param {Function} func The function to restrict. - * @returns {Function} Returns the new shortable function. - */ - function shortOut(func) { - var count = 0, - lastCalled = 0; - - return function() { - var stamp = nativeNow(), - remaining = HOT_SPAN - (stamp - lastCalled); - - lastCalled = stamp; - if (remaining > 0) { - if (++count >= HOT_COUNT) { - return arguments[0]; - } - } else { - count = 0; - } - return func.apply(undefined, arguments); - }; - } - - /** - * A specialized version of `_.shuffle` which mutates and sets the size of `array`. - * - * @private - * @param {Array} array The array to shuffle. - * @param {number} [size=array.length] The size of `array`. - * @returns {Array} Returns `array`. - */ - function shuffleSelf(array, size) { - var index = -1, - length = array.length, - lastIndex = length - 1; - - size = size === undefined ? length : size; - while (++index < size) { - var rand = baseRandom(index, lastIndex), - value = array[rand]; - - array[rand] = array[index]; - array[index] = value; - } - array.length = size; - return array; - } - - /** - * Converts `string` to a property path array. - * - * @private - * @param {string} string The string to convert. - * @returns {Array} Returns the property path array. - */ - var stringToPath = memoizeCapped(function(string) { - var result = []; - if (string.charCodeAt(0) === 46 /* . */) { - result.push(''); - } - string.replace(rePropName, function(match, number, quote, subString) { - result.push(quote ? subString.replace(reEscapeChar, '$1') : (number || match)); - }); - return result; - }); - - /** - * Converts `value` to a string key if it's not a string or symbol. - * - * @private - * @param {*} value The value to inspect. - * @returns {string|symbol} Returns the key. - */ - function toKey(value) { - if (typeof value == 'string' || isSymbol(value)) { - return value; - } - var result = (value + ''); - return (result == '0' && (1 / value) == -INFINITY) ? '-0' : result; - } - - /** - * Converts `func` to its source code. - * - * @private - * @param {Function} func The function to convert. - * @returns {string} Returns the source code. - */ - function toSource(func) { - if (func != null) { - try { - return funcToString.call(func); - } catch (e) {} - try { - return (func + ''); - } catch (e) {} - } - return ''; - } - - /** - * Updates wrapper `details` based on `bitmask` flags. - * - * @private - * @returns {Array} details The details to modify. - * @param {number} bitmask The bitmask flags. See `createWrap` for more details. - * @returns {Array} Returns `details`. - */ - function updateWrapDetails(details, bitmask) { - arrayEach(wrapFlags, function(pair) { - var value = '_.' 
+ pair[0]; - if ((bitmask & pair[1]) && !arrayIncludes(details, value)) { - details.push(value); - } - }); - return details.sort(); - } - - /** - * Creates a clone of `wrapper`. - * - * @private - * @param {Object} wrapper The wrapper to clone. - * @returns {Object} Returns the cloned wrapper. - */ - function wrapperClone(wrapper) { - if (wrapper instanceof LazyWrapper) { - return wrapper.clone(); - } - var result = new LodashWrapper(wrapper.__wrapped__, wrapper.__chain__); - result.__actions__ = copyArray(wrapper.__actions__); - result.__index__ = wrapper.__index__; - result.__values__ = wrapper.__values__; - return result; - } - - /*------------------------------------------------------------------------*/ - - /** - * Creates an array of elements split into groups the length of `size`. - * If `array` can't be split evenly, the final chunk will be the remaining - * elements. - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category Array - * @param {Array} array The array to process. - * @param {number} [size=1] The length of each chunk - * @param- {Object} [guard] Enables use as an iteratee for methods like `_.map`. - * @returns {Array} Returns the new array of chunks. - * @example - * - * _.chunk(['a', 'b', 'c', 'd'], 2); - * // => [['a', 'b'], ['c', 'd']] - * - * _.chunk(['a', 'b', 'c', 'd'], 3); - * // => [['a', 'b', 'c'], ['d']] - */ - function chunk(array, size, guard) { - if ((guard ? isIterateeCall(array, size, guard) : size === undefined)) { - size = 1; - } else { - size = nativeMax(toInteger(size), 0); - } - var length = array == null ? 0 : array.length; - if (!length || size < 1) { - return []; - } - var index = 0, - resIndex = 0, - result = Array(nativeCeil(length / size)); - - while (index < length) { - result[resIndex++] = baseSlice(array, index, (index += size)); - } - return result; - } - - /** - * Creates an array with all falsey values removed. The values `false`, `null`, - * `0`, `""`, `undefined`, and `NaN` are falsey. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Array - * @param {Array} array The array to compact. - * @returns {Array} Returns the new array of filtered values. - * @example - * - * _.compact([0, 1, false, 2, '', 3]); - * // => [1, 2, 3] - */ - function compact(array) { - var index = -1, - length = array == null ? 0 : array.length, - resIndex = 0, - result = []; - - while (++index < length) { - var value = array[index]; - if (value) { - result[resIndex++] = value; - } - } - return result; - } - - /** - * Creates a new array concatenating `array` with any additional arrays - * and/or values. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Array - * @param {Array} array The array to concatenate. - * @param {...*} [values] The values to concatenate. - * @returns {Array} Returns the new concatenated array. - * @example - * - * var array = [1]; - * var other = _.concat(array, 2, [3], [[4]]); - * - * console.log(other); - * // => [1, 2, 3, [4]] - * - * console.log(array); - * // => [1] - */ - function concat() { - var length = arguments.length; - if (!length) { - return []; - } - var args = Array(length - 1), - array = arguments[0], - index = length; - - while (index--) { - args[index - 1] = arguments[index]; - } - return arrayPush(isArray(array) ? copyArray(array) : [array], baseFlatten(args, 1)); - } - - /** - * Creates an array of `array` values not included in the other given arrays - * using [`SameValueZero`](http://ecma-international.org/ecma-262/7.0/#sec-samevaluezero) - * for equality comparisons. 
The order and references of result values are - * determined by the first array. - * - * **Note:** Unlike `_.pullAll`, this method returns a new array. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Array - * @param {Array} array The array to inspect. - * @param {...Array} [values] The values to exclude. - * @returns {Array} Returns the new array of filtered values. - * @see _.without, _.xor - * @example - * - * _.difference([2, 1], [2, 3]); - * // => [1] - */ - var difference = baseRest(function(array, values) { - return isArrayLikeObject(array) - ? baseDifference(array, baseFlatten(values, 1, isArrayLikeObject, true)) - : []; - }); - - /** - * This method is like `_.difference` except that it accepts `iteratee` which - * is invoked for each element of `array` and `values` to generate the criterion - * by which they're compared. The order and references of result values are - * determined by the first array. The iteratee is invoked with one argument: - * (value). - * - * **Note:** Unlike `_.pullAllBy`, this method returns a new array. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Array - * @param {Array} array The array to inspect. - * @param {...Array} [values] The values to exclude. - * @param {Function} [iteratee=_.identity] The iteratee invoked per element. - * @returns {Array} Returns the new array of filtered values. - * @example - * - * _.differenceBy([2.1, 1.2], [2.3, 3.4], Math.floor); - * // => [1.2] - * - * // The `_.property` iteratee shorthand. - * _.differenceBy([{ 'x': 2 }, { 'x': 1 }], [{ 'x': 1 }], 'x'); - * // => [{ 'x': 2 }] - */ - var differenceBy = baseRest(function(array, values) { - var iteratee = last(values); - if (isArrayLikeObject(iteratee)) { - iteratee = undefined; - } - return isArrayLikeObject(array) - ? baseDifference(array, baseFlatten(values, 1, isArrayLikeObject, true), getIteratee(iteratee, 2)) - : []; - }); - - /** - * This method is like `_.difference` except that it accepts `comparator` - * which is invoked to compare elements of `array` to `values`. The order and - * references of result values are determined by the first array. The comparator - * is invoked with two arguments: (arrVal, othVal). - * - * **Note:** Unlike `_.pullAllWith`, this method returns a new array. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Array - * @param {Array} array The array to inspect. - * @param {...Array} [values] The values to exclude. - * @param {Function} [comparator] The comparator invoked per element. - * @returns {Array} Returns the new array of filtered values. - * @example - * - * var objects = [{ 'x': 1, 'y': 2 }, { 'x': 2, 'y': 1 }]; - * - * _.differenceWith(objects, [{ 'x': 1, 'y': 2 }], _.isEqual); - * // => [{ 'x': 2, 'y': 1 }] - */ - var differenceWith = baseRest(function(array, values) { - var comparator = last(values); - if (isArrayLikeObject(comparator)) { - comparator = undefined; - } - return isArrayLikeObject(array) - ? baseDifference(array, baseFlatten(values, 1, isArrayLikeObject, true), undefined, comparator) - : []; - }); - - /** - * Creates a slice of `array` with `n` elements dropped from the beginning. - * - * @static - * @memberOf _ - * @since 0.5.0 - * @category Array - * @param {Array} array The array to query. - * @param {number} [n=1] The number of elements to drop. - * @param- {Object} [guard] Enables use as an iteratee for methods like `_.map`. - * @returns {Array} Returns the slice of `array`. 
- * @example - * - * _.drop([1, 2, 3]); - * // => [2, 3] - * - * _.drop([1, 2, 3], 2); - * // => [3] - * - * _.drop([1, 2, 3], 5); - * // => [] - * - * _.drop([1, 2, 3], 0); - * // => [1, 2, 3] - */ - function drop(array, n, guard) { - var length = array == null ? 0 : array.length; - if (!length) { - return []; - } - n = (guard || n === undefined) ? 1 : toInteger(n); - return baseSlice(array, n < 0 ? 0 : n, length); - } - - /** - * Creates a slice of `array` with `n` elements dropped from the end. - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category Array - * @param {Array} array The array to query. - * @param {number} [n=1] The number of elements to drop. - * @param- {Object} [guard] Enables use as an iteratee for methods like `_.map`. - * @returns {Array} Returns the slice of `array`. - * @example - * - * _.dropRight([1, 2, 3]); - * // => [1, 2] - * - * _.dropRight([1, 2, 3], 2); - * // => [1] - * - * _.dropRight([1, 2, 3], 5); - * // => [] - * - * _.dropRight([1, 2, 3], 0); - * // => [1, 2, 3] - */ - function dropRight(array, n, guard) { - var length = array == null ? 0 : array.length; - if (!length) { - return []; - } - n = (guard || n === undefined) ? 1 : toInteger(n); - n = length - n; - return baseSlice(array, 0, n < 0 ? 0 : n); - } - - /** - * Creates a slice of `array` excluding elements dropped from the end. - * Elements are dropped until `predicate` returns falsey. The predicate is - * invoked with three arguments: (value, index, array). - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category Array - * @param {Array} array The array to query. - * @param {Function} [predicate=_.identity] The function invoked per iteration. - * @returns {Array} Returns the slice of `array`. - * @example - * - * var users = [ - * { 'user': 'barney', 'active': true }, - * { 'user': 'fred', 'active': false }, - * { 'user': 'pebbles', 'active': false } - * ]; - * - * _.dropRightWhile(users, function(o) { return !o.active; }); - * // => objects for ['barney'] - * - * // The `_.matches` iteratee shorthand. - * _.dropRightWhile(users, { 'user': 'pebbles', 'active': false }); - * // => objects for ['barney', 'fred'] - * - * // The `_.matchesProperty` iteratee shorthand. - * _.dropRightWhile(users, ['active', false]); - * // => objects for ['barney'] - * - * // The `_.property` iteratee shorthand. - * _.dropRightWhile(users, 'active'); - * // => objects for ['barney', 'fred', 'pebbles'] - */ - function dropRightWhile(array, predicate) { - return (array && array.length) - ? baseWhile(array, getIteratee(predicate, 3), true, true) - : []; - } - - /** - * Creates a slice of `array` excluding elements dropped from the beginning. - * Elements are dropped until `predicate` returns falsey. The predicate is - * invoked with three arguments: (value, index, array). - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category Array - * @param {Array} array The array to query. - * @param {Function} [predicate=_.identity] The function invoked per iteration. - * @returns {Array} Returns the slice of `array`. - * @example - * - * var users = [ - * { 'user': 'barney', 'active': false }, - * { 'user': 'fred', 'active': false }, - * { 'user': 'pebbles', 'active': true } - * ]; - * - * _.dropWhile(users, function(o) { return !o.active; }); - * // => objects for ['pebbles'] - * - * // The `_.matches` iteratee shorthand. - * _.dropWhile(users, { 'user': 'barney', 'active': false }); - * // => objects for ['fred', 'pebbles'] - * - * // The `_.matchesProperty` iteratee shorthand. 
- * _.dropWhile(users, ['active', false]); - * // => objects for ['pebbles'] - * - * // The `_.property` iteratee shorthand. - * _.dropWhile(users, 'active'); - * // => objects for ['barney', 'fred', 'pebbles'] - */ - function dropWhile(array, predicate) { - return (array && array.length) - ? baseWhile(array, getIteratee(predicate, 3), true) - : []; - } - - /** - * Fills elements of `array` with `value` from `start` up to, but not - * including, `end`. - * - * **Note:** This method mutates `array`. - * - * @static - * @memberOf _ - * @since 3.2.0 - * @category Array - * @param {Array} array The array to fill. - * @param {*} value The value to fill `array` with. - * @param {number} [start=0] The start position. - * @param {number} [end=array.length] The end position. - * @returns {Array} Returns `array`. - * @example - * - * var array = [1, 2, 3]; - * - * _.fill(array, 'a'); - * console.log(array); - * // => ['a', 'a', 'a'] - * - * _.fill(Array(3), 2); - * // => [2, 2, 2] - * - * _.fill([4, 6, 8, 10], '*', 1, 3); - * // => [4, '*', '*', 10] - */ - function fill(array, value, start, end) { - var length = array == null ? 0 : array.length; - if (!length) { - return []; - } - if (start && typeof start != 'number' && isIterateeCall(array, value, start)) { - start = 0; - end = length; - } - return baseFill(array, value, start, end); - } - - /** - * This method is like `_.find` except that it returns the index of the first - * element `predicate` returns truthy for instead of the element itself. - * - * @static - * @memberOf _ - * @since 1.1.0 - * @category Array - * @param {Array} array The array to inspect. - * @param {Function} [predicate=_.identity] The function invoked per iteration. - * @param {number} [fromIndex=0] The index to search from. - * @returns {number} Returns the index of the found element, else `-1`. - * @example - * - * var users = [ - * { 'user': 'barney', 'active': false }, - * { 'user': 'fred', 'active': false }, - * { 'user': 'pebbles', 'active': true } - * ]; - * - * _.findIndex(users, function(o) { return o.user == 'barney'; }); - * // => 0 - * - * // The `_.matches` iteratee shorthand. - * _.findIndex(users, { 'user': 'fred', 'active': false }); - * // => 1 - * - * // The `_.matchesProperty` iteratee shorthand. - * _.findIndex(users, ['active', false]); - * // => 0 - * - * // The `_.property` iteratee shorthand. - * _.findIndex(users, 'active'); - * // => 2 - */ - function findIndex(array, predicate, fromIndex) { - var length = array == null ? 0 : array.length; - if (!length) { - return -1; - } - var index = fromIndex == null ? 0 : toInteger(fromIndex); - if (index < 0) { - index = nativeMax(length + index, 0); - } - return baseFindIndex(array, getIteratee(predicate, 3), index); - } - - /** - * This method is like `_.findIndex` except that it iterates over elements - * of `collection` from right to left. - * - * @static - * @memberOf _ - * @since 2.0.0 - * @category Array - * @param {Array} array The array to inspect. - * @param {Function} [predicate=_.identity] The function invoked per iteration. - * @param {number} [fromIndex=array.length-1] The index to search from. - * @returns {number} Returns the index of the found element, else `-1`. - * @example - * - * var users = [ - * { 'user': 'barney', 'active': true }, - * { 'user': 'fred', 'active': false }, - * { 'user': 'pebbles', 'active': false } - * ]; - * - * _.findLastIndex(users, function(o) { return o.user == 'pebbles'; }); - * // => 2 - * - * // The `_.matches` iteratee shorthand. 
- * _.findLastIndex(users, { 'user': 'barney', 'active': true }); - * // => 0 - * - * // The `_.matchesProperty` iteratee shorthand. - * _.findLastIndex(users, ['active', false]); - * // => 2 - * - * // The `_.property` iteratee shorthand. - * _.findLastIndex(users, 'active'); - * // => 0 - */ - function findLastIndex(array, predicate, fromIndex) { - var length = array == null ? 0 : array.length; - if (!length) { - return -1; - } - var index = length - 1; - if (fromIndex !== undefined) { - index = toInteger(fromIndex); - index = fromIndex < 0 - ? nativeMax(length + index, 0) - : nativeMin(index, length - 1); - } - return baseFindIndex(array, getIteratee(predicate, 3), index, true); - } - - /** - * Flattens `array` a single level deep. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Array - * @param {Array} array The array to flatten. - * @returns {Array} Returns the new flattened array. - * @example - * - * _.flatten([1, [2, [3, [4]], 5]]); - * // => [1, 2, [3, [4]], 5] - */ - function flatten(array) { - var length = array == null ? 0 : array.length; - return length ? baseFlatten(array, 1) : []; - } - - /** - * Recursively flattens `array`. - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category Array - * @param {Array} array The array to flatten. - * @returns {Array} Returns the new flattened array. - * @example - * - * _.flattenDeep([1, [2, [3, [4]], 5]]); - * // => [1, 2, 3, 4, 5] - */ - function flattenDeep(array) { - var length = array == null ? 0 : array.length; - return length ? baseFlatten(array, INFINITY) : []; - } - - /** - * Recursively flatten `array` up to `depth` times. - * - * @static - * @memberOf _ - * @since 4.4.0 - * @category Array - * @param {Array} array The array to flatten. - * @param {number} [depth=1] The maximum recursion depth. - * @returns {Array} Returns the new flattened array. - * @example - * - * var array = [1, [2, [3, [4]], 5]]; - * - * _.flattenDepth(array, 1); - * // => [1, 2, [3, [4]], 5] - * - * _.flattenDepth(array, 2); - * // => [1, 2, 3, [4], 5] - */ - function flattenDepth(array, depth) { - var length = array == null ? 0 : array.length; - if (!length) { - return []; - } - depth = depth === undefined ? 1 : toInteger(depth); - return baseFlatten(array, depth); - } - - /** - * The inverse of `_.toPairs`; this method returns an object composed - * from key-value `pairs`. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Array - * @param {Array} pairs The key-value pairs. - * @returns {Object} Returns the new object. - * @example - * - * _.fromPairs([['a', 1], ['b', 2]]); - * // => { 'a': 1, 'b': 2 } - */ - function fromPairs(pairs) { - var index = -1, - length = pairs == null ? 0 : pairs.length, - result = {}; - - while (++index < length) { - var pair = pairs[index]; - result[pair[0]] = pair[1]; - } - return result; - } - - /** - * Gets the first element of `array`. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @alias first - * @category Array - * @param {Array} array The array to query. - * @returns {*} Returns the first element of `array`. - * @example - * - * _.head([1, 2, 3]); - * // => 1 - * - * _.head([]); - * // => undefined - */ - function head(array) { - return (array && array.length) ? array[0] : undefined; - } - - /** - * Gets the index at which the first occurrence of `value` is found in `array` - * using [`SameValueZero`](http://ecma-international.org/ecma-262/7.0/#sec-samevaluezero) - * for equality comparisons. If `fromIndex` is negative, it's used as the - * offset from the end of `array`. 
- * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Array - * @param {Array} array The array to inspect. - * @param {*} value The value to search for. - * @param {number} [fromIndex=0] The index to search from. - * @returns {number} Returns the index of the matched value, else `-1`. - * @example - * - * _.indexOf([1, 2, 1, 2], 2); - * // => 1 - * - * // Search from the `fromIndex`. - * _.indexOf([1, 2, 1, 2], 2, 2); - * // => 3 - */ - function indexOf(array, value, fromIndex) { - var length = array == null ? 0 : array.length; - if (!length) { - return -1; - } - var index = fromIndex == null ? 0 : toInteger(fromIndex); - if (index < 0) { - index = nativeMax(length + index, 0); - } - return baseIndexOf(array, value, index); - } - - /** - * Gets all but the last element of `array`. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Array - * @param {Array} array The array to query. - * @returns {Array} Returns the slice of `array`. - * @example - * - * _.initial([1, 2, 3]); - * // => [1, 2] - */ - function initial(array) { - var length = array == null ? 0 : array.length; - return length ? baseSlice(array, 0, -1) : []; - } - - /** - * Creates an array of unique values that are included in all given arrays - * using [`SameValueZero`](http://ecma-international.org/ecma-262/7.0/#sec-samevaluezero) - * for equality comparisons. The order and references of result values are - * determined by the first array. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Array - * @param {...Array} [arrays] The arrays to inspect. - * @returns {Array} Returns the new array of intersecting values. - * @example - * - * _.intersection([2, 1], [2, 3]); - * // => [2] - */ - var intersection = baseRest(function(arrays) { - var mapped = arrayMap(arrays, castArrayLikeObject); - return (mapped.length && mapped[0] === arrays[0]) - ? baseIntersection(mapped) - : []; - }); - - /** - * This method is like `_.intersection` except that it accepts `iteratee` - * which is invoked for each element of each `arrays` to generate the criterion - * by which they're compared. The order and references of result values are - * determined by the first array. The iteratee is invoked with one argument: - * (value). - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Array - * @param {...Array} [arrays] The arrays to inspect. - * @param {Function} [iteratee=_.identity] The iteratee invoked per element. - * @returns {Array} Returns the new array of intersecting values. - * @example - * - * _.intersectionBy([2.1, 1.2], [2.3, 3.4], Math.floor); - * // => [2.1] - * - * // The `_.property` iteratee shorthand. - * _.intersectionBy([{ 'x': 1 }], [{ 'x': 2 }, { 'x': 1 }], 'x'); - * // => [{ 'x': 1 }] - */ - var intersectionBy = baseRest(function(arrays) { - var iteratee = last(arrays), - mapped = arrayMap(arrays, castArrayLikeObject); - - if (iteratee === last(mapped)) { - iteratee = undefined; - } else { - mapped.pop(); - } - return (mapped.length && mapped[0] === arrays[0]) - ? baseIntersection(mapped, getIteratee(iteratee, 2)) - : []; - }); - - /** - * This method is like `_.intersection` except that it accepts `comparator` - * which is invoked to compare elements of `arrays`. The order and references - * of result values are determined by the first array. The comparator is - * invoked with two arguments: (arrVal, othVal). - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Array - * @param {...Array} [arrays] The arrays to inspect. 
- * @param {Function} [comparator] The comparator invoked per element. - * @returns {Array} Returns the new array of intersecting values. - * @example - * - * var objects = [{ 'x': 1, 'y': 2 }, { 'x': 2, 'y': 1 }]; - * var others = [{ 'x': 1, 'y': 1 }, { 'x': 1, 'y': 2 }]; - * - * _.intersectionWith(objects, others, _.isEqual); - * // => [{ 'x': 1, 'y': 2 }] - */ - var intersectionWith = baseRest(function(arrays) { - var comparator = last(arrays), - mapped = arrayMap(arrays, castArrayLikeObject); - - comparator = typeof comparator == 'function' ? comparator : undefined; - if (comparator) { - mapped.pop(); - } - return (mapped.length && mapped[0] === arrays[0]) - ? baseIntersection(mapped, undefined, comparator) - : []; - }); - - /** - * Converts all elements in `array` into a string separated by `separator`. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Array - * @param {Array} array The array to convert. - * @param {string} [separator=','] The element separator. - * @returns {string} Returns the joined string. - * @example - * - * _.join(['a', 'b', 'c'], '~'); - * // => 'a~b~c' - */ - function join(array, separator) { - return array == null ? '' : nativeJoin.call(array, separator); - } - - /** - * Gets the last element of `array`. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Array - * @param {Array} array The array to query. - * @returns {*} Returns the last element of `array`. - * @example - * - * _.last([1, 2, 3]); - * // => 3 - */ - function last(array) { - var length = array == null ? 0 : array.length; - return length ? array[length - 1] : undefined; - } - - /** - * This method is like `_.indexOf` except that it iterates over elements of - * `array` from right to left. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Array - * @param {Array} array The array to inspect. - * @param {*} value The value to search for. - * @param {number} [fromIndex=array.length-1] The index to search from. - * @returns {number} Returns the index of the matched value, else `-1`. - * @example - * - * _.lastIndexOf([1, 2, 1, 2], 2); - * // => 3 - * - * // Search from the `fromIndex`. - * _.lastIndexOf([1, 2, 1, 2], 2, 2); - * // => 1 - */ - function lastIndexOf(array, value, fromIndex) { - var length = array == null ? 0 : array.length; - if (!length) { - return -1; - } - var index = length; - if (fromIndex !== undefined) { - index = toInteger(fromIndex); - index = index < 0 ? nativeMax(length + index, 0) : nativeMin(index, length - 1); - } - return value === value - ? strictLastIndexOf(array, value, index) - : baseFindIndex(array, baseIsNaN, index, true); - } - - /** - * Gets the element at index `n` of `array`. If `n` is negative, the nth - * element from the end is returned. - * - * @static - * @memberOf _ - * @since 4.11.0 - * @category Array - * @param {Array} array The array to query. - * @param {number} [n=0] The index of the element to return. - * @returns {*} Returns the nth element of `array`. - * @example - * - * var array = ['a', 'b', 'c', 'd']; - * - * _.nth(array, 1); - * // => 'b' - * - * _.nth(array, -2); - * // => 'c'; - */ - function nth(array, n) { - return (array && array.length) ? baseNth(array, toInteger(n)) : undefined; - } - - /** - * Removes all given values from `array` using - * [`SameValueZero`](http://ecma-international.org/ecma-262/7.0/#sec-samevaluezero) - * for equality comparisons. - * - * **Note:** Unlike `_.without`, this method mutates `array`. Use `_.remove` - * to remove elements from an array by predicate. 
- * - * @static - * @memberOf _ - * @since 2.0.0 - * @category Array - * @param {Array} array The array to modify. - * @param {...*} [values] The values to remove. - * @returns {Array} Returns `array`. - * @example - * - * var array = ['a', 'b', 'c', 'a', 'b', 'c']; - * - * _.pull(array, 'a', 'c'); - * console.log(array); - * // => ['b', 'b'] - */ - var pull = baseRest(pullAll); - - /** - * This method is like `_.pull` except that it accepts an array of values to remove. - * - * **Note:** Unlike `_.difference`, this method mutates `array`. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Array - * @param {Array} array The array to modify. - * @param {Array} values The values to remove. - * @returns {Array} Returns `array`. - * @example - * - * var array = ['a', 'b', 'c', 'a', 'b', 'c']; - * - * _.pullAll(array, ['a', 'c']); - * console.log(array); - * // => ['b', 'b'] - */ - function pullAll(array, values) { - return (array && array.length && values && values.length) - ? basePullAll(array, values) - : array; - } - - /** - * This method is like `_.pullAll` except that it accepts `iteratee` which is - * invoked for each element of `array` and `values` to generate the criterion - * by which they're compared. The iteratee is invoked with one argument: (value). - * - * **Note:** Unlike `_.differenceBy`, this method mutates `array`. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Array - * @param {Array} array The array to modify. - * @param {Array} values The values to remove. - * @param {Function} [iteratee=_.identity] The iteratee invoked per element. - * @returns {Array} Returns `array`. - * @example - * - * var array = [{ 'x': 1 }, { 'x': 2 }, { 'x': 3 }, { 'x': 1 }]; - * - * _.pullAllBy(array, [{ 'x': 1 }, { 'x': 3 }], 'x'); - * console.log(array); - * // => [{ 'x': 2 }] - */ - function pullAllBy(array, values, iteratee) { - return (array && array.length && values && values.length) - ? basePullAll(array, values, getIteratee(iteratee, 2)) - : array; - } - - /** - * This method is like `_.pullAll` except that it accepts `comparator` which - * is invoked to compare elements of `array` to `values`. The comparator is - * invoked with two arguments: (arrVal, othVal). - * - * **Note:** Unlike `_.differenceWith`, this method mutates `array`. - * - * @static - * @memberOf _ - * @since 4.6.0 - * @category Array - * @param {Array} array The array to modify. - * @param {Array} values The values to remove. - * @param {Function} [comparator] The comparator invoked per element. - * @returns {Array} Returns `array`. - * @example - * - * var array = [{ 'x': 1, 'y': 2 }, { 'x': 3, 'y': 4 }, { 'x': 5, 'y': 6 }]; - * - * _.pullAllWith(array, [{ 'x': 3, 'y': 4 }], _.isEqual); - * console.log(array); - * // => [{ 'x': 1, 'y': 2 }, { 'x': 5, 'y': 6 }] - */ - function pullAllWith(array, values, comparator) { - return (array && array.length && values && values.length) - ? basePullAll(array, values, undefined, comparator) - : array; - } - - /** - * Removes elements from `array` corresponding to `indexes` and returns an - * array of removed elements. - * - * **Note:** Unlike `_.at`, this method mutates `array`. - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category Array - * @param {Array} array The array to modify. - * @param {...(number|number[])} [indexes] The indexes of elements to remove. - * @returns {Array} Returns the new array of removed elements. 
- * @example - * - * var array = ['a', 'b', 'c', 'd']; - * var pulled = _.pullAt(array, [1, 3]); - * - * console.log(array); - * // => ['a', 'c'] - * - * console.log(pulled); - * // => ['b', 'd'] - */ - var pullAt = flatRest(function(array, indexes) { - var length = array == null ? 0 : array.length, - result = baseAt(array, indexes); - - basePullAt(array, arrayMap(indexes, function(index) { - return isIndex(index, length) ? +index : index; - }).sort(compareAscending)); - - return result; - }); - - /** - * Removes all elements from `array` that `predicate` returns truthy for - * and returns an array of the removed elements. The predicate is invoked - * with three arguments: (value, index, array). - * - * **Note:** Unlike `_.filter`, this method mutates `array`. Use `_.pull` - * to pull elements from an array by value. - * - * @static - * @memberOf _ - * @since 2.0.0 - * @category Array - * @param {Array} array The array to modify. - * @param {Function} [predicate=_.identity] The function invoked per iteration. - * @returns {Array} Returns the new array of removed elements. - * @example - * - * var array = [1, 2, 3, 4]; - * var evens = _.remove(array, function(n) { - * return n % 2 == 0; - * }); - * - * console.log(array); - * // => [1, 3] - * - * console.log(evens); - * // => [2, 4] - */ - function remove(array, predicate) { - var result = []; - if (!(array && array.length)) { - return result; - } - var index = -1, - indexes = [], - length = array.length; - - predicate = getIteratee(predicate, 3); - while (++index < length) { - var value = array[index]; - if (predicate(value, index, array)) { - result.push(value); - indexes.push(index); - } - } - basePullAt(array, indexes); - return result; - } - - /** - * Reverses `array` so that the first element becomes the last, the second - * element becomes the second to last, and so on. - * - * **Note:** This method mutates `array` and is based on - * [`Array#reverse`](https://mdn.io/Array/reverse). - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Array - * @param {Array} array The array to modify. - * @returns {Array} Returns `array`. - * @example - * - * var array = [1, 2, 3]; - * - * _.reverse(array); - * // => [3, 2, 1] - * - * console.log(array); - * // => [3, 2, 1] - */ - function reverse(array) { - return array == null ? array : nativeReverse.call(array); - } - - /** - * Creates a slice of `array` from `start` up to, but not including, `end`. - * - * **Note:** This method is used instead of - * [`Array#slice`](https://mdn.io/Array/slice) to ensure dense arrays are - * returned. - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category Array - * @param {Array} array The array to slice. - * @param {number} [start=0] The start position. - * @param {number} [end=array.length] The end position. - * @returns {Array} Returns the slice of `array`. - */ - function slice(array, start, end) { - var length = array == null ? 0 : array.length; - if (!length) { - return []; - } - if (end && typeof end != 'number' && isIterateeCall(array, start, end)) { - start = 0; - end = length; - } - else { - start = start == null ? 0 : toInteger(start); - end = end === undefined ? length : toInteger(end); - } - return baseSlice(array, start, end); - } - - /** - * Uses a binary search to determine the lowest index at which `value` - * should be inserted into `array` in order to maintain its sort order. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Array - * @param {Array} array The sorted array to inspect. 
- * @param {*} value The value to evaluate. - * @returns {number} Returns the index at which `value` should be inserted - * into `array`. - * @example - * - * _.sortedIndex([30, 50], 40); - * // => 1 - */ - function sortedIndex(array, value) { - return baseSortedIndex(array, value); - } - - /** - * This method is like `_.sortedIndex` except that it accepts `iteratee` - * which is invoked for `value` and each element of `array` to compute their - * sort ranking. The iteratee is invoked with one argument: (value). - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Array - * @param {Array} array The sorted array to inspect. - * @param {*} value The value to evaluate. - * @param {Function} [iteratee=_.identity] The iteratee invoked per element. - * @returns {number} Returns the index at which `value` should be inserted - * into `array`. - * @example - * - * var objects = [{ 'x': 4 }, { 'x': 5 }]; - * - * _.sortedIndexBy(objects, { 'x': 4 }, function(o) { return o.x; }); - * // => 0 - * - * // The `_.property` iteratee shorthand. - * _.sortedIndexBy(objects, { 'x': 4 }, 'x'); - * // => 0 - */ - function sortedIndexBy(array, value, iteratee) { - return baseSortedIndexBy(array, value, getIteratee(iteratee, 2)); - } - - /** - * This method is like `_.indexOf` except that it performs a binary - * search on a sorted `array`. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Array - * @param {Array} array The array to inspect. - * @param {*} value The value to search for. - * @returns {number} Returns the index of the matched value, else `-1`. - * @example - * - * _.sortedIndexOf([4, 5, 5, 5, 6], 5); - * // => 1 - */ - function sortedIndexOf(array, value) { - var length = array == null ? 0 : array.length; - if (length) { - var index = baseSortedIndex(array, value); - if (index < length && eq(array[index], value)) { - return index; - } - } - return -1; - } - - /** - * This method is like `_.sortedIndex` except that it returns the highest - * index at which `value` should be inserted into `array` in order to - * maintain its sort order. - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category Array - * @param {Array} array The sorted array to inspect. - * @param {*} value The value to evaluate. - * @returns {number} Returns the index at which `value` should be inserted - * into `array`. - * @example - * - * _.sortedLastIndex([4, 5, 5, 5, 6], 5); - * // => 4 - */ - function sortedLastIndex(array, value) { - return baseSortedIndex(array, value, true); - } - - /** - * This method is like `_.sortedLastIndex` except that it accepts `iteratee` - * which is invoked for `value` and each element of `array` to compute their - * sort ranking. The iteratee is invoked with one argument: (value). - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Array - * @param {Array} array The sorted array to inspect. - * @param {*} value The value to evaluate. - * @param {Function} [iteratee=_.identity] The iteratee invoked per element. - * @returns {number} Returns the index at which `value` should be inserted - * into `array`. - * @example - * - * var objects = [{ 'x': 4 }, { 'x': 5 }]; - * - * _.sortedLastIndexBy(objects, { 'x': 4 }, function(o) { return o.x; }); - * // => 1 - * - * // The `_.property` iteratee shorthand. 
- * _.sortedLastIndexBy(objects, { 'x': 4 }, 'x'); - * // => 1 - */ - function sortedLastIndexBy(array, value, iteratee) { - return baseSortedIndexBy(array, value, getIteratee(iteratee, 2), true); - } - - /** - * This method is like `_.lastIndexOf` except that it performs a binary - * search on a sorted `array`. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Array - * @param {Array} array The array to inspect. - * @param {*} value The value to search for. - * @returns {number} Returns the index of the matched value, else `-1`. - * @example - * - * _.sortedLastIndexOf([4, 5, 5, 5, 6], 5); - * // => 3 - */ - function sortedLastIndexOf(array, value) { - var length = array == null ? 0 : array.length; - if (length) { - var index = baseSortedIndex(array, value, true) - 1; - if (eq(array[index], value)) { - return index; - } - } - return -1; - } - - /** - * This method is like `_.uniq` except that it's designed and optimized - * for sorted arrays. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Array - * @param {Array} array The array to inspect. - * @returns {Array} Returns the new duplicate free array. - * @example - * - * _.sortedUniq([1, 1, 2]); - * // => [1, 2] - */ - function sortedUniq(array) { - return (array && array.length) - ? baseSortedUniq(array) - : []; - } - - /** - * This method is like `_.uniqBy` except that it's designed and optimized - * for sorted arrays. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Array - * @param {Array} array The array to inspect. - * @param {Function} [iteratee] The iteratee invoked per element. - * @returns {Array} Returns the new duplicate free array. - * @example - * - * _.sortedUniqBy([1.1, 1.2, 2.3, 2.4], Math.floor); - * // => [1.1, 2.3] - */ - function sortedUniqBy(array, iteratee) { - return (array && array.length) - ? baseSortedUniq(array, getIteratee(iteratee, 2)) - : []; - } - - /** - * Gets all but the first element of `array`. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Array - * @param {Array} array The array to query. - * @returns {Array} Returns the slice of `array`. - * @example - * - * _.tail([1, 2, 3]); - * // => [2, 3] - */ - function tail(array) { - var length = array == null ? 0 : array.length; - return length ? baseSlice(array, 1, length) : []; - } - - /** - * Creates a slice of `array` with `n` elements taken from the beginning. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Array - * @param {Array} array The array to query. - * @param {number} [n=1] The number of elements to take. - * @param- {Object} [guard] Enables use as an iteratee for methods like `_.map`. - * @returns {Array} Returns the slice of `array`. - * @example - * - * _.take([1, 2, 3]); - * // => [1] - * - * _.take([1, 2, 3], 2); - * // => [1, 2] - * - * _.take([1, 2, 3], 5); - * // => [1, 2, 3] - * - * _.take([1, 2, 3], 0); - * // => [] - */ - function take(array, n, guard) { - if (!(array && array.length)) { - return []; - } - n = (guard || n === undefined) ? 1 : toInteger(n); - return baseSlice(array, 0, n < 0 ? 0 : n); - } - - /** - * Creates a slice of `array` with `n` elements taken from the end. - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category Array - * @param {Array} array The array to query. - * @param {number} [n=1] The number of elements to take. - * @param- {Object} [guard] Enables use as an iteratee for methods like `_.map`. - * @returns {Array} Returns the slice of `array`. 
- * @example - * - * _.takeRight([1, 2, 3]); - * // => [3] - * - * _.takeRight([1, 2, 3], 2); - * // => [2, 3] - * - * _.takeRight([1, 2, 3], 5); - * // => [1, 2, 3] - * - * _.takeRight([1, 2, 3], 0); - * // => [] - */ - function takeRight(array, n, guard) { - var length = array == null ? 0 : array.length; - if (!length) { - return []; - } - n = (guard || n === undefined) ? 1 : toInteger(n); - n = length - n; - return baseSlice(array, n < 0 ? 0 : n, length); - } - - /** - * Creates a slice of `array` with elements taken from the end. Elements are - * taken until `predicate` returns falsey. The predicate is invoked with - * three arguments: (value, index, array). - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category Array - * @param {Array} array The array to query. - * @param {Function} [predicate=_.identity] The function invoked per iteration. - * @returns {Array} Returns the slice of `array`. - * @example - * - * var users = [ - * { 'user': 'barney', 'active': true }, - * { 'user': 'fred', 'active': false }, - * { 'user': 'pebbles', 'active': false } - * ]; - * - * _.takeRightWhile(users, function(o) { return !o.active; }); - * // => objects for ['fred', 'pebbles'] - * - * // The `_.matches` iteratee shorthand. - * _.takeRightWhile(users, { 'user': 'pebbles', 'active': false }); - * // => objects for ['pebbles'] - * - * // The `_.matchesProperty` iteratee shorthand. - * _.takeRightWhile(users, ['active', false]); - * // => objects for ['fred', 'pebbles'] - * - * // The `_.property` iteratee shorthand. - * _.takeRightWhile(users, 'active'); - * // => [] - */ - function takeRightWhile(array, predicate) { - return (array && array.length) - ? baseWhile(array, getIteratee(predicate, 3), false, true) - : []; - } - - /** - * Creates a slice of `array` with elements taken from the beginning. Elements - * are taken until `predicate` returns falsey. The predicate is invoked with - * three arguments: (value, index, array). - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category Array - * @param {Array} array The array to query. - * @param {Function} [predicate=_.identity] The function invoked per iteration. - * @returns {Array} Returns the slice of `array`. - * @example - * - * var users = [ - * { 'user': 'barney', 'active': false }, - * { 'user': 'fred', 'active': false }, - * { 'user': 'pebbles', 'active': true } - * ]; - * - * _.takeWhile(users, function(o) { return !o.active; }); - * // => objects for ['barney', 'fred'] - * - * // The `_.matches` iteratee shorthand. - * _.takeWhile(users, { 'user': 'barney', 'active': false }); - * // => objects for ['barney'] - * - * // The `_.matchesProperty` iteratee shorthand. - * _.takeWhile(users, ['active', false]); - * // => objects for ['barney', 'fred'] - * - * // The `_.property` iteratee shorthand. - * _.takeWhile(users, 'active'); - * // => [] - */ - function takeWhile(array, predicate) { - return (array && array.length) - ? baseWhile(array, getIteratee(predicate, 3)) - : []; - } - - /** - * Creates an array of unique values, in order, from all given arrays using - * [`SameValueZero`](http://ecma-international.org/ecma-262/7.0/#sec-samevaluezero) - * for equality comparisons. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Array - * @param {...Array} [arrays] The arrays to inspect. - * @returns {Array} Returns the new array of combined values. 
- * @example - * - * _.union([2], [1, 2]); - * // => [2, 1] - */ - var union = baseRest(function(arrays) { - return baseUniq(baseFlatten(arrays, 1, isArrayLikeObject, true)); - }); - - /** - * This method is like `_.union` except that it accepts `iteratee` which is - * invoked for each element of each `arrays` to generate the criterion by - * which uniqueness is computed. Result values are chosen from the first - * array in which the value occurs. The iteratee is invoked with one argument: - * (value). - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Array - * @param {...Array} [arrays] The arrays to inspect. - * @param {Function} [iteratee=_.identity] The iteratee invoked per element. - * @returns {Array} Returns the new array of combined values. - * @example - * - * _.unionBy([2.1], [1.2, 2.3], Math.floor); - * // => [2.1, 1.2] - * - * // The `_.property` iteratee shorthand. - * _.unionBy([{ 'x': 1 }], [{ 'x': 2 }, { 'x': 1 }], 'x'); - * // => [{ 'x': 1 }, { 'x': 2 }] - */ - var unionBy = baseRest(function(arrays) { - var iteratee = last(arrays); - if (isArrayLikeObject(iteratee)) { - iteratee = undefined; - } - return baseUniq(baseFlatten(arrays, 1, isArrayLikeObject, true), getIteratee(iteratee, 2)); - }); - - /** - * This method is like `_.union` except that it accepts `comparator` which - * is invoked to compare elements of `arrays`. Result values are chosen from - * the first array in which the value occurs. The comparator is invoked - * with two arguments: (arrVal, othVal). - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Array - * @param {...Array} [arrays] The arrays to inspect. - * @param {Function} [comparator] The comparator invoked per element. - * @returns {Array} Returns the new array of combined values. - * @example - * - * var objects = [{ 'x': 1, 'y': 2 }, { 'x': 2, 'y': 1 }]; - * var others = [{ 'x': 1, 'y': 1 }, { 'x': 1, 'y': 2 }]; - * - * _.unionWith(objects, others, _.isEqual); - * // => [{ 'x': 1, 'y': 2 }, { 'x': 2, 'y': 1 }, { 'x': 1, 'y': 1 }] - */ - var unionWith = baseRest(function(arrays) { - var comparator = last(arrays); - comparator = typeof comparator == 'function' ? comparator : undefined; - return baseUniq(baseFlatten(arrays, 1, isArrayLikeObject, true), undefined, comparator); - }); - - /** - * Creates a duplicate-free version of an array, using - * [`SameValueZero`](http://ecma-international.org/ecma-262/7.0/#sec-samevaluezero) - * for equality comparisons, in which only the first occurrence of each element - * is kept. The order of result values is determined by the order they occur - * in the array. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Array - * @param {Array} array The array to inspect. - * @returns {Array} Returns the new duplicate free array. - * @example - * - * _.uniq([2, 1, 2]); - * // => [2, 1] - */ - function uniq(array) { - return (array && array.length) ? baseUniq(array) : []; - } - - /** - * This method is like `_.uniq` except that it accepts `iteratee` which is - * invoked for each element in `array` to generate the criterion by which - * uniqueness is computed. The order of result values is determined by the - * order they occur in the array. The iteratee is invoked with one argument: - * (value). - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Array - * @param {Array} array The array to inspect. - * @param {Function} [iteratee=_.identity] The iteratee invoked per element. - * @returns {Array} Returns the new duplicate free array. 
- * @example - * - * _.uniqBy([2.1, 1.2, 2.3], Math.floor); - * // => [2.1, 1.2] - * - * // The `_.property` iteratee shorthand. - * _.uniqBy([{ 'x': 1 }, { 'x': 2 }, { 'x': 1 }], 'x'); - * // => [{ 'x': 1 }, { 'x': 2 }] - */ - function uniqBy(array, iteratee) { - return (array && array.length) ? baseUniq(array, getIteratee(iteratee, 2)) : []; - } - - /** - * This method is like `_.uniq` except that it accepts `comparator` which - * is invoked to compare elements of `array`. The order of result values is - * determined by the order they occur in the array.The comparator is invoked - * with two arguments: (arrVal, othVal). - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Array - * @param {Array} array The array to inspect. - * @param {Function} [comparator] The comparator invoked per element. - * @returns {Array} Returns the new duplicate free array. - * @example - * - * var objects = [{ 'x': 1, 'y': 2 }, { 'x': 2, 'y': 1 }, { 'x': 1, 'y': 2 }]; - * - * _.uniqWith(objects, _.isEqual); - * // => [{ 'x': 1, 'y': 2 }, { 'x': 2, 'y': 1 }] - */ - function uniqWith(array, comparator) { - comparator = typeof comparator == 'function' ? comparator : undefined; - return (array && array.length) ? baseUniq(array, undefined, comparator) : []; - } - - /** - * This method is like `_.zip` except that it accepts an array of grouped - * elements and creates an array regrouping the elements to their pre-zip - * configuration. - * - * @static - * @memberOf _ - * @since 1.2.0 - * @category Array - * @param {Array} array The array of grouped elements to process. - * @returns {Array} Returns the new array of regrouped elements. - * @example - * - * var zipped = _.zip(['a', 'b'], [1, 2], [true, false]); - * // => [['a', 1, true], ['b', 2, false]] - * - * _.unzip(zipped); - * // => [['a', 'b'], [1, 2], [true, false]] - */ - function unzip(array) { - if (!(array && array.length)) { - return []; - } - var length = 0; - array = arrayFilter(array, function(group) { - if (isArrayLikeObject(group)) { - length = nativeMax(group.length, length); - return true; - } - }); - return baseTimes(length, function(index) { - return arrayMap(array, baseProperty(index)); - }); - } - - /** - * This method is like `_.unzip` except that it accepts `iteratee` to specify - * how regrouped values should be combined. The iteratee is invoked with the - * elements of each group: (...group). - * - * @static - * @memberOf _ - * @since 3.8.0 - * @category Array - * @param {Array} array The array of grouped elements to process. - * @param {Function} [iteratee=_.identity] The function to combine - * regrouped values. - * @returns {Array} Returns the new array of regrouped elements. - * @example - * - * var zipped = _.zip([1, 2], [10, 20], [100, 200]); - * // => [[1, 10, 100], [2, 20, 200]] - * - * _.unzipWith(zipped, _.add); - * // => [3, 30, 300] - */ - function unzipWith(array, iteratee) { - if (!(array && array.length)) { - return []; - } - var result = unzip(array); - if (iteratee == null) { - return result; - } - return arrayMap(result, function(group) { - return apply(iteratee, undefined, group); - }); - } - - /** - * Creates an array excluding all given values using - * [`SameValueZero`](http://ecma-international.org/ecma-262/7.0/#sec-samevaluezero) - * for equality comparisons. - * - * **Note:** Unlike `_.pull`, this method returns a new array. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Array - * @param {Array} array The array to inspect. - * @param {...*} [values] The values to exclude. 
- * @returns {Array} Returns the new array of filtered values. - * @see _.difference, _.xor - * @example - * - * _.without([2, 1, 2, 3], 1, 2); - * // => [3] - */ - var without = baseRest(function(array, values) { - return isArrayLikeObject(array) - ? baseDifference(array, values) - : []; - }); - - /** - * Creates an array of unique values that is the - * [symmetric difference](https://en.wikipedia.org/wiki/Symmetric_difference) - * of the given arrays. The order of result values is determined by the order - * they occur in the arrays. - * - * @static - * @memberOf _ - * @since 2.4.0 - * @category Array - * @param {...Array} [arrays] The arrays to inspect. - * @returns {Array} Returns the new array of filtered values. - * @see _.difference, _.without - * @example - * - * _.xor([2, 1], [2, 3]); - * // => [1, 3] - */ - var xor = baseRest(function(arrays) { - return baseXor(arrayFilter(arrays, isArrayLikeObject)); - }); - - /** - * This method is like `_.xor` except that it accepts `iteratee` which is - * invoked for each element of each `arrays` to generate the criterion by - * which by which they're compared. The order of result values is determined - * by the order they occur in the arrays. The iteratee is invoked with one - * argument: (value). - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Array - * @param {...Array} [arrays] The arrays to inspect. - * @param {Function} [iteratee=_.identity] The iteratee invoked per element. - * @returns {Array} Returns the new array of filtered values. - * @example - * - * _.xorBy([2.1, 1.2], [2.3, 3.4], Math.floor); - * // => [1.2, 3.4] - * - * // The `_.property` iteratee shorthand. - * _.xorBy([{ 'x': 1 }], [{ 'x': 2 }, { 'x': 1 }], 'x'); - * // => [{ 'x': 2 }] - */ - var xorBy = baseRest(function(arrays) { - var iteratee = last(arrays); - if (isArrayLikeObject(iteratee)) { - iteratee = undefined; - } - return baseXor(arrayFilter(arrays, isArrayLikeObject), getIteratee(iteratee, 2)); - }); - - /** - * This method is like `_.xor` except that it accepts `comparator` which is - * invoked to compare elements of `arrays`. The order of result values is - * determined by the order they occur in the arrays. The comparator is invoked - * with two arguments: (arrVal, othVal). - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Array - * @param {...Array} [arrays] The arrays to inspect. - * @param {Function} [comparator] The comparator invoked per element. - * @returns {Array} Returns the new array of filtered values. - * @example - * - * var objects = [{ 'x': 1, 'y': 2 }, { 'x': 2, 'y': 1 }]; - * var others = [{ 'x': 1, 'y': 1 }, { 'x': 1, 'y': 2 }]; - * - * _.xorWith(objects, others, _.isEqual); - * // => [{ 'x': 2, 'y': 1 }, { 'x': 1, 'y': 1 }] - */ - var xorWith = baseRest(function(arrays) { - var comparator = last(arrays); - comparator = typeof comparator == 'function' ? comparator : undefined; - return baseXor(arrayFilter(arrays, isArrayLikeObject), undefined, comparator); - }); - - /** - * Creates an array of grouped elements, the first of which contains the - * first elements of the given arrays, the second of which contains the - * second elements of the given arrays, and so on. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Array - * @param {...Array} [arrays] The arrays to process. - * @returns {Array} Returns the new array of grouped elements. 
- * @example - * - * _.zip(['a', 'b'], [1, 2], [true, false]); - * // => [['a', 1, true], ['b', 2, false]] - */ - var zip = baseRest(unzip); - - /** - * This method is like `_.fromPairs` except that it accepts two arrays, - * one of property identifiers and one of corresponding values. - * - * @static - * @memberOf _ - * @since 0.4.0 - * @category Array - * @param {Array} [props=[]] The property identifiers. - * @param {Array} [values=[]] The property values. - * @returns {Object} Returns the new object. - * @example - * - * _.zipObject(['a', 'b'], [1, 2]); - * // => { 'a': 1, 'b': 2 } - */ - function zipObject(props, values) { - return baseZipObject(props || [], values || [], assignValue); - } - - /** - * This method is like `_.zipObject` except that it supports property paths. - * - * @static - * @memberOf _ - * @since 4.1.0 - * @category Array - * @param {Array} [props=[]] The property identifiers. - * @param {Array} [values=[]] The property values. - * @returns {Object} Returns the new object. - * @example - * - * _.zipObjectDeep(['a.b[0].c', 'a.b[1].d'], [1, 2]); - * // => { 'a': { 'b': [{ 'c': 1 }, { 'd': 2 }] } } - */ - function zipObjectDeep(props, values) { - return baseZipObject(props || [], values || [], baseSet); - } - - /** - * This method is like `_.zip` except that it accepts `iteratee` to specify - * how grouped values should be combined. The iteratee is invoked with the - * elements of each group: (...group). - * - * @static - * @memberOf _ - * @since 3.8.0 - * @category Array - * @param {...Array} [arrays] The arrays to process. - * @param {Function} [iteratee=_.identity] The function to combine - * grouped values. - * @returns {Array} Returns the new array of grouped elements. - * @example - * - * _.zipWith([1, 2], [10, 20], [100, 200], function(a, b, c) { - * return a + b + c; - * }); - * // => [111, 222] - */ - var zipWith = baseRest(function(arrays) { - var length = arrays.length, - iteratee = length > 1 ? arrays[length - 1] : undefined; - - iteratee = typeof iteratee == 'function' ? (arrays.pop(), iteratee) : undefined; - return unzipWith(arrays, iteratee); - }); - - /*------------------------------------------------------------------------*/ - - /** - * Creates a `lodash` wrapper instance that wraps `value` with explicit method - * chain sequences enabled. The result of such sequences must be unwrapped - * with `_#value`. - * - * @static - * @memberOf _ - * @since 1.3.0 - * @category Seq - * @param {*} value The value to wrap. - * @returns {Object} Returns the new `lodash` wrapper instance. - * @example - * - * var users = [ - * { 'user': 'barney', 'age': 36 }, - * { 'user': 'fred', 'age': 40 }, - * { 'user': 'pebbles', 'age': 1 } - * ]; - * - * var youngest = _ - * .chain(users) - * .sortBy('age') - * .map(function(o) { - * return o.user + ' is ' + o.age; - * }) - * .head() - * .value(); - * // => 'pebbles is 1' - */ - function chain(value) { - var result = lodash(value); - result.__chain__ = true; - return result; - } - - /** - * This method invokes `interceptor` and returns `value`. The interceptor - * is invoked with one argument; (value). The purpose of this method is to - * "tap into" a method chain sequence in order to modify intermediate results. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Seq - * @param {*} value The value to provide to `interceptor`. - * @param {Function} interceptor The function to invoke. - * @returns {*} Returns `value`. - * @example - * - * _([1, 2, 3]) - * .tap(function(array) { - * // Mutate input array. 
- * array.pop(); - * }) - * .reverse() - * .value(); - * // => [2, 1] - */ - function tap(value, interceptor) { - interceptor(value); - return value; - } - - /** - * This method is like `_.tap` except that it returns the result of `interceptor`. - * The purpose of this method is to "pass thru" values replacing intermediate - * results in a method chain sequence. - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category Seq - * @param {*} value The value to provide to `interceptor`. - * @param {Function} interceptor The function to invoke. - * @returns {*} Returns the result of `interceptor`. - * @example - * - * _(' abc ') - * .chain() - * .trim() - * .thru(function(value) { - * return [value]; - * }) - * .value(); - * // => ['abc'] - */ - function thru(value, interceptor) { - return interceptor(value); - } - - /** - * This method is the wrapper version of `_.at`. - * - * @name at - * @memberOf _ - * @since 1.0.0 - * @category Seq - * @param {...(string|string[])} [paths] The property paths to pick. - * @returns {Object} Returns the new `lodash` wrapper instance. - * @example - * - * var object = { 'a': [{ 'b': { 'c': 3 } }, 4] }; - * - * _(object).at(['a[0].b.c', 'a[1]']).value(); - * // => [3, 4] - */ - var wrapperAt = flatRest(function(paths) { - var length = paths.length, - start = length ? paths[0] : 0, - value = this.__wrapped__, - interceptor = function(object) { return baseAt(object, paths); }; - - if (length > 1 || this.__actions__.length || - !(value instanceof LazyWrapper) || !isIndex(start)) { - return this.thru(interceptor); - } - value = value.slice(start, +start + (length ? 1 : 0)); - value.__actions__.push({ - 'func': thru, - 'args': [interceptor], - 'thisArg': undefined - }); - return new LodashWrapper(value, this.__chain__).thru(function(array) { - if (length && !array.length) { - array.push(undefined); - } - return array; - }); - }); - - /** - * Creates a `lodash` wrapper instance with explicit method chain sequences enabled. - * - * @name chain - * @memberOf _ - * @since 0.1.0 - * @category Seq - * @returns {Object} Returns the new `lodash` wrapper instance. - * @example - * - * var users = [ - * { 'user': 'barney', 'age': 36 }, - * { 'user': 'fred', 'age': 40 } - * ]; - * - * // A sequence without explicit chaining. - * _(users).head(); - * // => { 'user': 'barney', 'age': 36 } - * - * // A sequence with explicit chaining. - * _(users) - * .chain() - * .head() - * .pick('user') - * .value(); - * // => { 'user': 'barney' } - */ - function wrapperChain() { - return chain(this); - } - - /** - * Executes the chain sequence and returns the wrapped result. - * - * @name commit - * @memberOf _ - * @since 3.2.0 - * @category Seq - * @returns {Object} Returns the new `lodash` wrapper instance. - * @example - * - * var array = [1, 2]; - * var wrapped = _(array).push(3); - * - * console.log(array); - * // => [1, 2] - * - * wrapped = wrapped.commit(); - * console.log(array); - * // => [1, 2, 3] - * - * wrapped.last(); - * // => 3 - * - * console.log(array); - * // => [1, 2, 3] - */ - function wrapperCommit() { - return new LodashWrapper(this.value(), this.__chain__); - } - - /** - * Gets the next value on a wrapped object following the - * [iterator protocol](https://mdn.io/iteration_protocols#iterator). - * - * @name next - * @memberOf _ - * @since 4.0.0 - * @category Seq - * @returns {Object} Returns the next iterator value. 
- * @example - * - * var wrapped = _([1, 2]); - * - * wrapped.next(); - * // => { 'done': false, 'value': 1 } - * - * wrapped.next(); - * // => { 'done': false, 'value': 2 } - * - * wrapped.next(); - * // => { 'done': true, 'value': undefined } - */ - function wrapperNext() { - if (this.__values__ === undefined) { - this.__values__ = toArray(this.value()); - } - var done = this.__index__ >= this.__values__.length, - value = done ? undefined : this.__values__[this.__index__++]; - - return { 'done': done, 'value': value }; - } - - /** - * Enables the wrapper to be iterable. - * - * @name Symbol.iterator - * @memberOf _ - * @since 4.0.0 - * @category Seq - * @returns {Object} Returns the wrapper object. - * @example - * - * var wrapped = _([1, 2]); - * - * wrapped[Symbol.iterator]() === wrapped; - * // => true - * - * Array.from(wrapped); - * // => [1, 2] - */ - function wrapperToIterator() { - return this; - } - - /** - * Creates a clone of the chain sequence planting `value` as the wrapped value. - * - * @name plant - * @memberOf _ - * @since 3.2.0 - * @category Seq - * @param {*} value The value to plant. - * @returns {Object} Returns the new `lodash` wrapper instance. - * @example - * - * function square(n) { - * return n * n; - * } - * - * var wrapped = _([1, 2]).map(square); - * var other = wrapped.plant([3, 4]); - * - * other.value(); - * // => [9, 16] - * - * wrapped.value(); - * // => [1, 4] - */ - function wrapperPlant(value) { - var result, - parent = this; - - while (parent instanceof baseLodash) { - var clone = wrapperClone(parent); - clone.__index__ = 0; - clone.__values__ = undefined; - if (result) { - previous.__wrapped__ = clone; - } else { - result = clone; - } - var previous = clone; - parent = parent.__wrapped__; - } - previous.__wrapped__ = value; - return result; - } - - /** - * This method is the wrapper version of `_.reverse`. - * - * **Note:** This method mutates the wrapped array. - * - * @name reverse - * @memberOf _ - * @since 0.1.0 - * @category Seq - * @returns {Object} Returns the new `lodash` wrapper instance. - * @example - * - * var array = [1, 2, 3]; - * - * _(array).reverse().value() - * // => [3, 2, 1] - * - * console.log(array); - * // => [3, 2, 1] - */ - function wrapperReverse() { - var value = this.__wrapped__; - if (value instanceof LazyWrapper) { - var wrapped = value; - if (this.__actions__.length) { - wrapped = new LazyWrapper(this); - } - wrapped = wrapped.reverse(); - wrapped.__actions__.push({ - 'func': thru, - 'args': [reverse], - 'thisArg': undefined - }); - return new LodashWrapper(wrapped, this.__chain__); - } - return this.thru(reverse); - } - - /** - * Executes the chain sequence to resolve the unwrapped value. - * - * @name value - * @memberOf _ - * @since 0.1.0 - * @alias toJSON, valueOf - * @category Seq - * @returns {*} Returns the resolved unwrapped value. - * @example - * - * _([1, 2, 3]).value(); - * // => [1, 2, 3] - */ - function wrapperValue() { - return baseWrapperValue(this.__wrapped__, this.__actions__); - } - - /*------------------------------------------------------------------------*/ - - /** - * Creates an object composed of keys generated from the results of running - * each element of `collection` thru `iteratee`. The corresponding value of - * each key is the number of times the key was returned by `iteratee`. The - * iteratee is invoked with one argument: (value). - * - * @static - * @memberOf _ - * @since 0.5.0 - * @category Collection - * @param {Array|Object} collection The collection to iterate over. 
- * @param {Function} [iteratee=_.identity] The iteratee to transform keys. - * @returns {Object} Returns the composed aggregate object. - * @example - * - * _.countBy([6.1, 4.2, 6.3], Math.floor); - * // => { '4': 1, '6': 2 } - * - * // The `_.property` iteratee shorthand. - * _.countBy(['one', 'two', 'three'], 'length'); - * // => { '3': 2, '5': 1 } - */ - var countBy = createAggregator(function(result, value, key) { - if (hasOwnProperty.call(result, key)) { - ++result[key]; - } else { - baseAssignValue(result, key, 1); - } - }); - - /** - * Checks if `predicate` returns truthy for **all** elements of `collection`. - * Iteration is stopped once `predicate` returns falsey. The predicate is - * invoked with three arguments: (value, index|key, collection). - * - * **Note:** This method returns `true` for - * [empty collections](https://en.wikipedia.org/wiki/Empty_set) because - * [everything is true](https://en.wikipedia.org/wiki/Vacuous_truth) of - * elements of empty collections. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Collection - * @param {Array|Object} collection The collection to iterate over. - * @param {Function} [predicate=_.identity] The function invoked per iteration. - * @param- {Object} [guard] Enables use as an iteratee for methods like `_.map`. - * @returns {boolean} Returns `true` if all elements pass the predicate check, - * else `false`. - * @example - * - * _.every([true, 1, null, 'yes'], Boolean); - * // => false - * - * var users = [ - * { 'user': 'barney', 'age': 36, 'active': false }, - * { 'user': 'fred', 'age': 40, 'active': false } - * ]; - * - * // The `_.matches` iteratee shorthand. - * _.every(users, { 'user': 'barney', 'active': false }); - * // => false - * - * // The `_.matchesProperty` iteratee shorthand. - * _.every(users, ['active', false]); - * // => true - * - * // The `_.property` iteratee shorthand. - * _.every(users, 'active'); - * // => false - */ - function every(collection, predicate, guard) { - var func = isArray(collection) ? arrayEvery : baseEvery; - if (guard && isIterateeCall(collection, predicate, guard)) { - predicate = undefined; - } - return func(collection, getIteratee(predicate, 3)); - } - - /** - * Iterates over elements of `collection`, returning an array of all elements - * `predicate` returns truthy for. The predicate is invoked with three - * arguments: (value, index|key, collection). - * - * **Note:** Unlike `_.remove`, this method returns a new array. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Collection - * @param {Array|Object} collection The collection to iterate over. - * @param {Function} [predicate=_.identity] The function invoked per iteration. - * @returns {Array} Returns the new filtered array. - * @see _.reject - * @example - * - * var users = [ - * { 'user': 'barney', 'age': 36, 'active': true }, - * { 'user': 'fred', 'age': 40, 'active': false } - * ]; - * - * _.filter(users, function(o) { return !o.active; }); - * // => objects for ['fred'] - * - * // The `_.matches` iteratee shorthand. - * _.filter(users, { 'age': 36, 'active': true }); - * // => objects for ['barney'] - * - * // The `_.matchesProperty` iteratee shorthand. - * _.filter(users, ['active', false]); - * // => objects for ['fred'] - * - * // The `_.property` iteratee shorthand. - * _.filter(users, 'active'); - * // => objects for ['barney'] - */ - function filter(collection, predicate) { - var func = isArray(collection) ? 
arrayFilter : baseFilter; - return func(collection, getIteratee(predicate, 3)); - } - - /** - * Iterates over elements of `collection`, returning the first element - * `predicate` returns truthy for. The predicate is invoked with three - * arguments: (value, index|key, collection). - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Collection - * @param {Array|Object} collection The collection to inspect. - * @param {Function} [predicate=_.identity] The function invoked per iteration. - * @param {number} [fromIndex=0] The index to search from. - * @returns {*} Returns the matched element, else `undefined`. - * @example - * - * var users = [ - * { 'user': 'barney', 'age': 36, 'active': true }, - * { 'user': 'fred', 'age': 40, 'active': false }, - * { 'user': 'pebbles', 'age': 1, 'active': true } - * ]; - * - * _.find(users, function(o) { return o.age < 40; }); - * // => object for 'barney' - * - * // The `_.matches` iteratee shorthand. - * _.find(users, { 'age': 1, 'active': true }); - * // => object for 'pebbles' - * - * // The `_.matchesProperty` iteratee shorthand. - * _.find(users, ['active', false]); - * // => object for 'fred' - * - * // The `_.property` iteratee shorthand. - * _.find(users, 'active'); - * // => object for 'barney' - */ - var find = createFind(findIndex); - - /** - * This method is like `_.find` except that it iterates over elements of - * `collection` from right to left. - * - * @static - * @memberOf _ - * @since 2.0.0 - * @category Collection - * @param {Array|Object} collection The collection to inspect. - * @param {Function} [predicate=_.identity] The function invoked per iteration. - * @param {number} [fromIndex=collection.length-1] The index to search from. - * @returns {*} Returns the matched element, else `undefined`. - * @example - * - * _.findLast([1, 2, 3, 4], function(n) { - * return n % 2 == 1; - * }); - * // => 3 - */ - var findLast = createFind(findLastIndex); - - /** - * Creates a flattened array of values by running each element in `collection` - * thru `iteratee` and flattening the mapped results. The iteratee is invoked - * with three arguments: (value, index|key, collection). - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Collection - * @param {Array|Object} collection The collection to iterate over. - * @param {Function} [iteratee=_.identity] The function invoked per iteration. - * @returns {Array} Returns the new flattened array. - * @example - * - * function duplicate(n) { - * return [n, n]; - * } - * - * _.flatMap([1, 2], duplicate); - * // => [1, 1, 2, 2] - */ - function flatMap(collection, iteratee) { - return baseFlatten(map(collection, iteratee), 1); - } - - /** - * This method is like `_.flatMap` except that it recursively flattens the - * mapped results. - * - * @static - * @memberOf _ - * @since 4.7.0 - * @category Collection - * @param {Array|Object} collection The collection to iterate over. - * @param {Function} [iteratee=_.identity] The function invoked per iteration. - * @returns {Array} Returns the new flattened array. - * @example - * - * function duplicate(n) { - * return [[[n, n]]]; - * } - * - * _.flatMapDeep([1, 2], duplicate); - * // => [1, 1, 2, 2] - */ - function flatMapDeep(collection, iteratee) { - return baseFlatten(map(collection, iteratee), INFINITY); - } - - /** - * This method is like `_.flatMap` except that it recursively flattens the - * mapped results up to `depth` times. 
- * - * @static - * @memberOf _ - * @since 4.7.0 - * @category Collection - * @param {Array|Object} collection The collection to iterate over. - * @param {Function} [iteratee=_.identity] The function invoked per iteration. - * @param {number} [depth=1] The maximum recursion depth. - * @returns {Array} Returns the new flattened array. - * @example - * - * function duplicate(n) { - * return [[[n, n]]]; - * } - * - * _.flatMapDepth([1, 2], duplicate, 2); - * // => [[1, 1], [2, 2]] - */ - function flatMapDepth(collection, iteratee, depth) { - depth = depth === undefined ? 1 : toInteger(depth); - return baseFlatten(map(collection, iteratee), depth); - } - - /** - * Iterates over elements of `collection` and invokes `iteratee` for each element. - * The iteratee is invoked with three arguments: (value, index|key, collection). - * Iteratee functions may exit iteration early by explicitly returning `false`. - * - * **Note:** As with other "Collections" methods, objects with a "length" - * property are iterated like arrays. To avoid this behavior use `_.forIn` - * or `_.forOwn` for object iteration. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @alias each - * @category Collection - * @param {Array|Object} collection The collection to iterate over. - * @param {Function} [iteratee=_.identity] The function invoked per iteration. - * @returns {Array|Object} Returns `collection`. - * @see _.forEachRight - * @example - * - * _.forEach([1, 2], function(value) { - * console.log(value); - * }); - * // => Logs `1` then `2`. - * - * _.forEach({ 'a': 1, 'b': 2 }, function(value, key) { - * console.log(key); - * }); - * // => Logs 'a' then 'b' (iteration order is not guaranteed). - */ - function forEach(collection, iteratee) { - var func = isArray(collection) ? arrayEach : baseEach; - return func(collection, getIteratee(iteratee, 3)); - } - - /** - * This method is like `_.forEach` except that it iterates over elements of - * `collection` from right to left. - * - * @static - * @memberOf _ - * @since 2.0.0 - * @alias eachRight - * @category Collection - * @param {Array|Object} collection The collection to iterate over. - * @param {Function} [iteratee=_.identity] The function invoked per iteration. - * @returns {Array|Object} Returns `collection`. - * @see _.forEach - * @example - * - * _.forEachRight([1, 2], function(value) { - * console.log(value); - * }); - * // => Logs `2` then `1`. - */ - function forEachRight(collection, iteratee) { - var func = isArray(collection) ? arrayEachRight : baseEachRight; - return func(collection, getIteratee(iteratee, 3)); - } - - /** - * Creates an object composed of keys generated from the results of running - * each element of `collection` thru `iteratee`. The order of grouped values - * is determined by the order they occur in `collection`. The corresponding - * value of each key is an array of elements responsible for generating the - * key. The iteratee is invoked with one argument: (value). - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Collection - * @param {Array|Object} collection The collection to iterate over. - * @param {Function} [iteratee=_.identity] The iteratee to transform keys. - * @returns {Object} Returns the composed aggregate object. - * @example - * - * _.groupBy([6.1, 4.2, 6.3], Math.floor); - * // => { '4': [4.2], '6': [6.1, 6.3] } - * - * // The `_.property` iteratee shorthand. 
- * _.groupBy(['one', 'two', 'three'], 'length'); - * // => { '3': ['one', 'two'], '5': ['three'] } - */ - var groupBy = createAggregator(function(result, value, key) { - if (hasOwnProperty.call(result, key)) { - result[key].push(value); - } else { - baseAssignValue(result, key, [value]); - } - }); - - /** - * Checks if `value` is in `collection`. If `collection` is a string, it's - * checked for a substring of `value`, otherwise - * [`SameValueZero`](http://ecma-international.org/ecma-262/7.0/#sec-samevaluezero) - * is used for equality comparisons. If `fromIndex` is negative, it's used as - * the offset from the end of `collection`. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Collection - * @param {Array|Object|string} collection The collection to inspect. - * @param {*} value The value to search for. - * @param {number} [fromIndex=0] The index to search from. - * @param- {Object} [guard] Enables use as an iteratee for methods like `_.reduce`. - * @returns {boolean} Returns `true` if `value` is found, else `false`. - * @example - * - * _.includes([1, 2, 3], 1); - * // => true - * - * _.includes([1, 2, 3], 1, 2); - * // => false - * - * _.includes({ 'a': 1, 'b': 2 }, 1); - * // => true - * - * _.includes('abcd', 'bc'); - * // => true - */ - function includes(collection, value, fromIndex, guard) { - collection = isArrayLike(collection) ? collection : values(collection); - fromIndex = (fromIndex && !guard) ? toInteger(fromIndex) : 0; - - var length = collection.length; - if (fromIndex < 0) { - fromIndex = nativeMax(length + fromIndex, 0); - } - return isString(collection) - ? (fromIndex <= length && collection.indexOf(value, fromIndex) > -1) - : (!!length && baseIndexOf(collection, value, fromIndex) > -1); - } - - /** - * Invokes the method at `path` of each element in `collection`, returning - * an array of the results of each invoked method. Any additional arguments - * are provided to each invoked method. If `path` is a function, it's invoked - * for, and `this` bound to, each element in `collection`. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Collection - * @param {Array|Object} collection The collection to iterate over. - * @param {Array|Function|string} path The path of the method to invoke or - * the function invoked per iteration. - * @param {...*} [args] The arguments to invoke each method with. - * @returns {Array} Returns the array of results. - * @example - * - * _.invokeMap([[5, 1, 7], [3, 2, 1]], 'sort'); - * // => [[1, 5, 7], [1, 2, 3]] - * - * _.invokeMap([123, 456], String.prototype.split, ''); - * // => [['1', '2', '3'], ['4', '5', '6']] - */ - var invokeMap = baseRest(function(collection, path, args) { - var index = -1, - isFunc = typeof path == 'function', - result = isArrayLike(collection) ? Array(collection.length) : []; - - baseEach(collection, function(value) { - result[++index] = isFunc ? apply(path, value, args) : baseInvoke(value, path, args); - }); - return result; - }); - - /** - * Creates an object composed of keys generated from the results of running - * each element of `collection` thru `iteratee`. The corresponding value of - * each key is the last element responsible for generating the key. The - * iteratee is invoked with one argument: (value). - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Collection - * @param {Array|Object} collection The collection to iterate over. - * @param {Function} [iteratee=_.identity] The iteratee to transform keys. 
- * @returns {Object} Returns the composed aggregate object. - * @example - * - * var array = [ - * { 'dir': 'left', 'code': 97 }, - * { 'dir': 'right', 'code': 100 } - * ]; - * - * _.keyBy(array, function(o) { - * return String.fromCharCode(o.code); - * }); - * // => { 'a': { 'dir': 'left', 'code': 97 }, 'd': { 'dir': 'right', 'code': 100 } } - * - * _.keyBy(array, 'dir'); - * // => { 'left': { 'dir': 'left', 'code': 97 }, 'right': { 'dir': 'right', 'code': 100 } } - */ - var keyBy = createAggregator(function(result, value, key) { - baseAssignValue(result, key, value); - }); - - /** - * Creates an array of values by running each element in `collection` thru - * `iteratee`. The iteratee is invoked with three arguments: - * (value, index|key, collection). - * - * Many lodash methods are guarded to work as iteratees for methods like - * `_.every`, `_.filter`, `_.map`, `_.mapValues`, `_.reject`, and `_.some`. - * - * The guarded methods are: - * `ary`, `chunk`, `curry`, `curryRight`, `drop`, `dropRight`, `every`, - * `fill`, `invert`, `parseInt`, `random`, `range`, `rangeRight`, `repeat`, - * `sampleSize`, `slice`, `some`, `sortBy`, `split`, `take`, `takeRight`, - * `template`, `trim`, `trimEnd`, `trimStart`, and `words` - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Collection - * @param {Array|Object} collection The collection to iterate over. - * @param {Function} [iteratee=_.identity] The function invoked per iteration. - * @returns {Array} Returns the new mapped array. - * @example - * - * function square(n) { - * return n * n; - * } - * - * _.map([4, 8], square); - * // => [16, 64] - * - * _.map({ 'a': 4, 'b': 8 }, square); - * // => [16, 64] (iteration order is not guaranteed) - * - * var users = [ - * { 'user': 'barney' }, - * { 'user': 'fred' } - * ]; - * - * // The `_.property` iteratee shorthand. - * _.map(users, 'user'); - * // => ['barney', 'fred'] - */ - function map(collection, iteratee) { - var func = isArray(collection) ? arrayMap : baseMap; - return func(collection, getIteratee(iteratee, 3)); - } - - /** - * This method is like `_.sortBy` except that it allows specifying the sort - * orders of the iteratees to sort by. If `orders` is unspecified, all values - * are sorted in ascending order. Otherwise, specify an order of "desc" for - * descending or "asc" for ascending sort order of corresponding values. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Collection - * @param {Array|Object} collection The collection to iterate over. - * @param {Array[]|Function[]|Object[]|string[]} [iteratees=[_.identity]] - * The iteratees to sort by. - * @param {string[]} [orders] The sort orders of `iteratees`. - * @param- {Object} [guard] Enables use as an iteratee for methods like `_.reduce`. - * @returns {Array} Returns the new sorted array. - * @example - * - * var users = [ - * { 'user': 'fred', 'age': 48 }, - * { 'user': 'barney', 'age': 34 }, - * { 'user': 'fred', 'age': 40 }, - * { 'user': 'barney', 'age': 36 } - * ]; - * - * // Sort by `user` in ascending order and by `age` in descending order. - * _.orderBy(users, ['user', 'age'], ['asc', 'desc']); - * // => objects for [['barney', 36], ['barney', 34], ['fred', 48], ['fred', 40]] - */ - function orderBy(collection, iteratees, orders, guard) { - if (collection == null) { - return []; - } - if (!isArray(iteratees)) { - iteratees = iteratees == null ? [] : [iteratees]; - } - orders = guard ? undefined : orders; - if (!isArray(orders)) { - orders = orders == null ? 
[] : [orders]; - } - return baseOrderBy(collection, iteratees, orders); - } - - /** - * Creates an array of elements split into two groups, the first of which - * contains elements `predicate` returns truthy for, the second of which - * contains elements `predicate` returns falsey for. The predicate is - * invoked with one argument: (value). - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category Collection - * @param {Array|Object} collection The collection to iterate over. - * @param {Function} [predicate=_.identity] The function invoked per iteration. - * @returns {Array} Returns the array of grouped elements. - * @example - * - * var users = [ - * { 'user': 'barney', 'age': 36, 'active': false }, - * { 'user': 'fred', 'age': 40, 'active': true }, - * { 'user': 'pebbles', 'age': 1, 'active': false } - * ]; - * - * _.partition(users, function(o) { return o.active; }); - * // => objects for [['fred'], ['barney', 'pebbles']] - * - * // The `_.matches` iteratee shorthand. - * _.partition(users, { 'age': 1, 'active': false }); - * // => objects for [['pebbles'], ['barney', 'fred']] - * - * // The `_.matchesProperty` iteratee shorthand. - * _.partition(users, ['active', false]); - * // => objects for [['barney', 'pebbles'], ['fred']] - * - * // The `_.property` iteratee shorthand. - * _.partition(users, 'active'); - * // => objects for [['fred'], ['barney', 'pebbles']] - */ - var partition = createAggregator(function(result, value, key) { - result[key ? 0 : 1].push(value); - }, function() { return [[], []]; }); - - /** - * Reduces `collection` to a value which is the accumulated result of running - * each element in `collection` thru `iteratee`, where each successive - * invocation is supplied the return value of the previous. If `accumulator` - * is not given, the first element of `collection` is used as the initial - * value. The iteratee is invoked with four arguments: - * (accumulator, value, index|key, collection). - * - * Many lodash methods are guarded to work as iteratees for methods like - * `_.reduce`, `_.reduceRight`, and `_.transform`. - * - * The guarded methods are: - * `assign`, `defaults`, `defaultsDeep`, `includes`, `merge`, `orderBy`, - * and `sortBy` - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Collection - * @param {Array|Object} collection The collection to iterate over. - * @param {Function} [iteratee=_.identity] The function invoked per iteration. - * @param {*} [accumulator] The initial value. - * @returns {*} Returns the accumulated value. - * @see _.reduceRight - * @example - * - * _.reduce([1, 2], function(sum, n) { - * return sum + n; - * }, 0); - * // => 3 - * - * _.reduce({ 'a': 1, 'b': 2, 'c': 1 }, function(result, value, key) { - * (result[value] || (result[value] = [])).push(key); - * return result; - * }, {}); - * // => { '1': ['a', 'c'], '2': ['b'] } (iteration order is not guaranteed) - */ - function reduce(collection, iteratee, accumulator) { - var func = isArray(collection) ? arrayReduce : baseReduce, - initAccum = arguments.length < 3; - - return func(collection, getIteratee(iteratee, 4), accumulator, initAccum, baseEach); - } - - /** - * This method is like `_.reduce` except that it iterates over elements of - * `collection` from right to left. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Collection - * @param {Array|Object} collection The collection to iterate over. - * @param {Function} [iteratee=_.identity] The function invoked per iteration. - * @param {*} [accumulator] The initial value. 
- * @returns {*} Returns the accumulated value. - * @see _.reduce - * @example - * - * var array = [[0, 1], [2, 3], [4, 5]]; - * - * _.reduceRight(array, function(flattened, other) { - * return flattened.concat(other); - * }, []); - * // => [4, 5, 2, 3, 0, 1] - */ - function reduceRight(collection, iteratee, accumulator) { - var func = isArray(collection) ? arrayReduceRight : baseReduce, - initAccum = arguments.length < 3; - - return func(collection, getIteratee(iteratee, 4), accumulator, initAccum, baseEachRight); - } - - /** - * The opposite of `_.filter`; this method returns the elements of `collection` - * that `predicate` does **not** return truthy for. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Collection - * @param {Array|Object} collection The collection to iterate over. - * @param {Function} [predicate=_.identity] The function invoked per iteration. - * @returns {Array} Returns the new filtered array. - * @see _.filter - * @example - * - * var users = [ - * { 'user': 'barney', 'age': 36, 'active': false }, - * { 'user': 'fred', 'age': 40, 'active': true } - * ]; - * - * _.reject(users, function(o) { return !o.active; }); - * // => objects for ['fred'] - * - * // The `_.matches` iteratee shorthand. - * _.reject(users, { 'age': 40, 'active': true }); - * // => objects for ['barney'] - * - * // The `_.matchesProperty` iteratee shorthand. - * _.reject(users, ['active', false]); - * // => objects for ['fred'] - * - * // The `_.property` iteratee shorthand. - * _.reject(users, 'active'); - * // => objects for ['barney'] - */ - function reject(collection, predicate) { - var func = isArray(collection) ? arrayFilter : baseFilter; - return func(collection, negate(getIteratee(predicate, 3))); - } - - /** - * Gets a random element from `collection`. - * - * @static - * @memberOf _ - * @since 2.0.0 - * @category Collection - * @param {Array|Object} collection The collection to sample. - * @returns {*} Returns the random element. - * @example - * - * _.sample([1, 2, 3, 4]); - * // => 2 - */ - function sample(collection) { - var func = isArray(collection) ? arraySample : baseSample; - return func(collection); - } - - /** - * Gets `n` random elements at unique keys from `collection` up to the - * size of `collection`. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Collection - * @param {Array|Object} collection The collection to sample. - * @param {number} [n=1] The number of elements to sample. - * @param- {Object} [guard] Enables use as an iteratee for methods like `_.map`. - * @returns {Array} Returns the random elements. - * @example - * - * _.sampleSize([1, 2, 3], 2); - * // => [3, 1] - * - * _.sampleSize([1, 2, 3], 4); - * // => [2, 3, 1] - */ - function sampleSize(collection, n, guard) { - if ((guard ? isIterateeCall(collection, n, guard) : n === undefined)) { - n = 1; - } else { - n = toInteger(n); - } - var func = isArray(collection) ? arraySampleSize : baseSampleSize; - return func(collection, n); - } - - /** - * Creates an array of shuffled values, using a version of the - * [Fisher-Yates shuffle](https://en.wikipedia.org/wiki/Fisher-Yates_shuffle). - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Collection - * @param {Array|Object} collection The collection to shuffle. - * @returns {Array} Returns the new shuffled array. - * @example - * - * _.shuffle([1, 2, 3, 4]); - * // => [4, 1, 3, 2] - */ - function shuffle(collection) { - var func = isArray(collection) ? 
arrayShuffle : baseShuffle; - return func(collection); - } - - /** - * Gets the size of `collection` by returning its length for array-like - * values or the number of own enumerable string keyed properties for objects. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Collection - * @param {Array|Object|string} collection The collection to inspect. - * @returns {number} Returns the collection size. - * @example - * - * _.size([1, 2, 3]); - * // => 3 - * - * _.size({ 'a': 1, 'b': 2 }); - * // => 2 - * - * _.size('pebbles'); - * // => 7 - */ - function size(collection) { - if (collection == null) { - return 0; - } - if (isArrayLike(collection)) { - return isString(collection) ? stringSize(collection) : collection.length; - } - var tag = getTag(collection); - if (tag == mapTag || tag == setTag) { - return collection.size; - } - return baseKeys(collection).length; - } - - /** - * Checks if `predicate` returns truthy for **any** element of `collection`. - * Iteration is stopped once `predicate` returns truthy. The predicate is - * invoked with three arguments: (value, index|key, collection). - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Collection - * @param {Array|Object} collection The collection to iterate over. - * @param {Function} [predicate=_.identity] The function invoked per iteration. - * @param- {Object} [guard] Enables use as an iteratee for methods like `_.map`. - * @returns {boolean} Returns `true` if any element passes the predicate check, - * else `false`. - * @example - * - * _.some([null, 0, 'yes', false], Boolean); - * // => true - * - * var users = [ - * { 'user': 'barney', 'active': true }, - * { 'user': 'fred', 'active': false } - * ]; - * - * // The `_.matches` iteratee shorthand. - * _.some(users, { 'user': 'barney', 'active': false }); - * // => false - * - * // The `_.matchesProperty` iteratee shorthand. - * _.some(users, ['active', false]); - * // => true - * - * // The `_.property` iteratee shorthand. - * _.some(users, 'active'); - * // => true - */ - function some(collection, predicate, guard) { - var func = isArray(collection) ? arraySome : baseSome; - if (guard && isIterateeCall(collection, predicate, guard)) { - predicate = undefined; - } - return func(collection, getIteratee(predicate, 3)); - } - - /** - * Creates an array of elements, sorted in ascending order by the results of - * running each element in a collection thru each iteratee. This method - * performs a stable sort, that is, it preserves the original sort order of - * equal elements. The iteratees are invoked with one argument: (value). - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Collection - * @param {Array|Object} collection The collection to iterate over. - * @param {...(Function|Function[])} [iteratees=[_.identity]] - * The iteratees to sort by. - * @returns {Array} Returns the new sorted array. 
- * @example - * - * var users = [ - * { 'user': 'fred', 'age': 48 }, - * { 'user': 'barney', 'age': 36 }, - * { 'user': 'fred', 'age': 40 }, - * { 'user': 'barney', 'age': 34 } - * ]; - * - * _.sortBy(users, [function(o) { return o.user; }]); - * // => objects for [['barney', 36], ['barney', 34], ['fred', 48], ['fred', 40]] - * - * _.sortBy(users, ['user', 'age']); - * // => objects for [['barney', 34], ['barney', 36], ['fred', 40], ['fred', 48]] - */ - var sortBy = baseRest(function(collection, iteratees) { - if (collection == null) { - return []; - } - var length = iteratees.length; - if (length > 1 && isIterateeCall(collection, iteratees[0], iteratees[1])) { - iteratees = []; - } else if (length > 2 && isIterateeCall(iteratees[0], iteratees[1], iteratees[2])) { - iteratees = [iteratees[0]]; - } - return baseOrderBy(collection, baseFlatten(iteratees, 1), []); - }); - - /*------------------------------------------------------------------------*/ - - /** - * Gets the timestamp of the number of milliseconds that have elapsed since - * the Unix epoch (1 January 1970 00:00:00 UTC). - * - * @static - * @memberOf _ - * @since 2.4.0 - * @category Date - * @returns {number} Returns the timestamp. - * @example - * - * _.defer(function(stamp) { - * console.log(_.now() - stamp); - * }, _.now()); - * // => Logs the number of milliseconds it took for the deferred invocation. - */ - var now = ctxNow || function() { - return root.Date.now(); - }; - - /*------------------------------------------------------------------------*/ - - /** - * The opposite of `_.before`; this method creates a function that invokes - * `func` once it's called `n` or more times. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Function - * @param {number} n The number of calls before `func` is invoked. - * @param {Function} func The function to restrict. - * @returns {Function} Returns the new restricted function. - * @example - * - * var saves = ['profile', 'settings']; - * - * var done = _.after(saves.length, function() { - * console.log('done saving!'); - * }); - * - * _.forEach(saves, function(type) { - * asyncSave({ 'type': type, 'complete': done }); - * }); - * // => Logs 'done saving!' after the two async saves have completed. - */ - function after(n, func) { - if (typeof func != 'function') { - throw new TypeError(FUNC_ERROR_TEXT); - } - n = toInteger(n); - return function() { - if (--n < 1) { - return func.apply(this, arguments); - } - }; - } - - /** - * Creates a function that invokes `func`, with up to `n` arguments, - * ignoring any additional arguments. - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category Function - * @param {Function} func The function to cap arguments for. - * @param {number} [n=func.length] The arity cap. - * @param- {Object} [guard] Enables use as an iteratee for methods like `_.map`. - * @returns {Function} Returns the new capped function. - * @example - * - * _.map(['6', '8', '10'], _.ary(parseInt, 1)); - * // => [6, 8, 10] - */ - function ary(func, n, guard) { - n = guard ? undefined : n; - n = (func && n == null) ? func.length : n; - return createWrap(func, WRAP_ARY_FLAG, undefined, undefined, undefined, undefined, n); - } - - /** - * Creates a function that invokes `func`, with the `this` binding and arguments - * of the created function, while it's called less than `n` times. Subsequent - * calls to the created function return the result of the last `func` invocation. 
- * - * @static - * @memberOf _ - * @since 3.0.0 - * @category Function - * @param {number} n The number of calls at which `func` is no longer invoked. - * @param {Function} func The function to restrict. - * @returns {Function} Returns the new restricted function. - * @example - * - * jQuery(element).on('click', _.before(5, addContactToList)); - * // => Allows adding up to 4 contacts to the list. - */ - function before(n, func) { - var result; - if (typeof func != 'function') { - throw new TypeError(FUNC_ERROR_TEXT); - } - n = toInteger(n); - return function() { - if (--n > 0) { - result = func.apply(this, arguments); - } - if (n <= 1) { - func = undefined; - } - return result; - }; - } - - /** - * Creates a function that invokes `func` with the `this` binding of `thisArg` - * and `partials` prepended to the arguments it receives. - * - * The `_.bind.placeholder` value, which defaults to `_` in monolithic builds, - * may be used as a placeholder for partially applied arguments. - * - * **Note:** Unlike native `Function#bind`, this method doesn't set the "length" - * property of bound functions. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Function - * @param {Function} func The function to bind. - * @param {*} thisArg The `this` binding of `func`. - * @param {...*} [partials] The arguments to be partially applied. - * @returns {Function} Returns the new bound function. - * @example - * - * function greet(greeting, punctuation) { - * return greeting + ' ' + this.user + punctuation; - * } - * - * var object = { 'user': 'fred' }; - * - * var bound = _.bind(greet, object, 'hi'); - * bound('!'); - * // => 'hi fred!' - * - * // Bound with placeholders. - * var bound = _.bind(greet, object, _, '!'); - * bound('hi'); - * // => 'hi fred!' - */ - var bind = baseRest(function(func, thisArg, partials) { - var bitmask = WRAP_BIND_FLAG; - if (partials.length) { - var holders = replaceHolders(partials, getHolder(bind)); - bitmask |= WRAP_PARTIAL_FLAG; - } - return createWrap(func, bitmask, thisArg, partials, holders); - }); - - /** - * Creates a function that invokes the method at `object[key]` with `partials` - * prepended to the arguments it receives. - * - * This method differs from `_.bind` by allowing bound functions to reference - * methods that may be redefined or don't yet exist. See - * [Peter Michaux's article](http://peter.michaux.ca/articles/lazy-function-definition-pattern) - * for more details. - * - * The `_.bindKey.placeholder` value, which defaults to `_` in monolithic - * builds, may be used as a placeholder for partially applied arguments. - * - * @static - * @memberOf _ - * @since 0.10.0 - * @category Function - * @param {Object} object The object to invoke the method on. - * @param {string} key The key of the method. - * @param {...*} [partials] The arguments to be partially applied. - * @returns {Function} Returns the new bound function. - * @example - * - * var object = { - * 'user': 'fred', - * 'greet': function(greeting, punctuation) { - * return greeting + ' ' + this.user + punctuation; - * } - * }; - * - * var bound = _.bindKey(object, 'greet', 'hi'); - * bound('!'); - * // => 'hi fred!' - * - * object.greet = function(greeting, punctuation) { - * return greeting + 'ya ' + this.user + punctuation; - * }; - * - * bound('!'); - * // => 'hiya fred!' - * - * // Bound with placeholders. - * var bound = _.bindKey(object, 'greet', _, '!'); - * bound('hi'); - * // => 'hiya fred!' 
- */ - var bindKey = baseRest(function(object, key, partials) { - var bitmask = WRAP_BIND_FLAG | WRAP_BIND_KEY_FLAG; - if (partials.length) { - var holders = replaceHolders(partials, getHolder(bindKey)); - bitmask |= WRAP_PARTIAL_FLAG; - } - return createWrap(key, bitmask, object, partials, holders); - }); - - /** - * Creates a function that accepts arguments of `func` and either invokes - * `func` returning its result, if at least `arity` number of arguments have - * been provided, or returns a function that accepts the remaining `func` - * arguments, and so on. The arity of `func` may be specified if `func.length` - * is not sufficient. - * - * The `_.curry.placeholder` value, which defaults to `_` in monolithic builds, - * may be used as a placeholder for provided arguments. - * - * **Note:** This method doesn't set the "length" property of curried functions. - * - * @static - * @memberOf _ - * @since 2.0.0 - * @category Function - * @param {Function} func The function to curry. - * @param {number} [arity=func.length] The arity of `func`. - * @param- {Object} [guard] Enables use as an iteratee for methods like `_.map`. - * @returns {Function} Returns the new curried function. - * @example - * - * var abc = function(a, b, c) { - * return [a, b, c]; - * }; - * - * var curried = _.curry(abc); - * - * curried(1)(2)(3); - * // => [1, 2, 3] - * - * curried(1, 2)(3); - * // => [1, 2, 3] - * - * curried(1, 2, 3); - * // => [1, 2, 3] - * - * // Curried with placeholders. - * curried(1)(_, 3)(2); - * // => [1, 2, 3] - */ - function curry(func, arity, guard) { - arity = guard ? undefined : arity; - var result = createWrap(func, WRAP_CURRY_FLAG, undefined, undefined, undefined, undefined, undefined, arity); - result.placeholder = curry.placeholder; - return result; - } - - /** - * This method is like `_.curry` except that arguments are applied to `func` - * in the manner of `_.partialRight` instead of `_.partial`. - * - * The `_.curryRight.placeholder` value, which defaults to `_` in monolithic - * builds, may be used as a placeholder for provided arguments. - * - * **Note:** This method doesn't set the "length" property of curried functions. - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category Function - * @param {Function} func The function to curry. - * @param {number} [arity=func.length] The arity of `func`. - * @param- {Object} [guard] Enables use as an iteratee for methods like `_.map`. - * @returns {Function} Returns the new curried function. - * @example - * - * var abc = function(a, b, c) { - * return [a, b, c]; - * }; - * - * var curried = _.curryRight(abc); - * - * curried(3)(2)(1); - * // => [1, 2, 3] - * - * curried(2, 3)(1); - * // => [1, 2, 3] - * - * curried(1, 2, 3); - * // => [1, 2, 3] - * - * // Curried with placeholders. - * curried(3)(1, _)(2); - * // => [1, 2, 3] - */ - function curryRight(func, arity, guard) { - arity = guard ? undefined : arity; - var result = createWrap(func, WRAP_CURRY_RIGHT_FLAG, undefined, undefined, undefined, undefined, undefined, arity); - result.placeholder = curryRight.placeholder; - return result; - } - - /** - * Creates a debounced function that delays invoking `func` until after `wait` - * milliseconds have elapsed since the last time the debounced function was - * invoked. The debounced function comes with a `cancel` method to cancel - * delayed `func` invocations and a `flush` method to immediately invoke them. 
- * Provide `options` to indicate whether `func` should be invoked on the - * leading and/or trailing edge of the `wait` timeout. The `func` is invoked - * with the last arguments provided to the debounced function. Subsequent - * calls to the debounced function return the result of the last `func` - * invocation. - * - * **Note:** If `leading` and `trailing` options are `true`, `func` is - * invoked on the trailing edge of the timeout only if the debounced function - * is invoked more than once during the `wait` timeout. - * - * If `wait` is `0` and `leading` is `false`, `func` invocation is deferred - * until to the next tick, similar to `setTimeout` with a timeout of `0`. - * - * See [David Corbacho's article](https://css-tricks.com/debouncing-throttling-explained-examples/) - * for details over the differences between `_.debounce` and `_.throttle`. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Function - * @param {Function} func The function to debounce. - * @param {number} [wait=0] The number of milliseconds to delay. - * @param {Object} [options={}] The options object. - * @param {boolean} [options.leading=false] - * Specify invoking on the leading edge of the timeout. - * @param {number} [options.maxWait] - * The maximum time `func` is allowed to be delayed before it's invoked. - * @param {boolean} [options.trailing=true] - * Specify invoking on the trailing edge of the timeout. - * @returns {Function} Returns the new debounced function. - * @example - * - * // Avoid costly calculations while the window size is in flux. - * jQuery(window).on('resize', _.debounce(calculateLayout, 150)); - * - * // Invoke `sendMail` when clicked, debouncing subsequent calls. - * jQuery(element).on('click', _.debounce(sendMail, 300, { - * 'leading': true, - * 'trailing': false - * })); - * - * // Ensure `batchLog` is invoked once after 1 second of debounced calls. - * var debounced = _.debounce(batchLog, 250, { 'maxWait': 1000 }); - * var source = new EventSource('/stream'); - * jQuery(source).on('message', debounced); - * - * // Cancel the trailing debounced invocation. - * jQuery(window).on('popstate', debounced.cancel); - */ - function debounce(func, wait, options) { - var lastArgs, - lastThis, - maxWait, - result, - timerId, - lastCallTime, - lastInvokeTime = 0, - leading = false, - maxing = false, - trailing = true; - - if (typeof func != 'function') { - throw new TypeError(FUNC_ERROR_TEXT); - } - wait = toNumber(wait) || 0; - if (isObject(options)) { - leading = !!options.leading; - maxing = 'maxWait' in options; - maxWait = maxing ? nativeMax(toNumber(options.maxWait) || 0, wait) : maxWait; - trailing = 'trailing' in options ? !!options.trailing : trailing; - } - - function invokeFunc(time) { - var args = lastArgs, - thisArg = lastThis; - - lastArgs = lastThis = undefined; - lastInvokeTime = time; - result = func.apply(thisArg, args); - return result; - } - - function leadingEdge(time) { - // Reset any `maxWait` timer. - lastInvokeTime = time; - // Start the timer for the trailing edge. - timerId = setTimeout(timerExpired, wait); - // Invoke the leading edge. - return leading ? invokeFunc(time) : result; - } - - function remainingWait(time) { - var timeSinceLastCall = time - lastCallTime, - timeSinceLastInvoke = time - lastInvokeTime, - timeWaiting = wait - timeSinceLastCall; - - return maxing - ? 
nativeMin(timeWaiting, maxWait - timeSinceLastInvoke) - : timeWaiting; - } - - function shouldInvoke(time) { - var timeSinceLastCall = time - lastCallTime, - timeSinceLastInvoke = time - lastInvokeTime; - - // Either this is the first call, activity has stopped and we're at the - // trailing edge, the system time has gone backwards and we're treating - // it as the trailing edge, or we've hit the `maxWait` limit. - return (lastCallTime === undefined || (timeSinceLastCall >= wait) || - (timeSinceLastCall < 0) || (maxing && timeSinceLastInvoke >= maxWait)); - } - - function timerExpired() { - var time = now(); - if (shouldInvoke(time)) { - return trailingEdge(time); - } - // Restart the timer. - timerId = setTimeout(timerExpired, remainingWait(time)); - } - - function trailingEdge(time) { - timerId = undefined; - - // Only invoke if we have `lastArgs` which means `func` has been - // debounced at least once. - if (trailing && lastArgs) { - return invokeFunc(time); - } - lastArgs = lastThis = undefined; - return result; - } - - function cancel() { - if (timerId !== undefined) { - clearTimeout(timerId); - } - lastInvokeTime = 0; - lastArgs = lastCallTime = lastThis = timerId = undefined; - } - - function flush() { - return timerId === undefined ? result : trailingEdge(now()); - } - - function debounced() { - var time = now(), - isInvoking = shouldInvoke(time); - - lastArgs = arguments; - lastThis = this; - lastCallTime = time; - - if (isInvoking) { - if (timerId === undefined) { - return leadingEdge(lastCallTime); - } - if (maxing) { - // Handle invocations in a tight loop. - clearTimeout(timerId); - timerId = setTimeout(timerExpired, wait); - return invokeFunc(lastCallTime); - } - } - if (timerId === undefined) { - timerId = setTimeout(timerExpired, wait); - } - return result; - } - debounced.cancel = cancel; - debounced.flush = flush; - return debounced; - } - - /** - * Defers invoking the `func` until the current call stack has cleared. Any - * additional arguments are provided to `func` when it's invoked. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Function - * @param {Function} func The function to defer. - * @param {...*} [args] The arguments to invoke `func` with. - * @returns {number} Returns the timer id. - * @example - * - * _.defer(function(text) { - * console.log(text); - * }, 'deferred'); - * // => Logs 'deferred' after one millisecond. - */ - var defer = baseRest(function(func, args) { - return baseDelay(func, 1, args); - }); - - /** - * Invokes `func` after `wait` milliseconds. Any additional arguments are - * provided to `func` when it's invoked. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Function - * @param {Function} func The function to delay. - * @param {number} wait The number of milliseconds to delay invocation. - * @param {...*} [args] The arguments to invoke `func` with. - * @returns {number} Returns the timer id. - * @example - * - * _.delay(function(text) { - * console.log(text); - * }, 1000, 'later'); - * // => Logs 'later' after one second. - */ - var delay = baseRest(function(func, wait, args) { - return baseDelay(func, toNumber(wait) || 0, args); - }); - - /** - * Creates a function that invokes `func` with arguments reversed. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Function - * @param {Function} func The function to flip arguments for. - * @returns {Function} Returns the new flipped function. 
- * @example - * - * var flipped = _.flip(function() { - * return _.toArray(arguments); - * }); - * - * flipped('a', 'b', 'c', 'd'); - * // => ['d', 'c', 'b', 'a'] - */ - function flip(func) { - return createWrap(func, WRAP_FLIP_FLAG); - } - - /** - * Creates a function that memoizes the result of `func`. If `resolver` is - * provided, it determines the cache key for storing the result based on the - * arguments provided to the memoized function. By default, the first argument - * provided to the memoized function is used as the map cache key. The `func` - * is invoked with the `this` binding of the memoized function. - * - * **Note:** The cache is exposed as the `cache` property on the memoized - * function. Its creation may be customized by replacing the `_.memoize.Cache` - * constructor with one whose instances implement the - * [`Map`](http://ecma-international.org/ecma-262/7.0/#sec-properties-of-the-map-prototype-object) - * method interface of `clear`, `delete`, `get`, `has`, and `set`. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Function - * @param {Function} func The function to have its output memoized. - * @param {Function} [resolver] The function to resolve the cache key. - * @returns {Function} Returns the new memoized function. - * @example - * - * var object = { 'a': 1, 'b': 2 }; - * var other = { 'c': 3, 'd': 4 }; - * - * var values = _.memoize(_.values); - * values(object); - * // => [1, 2] - * - * values(other); - * // => [3, 4] - * - * object.a = 2; - * values(object); - * // => [1, 2] - * - * // Modify the result cache. - * values.cache.set(object, ['a', 'b']); - * values(object); - * // => ['a', 'b'] - * - * // Replace `_.memoize.Cache`. - * _.memoize.Cache = WeakMap; - */ - function memoize(func, resolver) { - if (typeof func != 'function' || (resolver != null && typeof resolver != 'function')) { - throw new TypeError(FUNC_ERROR_TEXT); - } - var memoized = function() { - var args = arguments, - key = resolver ? resolver.apply(this, args) : args[0], - cache = memoized.cache; - - if (cache.has(key)) { - return cache.get(key); - } - var result = func.apply(this, args); - memoized.cache = cache.set(key, result) || cache; - return result; - }; - memoized.cache = new (memoize.Cache || MapCache); - return memoized; - } - - // Expose `MapCache`. - memoize.Cache = MapCache; - - /** - * Creates a function that negates the result of the predicate `func`. The - * `func` predicate is invoked with the `this` binding and arguments of the - * created function. - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category Function - * @param {Function} predicate The predicate to negate. - * @returns {Function} Returns the new negated function. - * @example - * - * function isEven(n) { - * return n % 2 == 0; - * } - * - * _.filter([1, 2, 3, 4, 5, 6], _.negate(isEven)); - * // => [1, 3, 5] - */ - function negate(predicate) { - if (typeof predicate != 'function') { - throw new TypeError(FUNC_ERROR_TEXT); - } - return function() { - var args = arguments; - switch (args.length) { - case 0: return !predicate.call(this); - case 1: return !predicate.call(this, args[0]); - case 2: return !predicate.call(this, args[0], args[1]); - case 3: return !predicate.call(this, args[0], args[1], args[2]); - } - return !predicate.apply(this, args); - }; - } - - /** - * Creates a function that is restricted to invoking `func` once. Repeat calls - * to the function return the value of the first invocation. 
The `func` is - * invoked with the `this` binding and arguments of the created function. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Function - * @param {Function} func The function to restrict. - * @returns {Function} Returns the new restricted function. - * @example - * - * var initialize = _.once(createApplication); - * initialize(); - * initialize(); - * // => `createApplication` is invoked once - */ - function once(func) { - return before(2, func); - } - - /** - * Creates a function that invokes `func` with its arguments transformed. - * - * @static - * @since 4.0.0 - * @memberOf _ - * @category Function - * @param {Function} func The function to wrap. - * @param {...(Function|Function[])} [transforms=[_.identity]] - * The argument transforms. - * @returns {Function} Returns the new function. - * @example - * - * function doubled(n) { - * return n * 2; - * } - * - * function square(n) { - * return n * n; - * } - * - * var func = _.overArgs(function(x, y) { - * return [x, y]; - * }, [square, doubled]); - * - * func(9, 3); - * // => [81, 6] - * - * func(10, 5); - * // => [100, 10] - */ - var overArgs = castRest(function(func, transforms) { - transforms = (transforms.length == 1 && isArray(transforms[0])) - ? arrayMap(transforms[0], baseUnary(getIteratee())) - : arrayMap(baseFlatten(transforms, 1), baseUnary(getIteratee())); - - var funcsLength = transforms.length; - return baseRest(function(args) { - var index = -1, - length = nativeMin(args.length, funcsLength); - - while (++index < length) { - args[index] = transforms[index].call(this, args[index]); - } - return apply(func, this, args); - }); - }); - - /** - * Creates a function that invokes `func` with `partials` prepended to the - * arguments it receives. This method is like `_.bind` except it does **not** - * alter the `this` binding. - * - * The `_.partial.placeholder` value, which defaults to `_` in monolithic - * builds, may be used as a placeholder for partially applied arguments. - * - * **Note:** This method doesn't set the "length" property of partially - * applied functions. - * - * @static - * @memberOf _ - * @since 0.2.0 - * @category Function - * @param {Function} func The function to partially apply arguments to. - * @param {...*} [partials] The arguments to be partially applied. - * @returns {Function} Returns the new partially applied function. - * @example - * - * function greet(greeting, name) { - * return greeting + ' ' + name; - * } - * - * var sayHelloTo = _.partial(greet, 'hello'); - * sayHelloTo('fred'); - * // => 'hello fred' - * - * // Partially applied with placeholders. - * var greetFred = _.partial(greet, _, 'fred'); - * greetFred('hi'); - * // => 'hi fred' - */ - var partial = baseRest(function(func, partials) { - var holders = replaceHolders(partials, getHolder(partial)); - return createWrap(func, WRAP_PARTIAL_FLAG, undefined, partials, holders); - }); - - /** - * This method is like `_.partial` except that partially applied arguments - * are appended to the arguments it receives. - * - * The `_.partialRight.placeholder` value, which defaults to `_` in monolithic - * builds, may be used as a placeholder for partially applied arguments. - * - * **Note:** This method doesn't set the "length" property of partially - * applied functions. - * - * @static - * @memberOf _ - * @since 1.0.0 - * @category Function - * @param {Function} func The function to partially apply arguments to. - * @param {...*} [partials] The arguments to be partially applied. 
- * @returns {Function} Returns the new partially applied function. - * @example - * - * function greet(greeting, name) { - * return greeting + ' ' + name; - * } - * - * var greetFred = _.partialRight(greet, 'fred'); - * greetFred('hi'); - * // => 'hi fred' - * - * // Partially applied with placeholders. - * var sayHelloTo = _.partialRight(greet, 'hello', _); - * sayHelloTo('fred'); - * // => 'hello fred' - */ - var partialRight = baseRest(function(func, partials) { - var holders = replaceHolders(partials, getHolder(partialRight)); - return createWrap(func, WRAP_PARTIAL_RIGHT_FLAG, undefined, partials, holders); - }); - - /** - * Creates a function that invokes `func` with arguments arranged according - * to the specified `indexes` where the argument value at the first index is - * provided as the first argument, the argument value at the second index is - * provided as the second argument, and so on. - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category Function - * @param {Function} func The function to rearrange arguments for. - * @param {...(number|number[])} indexes The arranged argument indexes. - * @returns {Function} Returns the new function. - * @example - * - * var rearged = _.rearg(function(a, b, c) { - * return [a, b, c]; - * }, [2, 0, 1]); - * - * rearged('b', 'c', 'a') - * // => ['a', 'b', 'c'] - */ - var rearg = flatRest(function(func, indexes) { - return createWrap(func, WRAP_REARG_FLAG, undefined, undefined, undefined, indexes); - }); - - /** - * Creates a function that invokes `func` with the `this` binding of the - * created function and arguments from `start` and beyond provided as - * an array. - * - * **Note:** This method is based on the - * [rest parameter](https://mdn.io/rest_parameters). - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Function - * @param {Function} func The function to apply a rest parameter to. - * @param {number} [start=func.length-1] The start position of the rest parameter. - * @returns {Function} Returns the new function. - * @example - * - * var say = _.rest(function(what, names) { - * return what + ' ' + _.initial(names).join(', ') + - * (_.size(names) > 1 ? ', & ' : '') + _.last(names); - * }); - * - * say('hello', 'fred', 'barney', 'pebbles'); - * // => 'hello fred, barney, & pebbles' - */ - function rest(func, start) { - if (typeof func != 'function') { - throw new TypeError(FUNC_ERROR_TEXT); - } - start = start === undefined ? start : toInteger(start); - return baseRest(func, start); - } - - /** - * Creates a function that invokes `func` with the `this` binding of the - * create function and an array of arguments much like - * [`Function#apply`](http://www.ecma-international.org/ecma-262/7.0/#sec-function.prototype.apply). - * - * **Note:** This method is based on the - * [spread operator](https://mdn.io/spread_operator). - * - * @static - * @memberOf _ - * @since 3.2.0 - * @category Function - * @param {Function} func The function to spread arguments over. - * @param {number} [start=0] The start position of the spread. - * @returns {Function} Returns the new function. 
- * @example - * - * var say = _.spread(function(who, what) { - * return who + ' says ' + what; - * }); - * - * say(['fred', 'hello']); - * // => 'fred says hello' - * - * var numbers = Promise.all([ - * Promise.resolve(40), - * Promise.resolve(36) - * ]); - * - * numbers.then(_.spread(function(x, y) { - * return x + y; - * })); - * // => a Promise of 76 - */ - function spread(func, start) { - if (typeof func != 'function') { - throw new TypeError(FUNC_ERROR_TEXT); - } - start = start == null ? 0 : nativeMax(toInteger(start), 0); - return baseRest(function(args) { - var array = args[start], - otherArgs = castSlice(args, 0, start); - - if (array) { - arrayPush(otherArgs, array); - } - return apply(func, this, otherArgs); - }); - } - - /** - * Creates a throttled function that only invokes `func` at most once per - * every `wait` milliseconds. The throttled function comes with a `cancel` - * method to cancel delayed `func` invocations and a `flush` method to - * immediately invoke them. Provide `options` to indicate whether `func` - * should be invoked on the leading and/or trailing edge of the `wait` - * timeout. The `func` is invoked with the last arguments provided to the - * throttled function. Subsequent calls to the throttled function return the - * result of the last `func` invocation. - * - * **Note:** If `leading` and `trailing` options are `true`, `func` is - * invoked on the trailing edge of the timeout only if the throttled function - * is invoked more than once during the `wait` timeout. - * - * If `wait` is `0` and `leading` is `false`, `func` invocation is deferred - * until to the next tick, similar to `setTimeout` with a timeout of `0`. - * - * See [David Corbacho's article](https://css-tricks.com/debouncing-throttling-explained-examples/) - * for details over the differences between `_.throttle` and `_.debounce`. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Function - * @param {Function} func The function to throttle. - * @param {number} [wait=0] The number of milliseconds to throttle invocations to. - * @param {Object} [options={}] The options object. - * @param {boolean} [options.leading=true] - * Specify invoking on the leading edge of the timeout. - * @param {boolean} [options.trailing=true] - * Specify invoking on the trailing edge of the timeout. - * @returns {Function} Returns the new throttled function. - * @example - * - * // Avoid excessively updating the position while scrolling. - * jQuery(window).on('scroll', _.throttle(updatePosition, 100)); - * - * // Invoke `renewToken` when the click event is fired, but not more than once every 5 minutes. - * var throttled = _.throttle(renewToken, 300000, { 'trailing': false }); - * jQuery(element).on('click', throttled); - * - * // Cancel the trailing throttled invocation. - * jQuery(window).on('popstate', throttled.cancel); - */ - function throttle(func, wait, options) { - var leading = true, - trailing = true; - - if (typeof func != 'function') { - throw new TypeError(FUNC_ERROR_TEXT); - } - if (isObject(options)) { - leading = 'leading' in options ? !!options.leading : leading; - trailing = 'trailing' in options ? !!options.trailing : trailing; - } - return debounce(func, wait, { - 'leading': leading, - 'maxWait': wait, - 'trailing': trailing - }); - } - - /** - * Creates a function that accepts up to one argument, ignoring any - * additional arguments. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Function - * @param {Function} func The function to cap arguments for. 
- * @returns {Function} Returns the new capped function. - * @example - * - * _.map(['6', '8', '10'], _.unary(parseInt)); - * // => [6, 8, 10] - */ - function unary(func) { - return ary(func, 1); - } - - /** - * Creates a function that provides `value` to `wrapper` as its first - * argument. Any additional arguments provided to the function are appended - * to those provided to the `wrapper`. The wrapper is invoked with the `this` - * binding of the created function. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Function - * @param {*} value The value to wrap. - * @param {Function} [wrapper=identity] The wrapper function. - * @returns {Function} Returns the new function. - * @example - * - * var p = _.wrap(_.escape, function(func, text) { - * return '<p>' + func(text) + '</p>'; - * }); - * - * p('fred, barney, & pebbles'); - * // => '<p>fred, barney, &amp; pebbles</p>
' - */ - function wrap(value, wrapper) { - return partial(castFunction(wrapper), value); - } - - /*------------------------------------------------------------------------*/ - - /** - * Casts `value` as an array if it's not one. - * - * @static - * @memberOf _ - * @since 4.4.0 - * @category Lang - * @param {*} value The value to inspect. - * @returns {Array} Returns the cast array. - * @example - * - * _.castArray(1); - * // => [1] - * - * _.castArray({ 'a': 1 }); - * // => [{ 'a': 1 }] - * - * _.castArray('abc'); - * // => ['abc'] - * - * _.castArray(null); - * // => [null] - * - * _.castArray(undefined); - * // => [undefined] - * - * _.castArray(); - * // => [] - * - * var array = [1, 2, 3]; - * console.log(_.castArray(array) === array); - * // => true - */ - function castArray() { - if (!arguments.length) { - return []; - } - var value = arguments[0]; - return isArray(value) ? value : [value]; - } - - /** - * Creates a shallow clone of `value`. - * - * **Note:** This method is loosely based on the - * [structured clone algorithm](https://mdn.io/Structured_clone_algorithm) - * and supports cloning arrays, array buffers, booleans, date objects, maps, - * numbers, `Object` objects, regexes, sets, strings, symbols, and typed - * arrays. The own enumerable properties of `arguments` objects are cloned - * as plain objects. An empty object is returned for uncloneable values such - * as error objects, functions, DOM nodes, and WeakMaps. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Lang - * @param {*} value The value to clone. - * @returns {*} Returns the cloned value. - * @see _.cloneDeep - * @example - * - * var objects = [{ 'a': 1 }, { 'b': 2 }]; - * - * var shallow = _.clone(objects); - * console.log(shallow[0] === objects[0]); - * // => true - */ - function clone(value) { - return baseClone(value, CLONE_SYMBOLS_FLAG); - } - - /** - * This method is like `_.clone` except that it accepts `customizer` which - * is invoked to produce the cloned value. If `customizer` returns `undefined`, - * cloning is handled by the method instead. The `customizer` is invoked with - * up to four arguments; (value [, index|key, object, stack]). - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Lang - * @param {*} value The value to clone. - * @param {Function} [customizer] The function to customize cloning. - * @returns {*} Returns the cloned value. - * @see _.cloneDeepWith - * @example - * - * function customizer(value) { - * if (_.isElement(value)) { - * return value.cloneNode(false); - * } - * } - * - * var el = _.cloneWith(document.body, customizer); - * - * console.log(el === document.body); - * // => false - * console.log(el.nodeName); - * // => 'BODY' - * console.log(el.childNodes.length); - * // => 0 - */ - function cloneWith(value, customizer) { - customizer = typeof customizer == 'function' ? customizer : undefined; - return baseClone(value, CLONE_SYMBOLS_FLAG, customizer); - } - - /** - * This method is like `_.clone` except that it recursively clones `value`. - * - * @static - * @memberOf _ - * @since 1.0.0 - * @category Lang - * @param {*} value The value to recursively clone. - * @returns {*} Returns the deep cloned value. 
- * @see _.clone - * @example - * - * var objects = [{ 'a': 1 }, { 'b': 2 }]; - * - * var deep = _.cloneDeep(objects); - * console.log(deep[0] === objects[0]); - * // => false - */ - function cloneDeep(value) { - return baseClone(value, CLONE_DEEP_FLAG | CLONE_SYMBOLS_FLAG); - } - - /** - * This method is like `_.cloneWith` except that it recursively clones `value`. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Lang - * @param {*} value The value to recursively clone. - * @param {Function} [customizer] The function to customize cloning. - * @returns {*} Returns the deep cloned value. - * @see _.cloneWith - * @example - * - * function customizer(value) { - * if (_.isElement(value)) { - * return value.cloneNode(true); - * } - * } - * - * var el = _.cloneDeepWith(document.body, customizer); - * - * console.log(el === document.body); - * // => false - * console.log(el.nodeName); - * // => 'BODY' - * console.log(el.childNodes.length); - * // => 20 - */ - function cloneDeepWith(value, customizer) { - customizer = typeof customizer == 'function' ? customizer : undefined; - return baseClone(value, CLONE_DEEP_FLAG | CLONE_SYMBOLS_FLAG, customizer); - } - - /** - * Checks if `object` conforms to `source` by invoking the predicate - * properties of `source` with the corresponding property values of `object`. - * - * **Note:** This method is equivalent to `_.conforms` when `source` is - * partially applied. - * - * @static - * @memberOf _ - * @since 4.14.0 - * @category Lang - * @param {Object} object The object to inspect. - * @param {Object} source The object of property predicates to conform to. - * @returns {boolean} Returns `true` if `object` conforms, else `false`. - * @example - * - * var object = { 'a': 1, 'b': 2 }; - * - * _.conformsTo(object, { 'b': function(n) { return n > 1; } }); - * // => true - * - * _.conformsTo(object, { 'b': function(n) { return n > 2; } }); - * // => false - */ - function conformsTo(object, source) { - return source == null || baseConformsTo(object, source, keys(source)); - } - - /** - * Performs a - * [`SameValueZero`](http://ecma-international.org/ecma-262/7.0/#sec-samevaluezero) - * comparison between two values to determine if they are equivalent. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Lang - * @param {*} value The value to compare. - * @param {*} other The other value to compare. - * @returns {boolean} Returns `true` if the values are equivalent, else `false`. - * @example - * - * var object = { 'a': 1 }; - * var other = { 'a': 1 }; - * - * _.eq(object, object); - * // => true - * - * _.eq(object, other); - * // => false - * - * _.eq('a', 'a'); - * // => true - * - * _.eq('a', Object('a')); - * // => false - * - * _.eq(NaN, NaN); - * // => true - */ - function eq(value, other) { - return value === other || (value !== value && other !== other); - } - - /** - * Checks if `value` is greater than `other`. - * - * @static - * @memberOf _ - * @since 3.9.0 - * @category Lang - * @param {*} value The value to compare. - * @param {*} other The other value to compare. - * @returns {boolean} Returns `true` if `value` is greater than `other`, - * else `false`. - * @see _.lt - * @example - * - * _.gt(3, 1); - * // => true - * - * _.gt(3, 3); - * // => false - * - * _.gt(1, 3); - * // => false - */ - var gt = createRelationalOperation(baseGt); - - /** - * Checks if `value` is greater than or equal to `other`. - * - * @static - * @memberOf _ - * @since 3.9.0 - * @category Lang - * @param {*} value The value to compare. 
- * @param {*} other The other value to compare. - * @returns {boolean} Returns `true` if `value` is greater than or equal to - * `other`, else `false`. - * @see _.lte - * @example - * - * _.gte(3, 1); - * // => true - * - * _.gte(3, 3); - * // => true - * - * _.gte(1, 3); - * // => false - */ - var gte = createRelationalOperation(function(value, other) { - return value >= other; - }); - - /** - * Checks if `value` is likely an `arguments` object. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is an `arguments` object, - * else `false`. - * @example - * - * _.isArguments(function() { return arguments; }()); - * // => true - * - * _.isArguments([1, 2, 3]); - * // => false - */ - var isArguments = baseIsArguments(function() { return arguments; }()) ? baseIsArguments : function(value) { - return isObjectLike(value) && hasOwnProperty.call(value, 'callee') && - !propertyIsEnumerable.call(value, 'callee'); - }; - - /** - * Checks if `value` is classified as an `Array` object. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is an array, else `false`. - * @example - * - * _.isArray([1, 2, 3]); - * // => true - * - * _.isArray(document.body.children); - * // => false - * - * _.isArray('abc'); - * // => false - * - * _.isArray(_.noop); - * // => false - */ - var isArray = Array.isArray; - - /** - * Checks if `value` is classified as an `ArrayBuffer` object. - * - * @static - * @memberOf _ - * @since 4.3.0 - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is an array buffer, else `false`. - * @example - * - * _.isArrayBuffer(new ArrayBuffer(2)); - * // => true - * - * _.isArrayBuffer(new Array(2)); - * // => false - */ - var isArrayBuffer = nodeIsArrayBuffer ? baseUnary(nodeIsArrayBuffer) : baseIsArrayBuffer; - - /** - * Checks if `value` is array-like. A value is considered array-like if it's - * not a function and has a `value.length` that's an integer greater than or - * equal to `0` and less than or equal to `Number.MAX_SAFE_INTEGER`. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is array-like, else `false`. - * @example - * - * _.isArrayLike([1, 2, 3]); - * // => true - * - * _.isArrayLike(document.body.children); - * // => true - * - * _.isArrayLike('abc'); - * // => true - * - * _.isArrayLike(_.noop); - * // => false - */ - function isArrayLike(value) { - return value != null && isLength(value.length) && !isFunction(value); - } - - /** - * This method is like `_.isArrayLike` except that it also checks if `value` - * is an object. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is an array-like object, - * else `false`. - * @example - * - * _.isArrayLikeObject([1, 2, 3]); - * // => true - * - * _.isArrayLikeObject(document.body.children); - * // => true - * - * _.isArrayLikeObject('abc'); - * // => false - * - * _.isArrayLikeObject(_.noop); - * // => false - */ - function isArrayLikeObject(value) { - return isObjectLike(value) && isArrayLike(value); - } - - /** - * Checks if `value` is classified as a boolean primitive or object. 
- * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is a boolean, else `false`. - * @example - * - * _.isBoolean(false); - * // => true - * - * _.isBoolean(null); - * // => false - */ - function isBoolean(value) { - return value === true || value === false || - (isObjectLike(value) && baseGetTag(value) == boolTag); - } - - /** - * Checks if `value` is a buffer. - * - * @static - * @memberOf _ - * @since 4.3.0 - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is a buffer, else `false`. - * @example - * - * _.isBuffer(new Buffer(2)); - * // => true - * - * _.isBuffer(new Uint8Array(2)); - * // => false - */ - var isBuffer = nativeIsBuffer || stubFalse; - - /** - * Checks if `value` is classified as a `Date` object. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is a date object, else `false`. - * @example - * - * _.isDate(new Date); - * // => true - * - * _.isDate('Mon April 23 2012'); - * // => false - */ - var isDate = nodeIsDate ? baseUnary(nodeIsDate) : baseIsDate; - - /** - * Checks if `value` is likely a DOM element. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is a DOM element, else `false`. - * @example - * - * _.isElement(document.body); - * // => true - * - * _.isElement(''); - * // => false - */ - function isElement(value) { - return isObjectLike(value) && value.nodeType === 1 && !isPlainObject(value); - } - - /** - * Checks if `value` is an empty object, collection, map, or set. - * - * Objects are considered empty if they have no own enumerable string keyed - * properties. - * - * Array-like values such as `arguments` objects, arrays, buffers, strings, or - * jQuery-like collections are considered empty if they have a `length` of `0`. - * Similarly, maps and sets are considered empty if they have a `size` of `0`. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is empty, else `false`. - * @example - * - * _.isEmpty(null); - * // => true - * - * _.isEmpty(true); - * // => true - * - * _.isEmpty(1); - * // => true - * - * _.isEmpty([1, 2, 3]); - * // => false - * - * _.isEmpty({ 'a': 1 }); - * // => false - */ - function isEmpty(value) { - if (value == null) { - return true; - } - if (isArrayLike(value) && - (isArray(value) || typeof value == 'string' || typeof value.splice == 'function' || - isBuffer(value) || isTypedArray(value) || isArguments(value))) { - return !value.length; - } - var tag = getTag(value); - if (tag == mapTag || tag == setTag) { - return !value.size; - } - if (isPrototype(value)) { - return !baseKeys(value).length; - } - for (var key in value) { - if (hasOwnProperty.call(value, key)) { - return false; - } - } - return true; - } - - /** - * Performs a deep comparison between two values to determine if they are - * equivalent. - * - * **Note:** This method supports comparing arrays, array buffers, booleans, - * date objects, error objects, maps, numbers, `Object` objects, regexes, - * sets, strings, symbols, and typed arrays. `Object` objects are compared - * by their own, not inherited, enumerable properties. 
Functions and DOM - * nodes are compared by strict equality, i.e. `===`. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Lang - * @param {*} value The value to compare. - * @param {*} other The other value to compare. - * @returns {boolean} Returns `true` if the values are equivalent, else `false`. - * @example - * - * var object = { 'a': 1 }; - * var other = { 'a': 1 }; - * - * _.isEqual(object, other); - * // => true - * - * object === other; - * // => false - */ - function isEqual(value, other) { - return baseIsEqual(value, other); - } - - /** - * This method is like `_.isEqual` except that it accepts `customizer` which - * is invoked to compare values. If `customizer` returns `undefined`, comparisons - * are handled by the method instead. The `customizer` is invoked with up to - * six arguments: (objValue, othValue [, index|key, object, other, stack]). - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Lang - * @param {*} value The value to compare. - * @param {*} other The other value to compare. - * @param {Function} [customizer] The function to customize comparisons. - * @returns {boolean} Returns `true` if the values are equivalent, else `false`. - * @example - * - * function isGreeting(value) { - * return /^h(?:i|ello)$/.test(value); - * } - * - * function customizer(objValue, othValue) { - * if (isGreeting(objValue) && isGreeting(othValue)) { - * return true; - * } - * } - * - * var array = ['hello', 'goodbye']; - * var other = ['hi', 'goodbye']; - * - * _.isEqualWith(array, other, customizer); - * // => true - */ - function isEqualWith(value, other, customizer) { - customizer = typeof customizer == 'function' ? customizer : undefined; - var result = customizer ? customizer(value, other) : undefined; - return result === undefined ? baseIsEqual(value, other, undefined, customizer) : !!result; - } - - /** - * Checks if `value` is an `Error`, `EvalError`, `RangeError`, `ReferenceError`, - * `SyntaxError`, `TypeError`, or `URIError` object. - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is an error object, else `false`. - * @example - * - * _.isError(new Error); - * // => true - * - * _.isError(Error); - * // => false - */ - function isError(value) { - if (!isObjectLike(value)) { - return false; - } - var tag = baseGetTag(value); - return tag == errorTag || tag == domExcTag || - (typeof value.message == 'string' && typeof value.name == 'string' && !isPlainObject(value)); - } - - /** - * Checks if `value` is a finite primitive number. - * - * **Note:** This method is based on - * [`Number.isFinite`](https://mdn.io/Number/isFinite). - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is a finite number, else `false`. - * @example - * - * _.isFinite(3); - * // => true - * - * _.isFinite(Number.MIN_VALUE); - * // => true - * - * _.isFinite(Infinity); - * // => false - * - * _.isFinite('3'); - * // => false - */ - function isFinite(value) { - return typeof value == 'number' && nativeIsFinite(value); - } - - /** - * Checks if `value` is classified as a `Function` object. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is a function, else `false`. 
- * @example - * - * _.isFunction(_); - * // => true - * - * _.isFunction(/abc/); - * // => false - */ - function isFunction(value) { - if (!isObject(value)) { - return false; - } - // The use of `Object#toString` avoids issues with the `typeof` operator - // in Safari 9 which returns 'object' for typed arrays and other constructors. - var tag = baseGetTag(value); - return tag == funcTag || tag == genTag || tag == asyncTag || tag == proxyTag; - } - - /** - * Checks if `value` is an integer. - * - * **Note:** This method is based on - * [`Number.isInteger`](https://mdn.io/Number/isInteger). - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is an integer, else `false`. - * @example - * - * _.isInteger(3); - * // => true - * - * _.isInteger(Number.MIN_VALUE); - * // => false - * - * _.isInteger(Infinity); - * // => false - * - * _.isInteger('3'); - * // => false - */ - function isInteger(value) { - return typeof value == 'number' && value == toInteger(value); - } - - /** - * Checks if `value` is a valid array-like length. - * - * **Note:** This method is loosely based on - * [`ToLength`](http://ecma-international.org/ecma-262/7.0/#sec-tolength). - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is a valid length, else `false`. - * @example - * - * _.isLength(3); - * // => true - * - * _.isLength(Number.MIN_VALUE); - * // => false - * - * _.isLength(Infinity); - * // => false - * - * _.isLength('3'); - * // => false - */ - function isLength(value) { - return typeof value == 'number' && - value > -1 && value % 1 == 0 && value <= MAX_SAFE_INTEGER; - } - - /** - * Checks if `value` is the - * [language type](http://www.ecma-international.org/ecma-262/7.0/#sec-ecmascript-language-types) - * of `Object`. (e.g. arrays, functions, objects, regexes, `new Number(0)`, and `new String('')`) - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is an object, else `false`. - * @example - * - * _.isObject({}); - * // => true - * - * _.isObject([1, 2, 3]); - * // => true - * - * _.isObject(_.noop); - * // => true - * - * _.isObject(null); - * // => false - */ - function isObject(value) { - var type = typeof value; - return value != null && (type == 'object' || type == 'function'); - } - - /** - * Checks if `value` is object-like. A value is object-like if it's not `null` - * and has a `typeof` result of "object". - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is object-like, else `false`. - * @example - * - * _.isObjectLike({}); - * // => true - * - * _.isObjectLike([1, 2, 3]); - * // => true - * - * _.isObjectLike(_.noop); - * // => false - * - * _.isObjectLike(null); - * // => false - */ - function isObjectLike(value) { - return value != null && typeof value == 'object'; - } - - /** - * Checks if `value` is classified as a `Map` object. - * - * @static - * @memberOf _ - * @since 4.3.0 - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is a map, else `false`. - * @example - * - * _.isMap(new Map); - * // => true - * - * _.isMap(new WeakMap); - * // => false - */ - var isMap = nodeIsMap ? 
baseUnary(nodeIsMap) : baseIsMap; - - /** - * Performs a partial deep comparison between `object` and `source` to - * determine if `object` contains equivalent property values. - * - * **Note:** This method is equivalent to `_.matches` when `source` is - * partially applied. - * - * Partial comparisons will match empty array and empty object `source` - * values against any array or object value, respectively. See `_.isEqual` - * for a list of supported value comparisons. - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category Lang - * @param {Object} object The object to inspect. - * @param {Object} source The object of property values to match. - * @returns {boolean} Returns `true` if `object` is a match, else `false`. - * @example - * - * var object = { 'a': 1, 'b': 2 }; - * - * _.isMatch(object, { 'b': 2 }); - * // => true - * - * _.isMatch(object, { 'b': 1 }); - * // => false - */ - function isMatch(object, source) { - return object === source || baseIsMatch(object, source, getMatchData(source)); - } - - /** - * This method is like `_.isMatch` except that it accepts `customizer` which - * is invoked to compare values. If `customizer` returns `undefined`, comparisons - * are handled by the method instead. The `customizer` is invoked with five - * arguments: (objValue, srcValue, index|key, object, source). - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Lang - * @param {Object} object The object to inspect. - * @param {Object} source The object of property values to match. - * @param {Function} [customizer] The function to customize comparisons. - * @returns {boolean} Returns `true` if `object` is a match, else `false`. - * @example - * - * function isGreeting(value) { - * return /^h(?:i|ello)$/.test(value); - * } - * - * function customizer(objValue, srcValue) { - * if (isGreeting(objValue) && isGreeting(srcValue)) { - * return true; - * } - * } - * - * var object = { 'greeting': 'hello' }; - * var source = { 'greeting': 'hi' }; - * - * _.isMatchWith(object, source, customizer); - * // => true - */ - function isMatchWith(object, source, customizer) { - customizer = typeof customizer == 'function' ? customizer : undefined; - return baseIsMatch(object, source, getMatchData(source), customizer); - } - - /** - * Checks if `value` is `NaN`. - * - * **Note:** This method is based on - * [`Number.isNaN`](https://mdn.io/Number/isNaN) and is not the same as - * global [`isNaN`](https://mdn.io/isNaN) which returns `true` for - * `undefined` and other non-number values. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is `NaN`, else `false`. - * @example - * - * _.isNaN(NaN); - * // => true - * - * _.isNaN(new Number(NaN)); - * // => true - * - * isNaN(undefined); - * // => true - * - * _.isNaN(undefined); - * // => false - */ - function isNaN(value) { - // An `NaN` primitive is the only value that is not equal to itself. - // Perform the `toStringTag` check first to avoid errors with some - // ActiveX objects in IE. - return isNumber(value) && value != +value; - } - - /** - * Checks if `value` is a pristine native function. - * - * **Note:** This method can't reliably detect native functions in the presence - * of the core-js package because core-js circumvents this kind of detection. - * Despite multiple requests, the core-js maintainer has made it clear: any - * attempt to fix the detection will be obstructed. 
As a result, we're left - * with little choice but to throw an error. Unfortunately, this also affects - * packages, like [babel-polyfill](https://www.npmjs.com/package/babel-polyfill), - * which rely on core-js. - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is a native function, - * else `false`. - * @example - * - * _.isNative(Array.prototype.push); - * // => true - * - * _.isNative(_); - * // => false - */ - function isNative(value) { - if (isMaskable(value)) { - throw new Error(CORE_ERROR_TEXT); - } - return baseIsNative(value); - } - - /** - * Checks if `value` is `null`. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is `null`, else `false`. - * @example - * - * _.isNull(null); - * // => true - * - * _.isNull(void 0); - * // => false - */ - function isNull(value) { - return value === null; - } - - /** - * Checks if `value` is `null` or `undefined`. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is nullish, else `false`. - * @example - * - * _.isNil(null); - * // => true - * - * _.isNil(void 0); - * // => true - * - * _.isNil(NaN); - * // => false - */ - function isNil(value) { - return value == null; - } - - /** - * Checks if `value` is classified as a `Number` primitive or object. - * - * **Note:** To exclude `Infinity`, `-Infinity`, and `NaN`, which are - * classified as numbers, use the `_.isFinite` method. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is a number, else `false`. - * @example - * - * _.isNumber(3); - * // => true - * - * _.isNumber(Number.MIN_VALUE); - * // => true - * - * _.isNumber(Infinity); - * // => true - * - * _.isNumber('3'); - * // => false - */ - function isNumber(value) { - return typeof value == 'number' || - (isObjectLike(value) && baseGetTag(value) == numberTag); - } - - /** - * Checks if `value` is a plain object, that is, an object created by the - * `Object` constructor or one with a `[[Prototype]]` of `null`. - * - * @static - * @memberOf _ - * @since 0.8.0 - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is a plain object, else `false`. - * @example - * - * function Foo() { - * this.a = 1; - * } - * - * _.isPlainObject(new Foo); - * // => false - * - * _.isPlainObject([1, 2, 3]); - * // => false - * - * _.isPlainObject({ 'x': 0, 'y': 0 }); - * // => true - * - * _.isPlainObject(Object.create(null)); - * // => true - */ - function isPlainObject(value) { - if (!isObjectLike(value) || baseGetTag(value) != objectTag) { - return false; - } - var proto = getPrototype(value); - if (proto === null) { - return true; - } - var Ctor = hasOwnProperty.call(proto, 'constructor') && proto.constructor; - return typeof Ctor == 'function' && Ctor instanceof Ctor && - funcToString.call(Ctor) == objectCtorString; - } - - /** - * Checks if `value` is classified as a `RegExp` object. - * - * @static - * @memberOf _ - * @since 0.1.0 - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is a regexp, else `false`. 
- * @example - * - * _.isRegExp(/abc/); - * // => true - * - * _.isRegExp('/abc/'); - * // => false - */ - var isRegExp = nodeIsRegExp ? baseUnary(nodeIsRegExp) : baseIsRegExp; - - /** - * Checks if `value` is a safe integer. An integer is safe if it's an IEEE-754 - * double precision number which isn't the result of a rounded unsafe integer. - * - * **Note:** This method is based on - * [`Number.isSafeInteger`](https://mdn.io/Number/isSafeInteger). - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is a safe integer, else `false`. - * @example - * - * _.isSafeInteger(3); - * // => true - * - * _.isSafeInteger(Number.MIN_VALUE); - * // => false - * - * _.isSafeInteger(Infinity); - * // => false - * - * _.isSafeInteger('3'); - * // => false - */ - function isSafeInteger(value) { - return isInteger(value) && value >= -MAX_SAFE_INTEGER && value <= MAX_SAFE_INTEGER; - } - - /** - * Checks if `value` is classified as a `Set` object. - * - * @static - * @memberOf _ - * @since 4.3.0 - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is a set, else `false`. - * @example - * - * _.isSet(new Set); - * // => true - * - * _.isSet(new WeakSet); - * // => false - */ - var isSet = nodeIsSet ? baseUnary(nodeIsSet) : baseIsSet; - - /** - * Checks if `value` is classified as a `String` primitive or object. - * - * @static - * @since 0.1.0 - * @memberOf _ - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is a string, else `false`. - * @example - * - * _.isString('abc'); - * // => true - * - * _.isString(1); - * // => false - */ - function isString(value) { - return typeof value == 'string' || - (!isArray(value) && isObjectLike(value) && baseGetTag(value) == stringTag); - } - - /** - * Checks if `value` is classified as a `Symbol` primitive or object. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is a symbol, else `false`. - * @example - * - * _.isSymbol(Symbol.iterator); - * // => true - * - * _.isSymbol('abc'); - * // => false - */ - function isSymbol(value) { - return typeof value == 'symbol' || - (isObjectLike(value) && baseGetTag(value) == symbolTag); - } - - /** - * Checks if `value` is classified as a typed array. - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is a typed array, else `false`. - * @example - * - * _.isTypedArray(new Uint8Array); - * // => true - * - * _.isTypedArray([]); - * // => false - */ - var isTypedArray = nodeIsTypedArray ? baseUnary(nodeIsTypedArray) : baseIsTypedArray; - - /** - * Checks if `value` is `undefined`. - * - * @static - * @since 0.1.0 - * @memberOf _ - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is `undefined`, else `false`. - * @example - * - * _.isUndefined(void 0); - * // => true - * - * _.isUndefined(null); - * // => false - */ - function isUndefined(value) { - return value === undefined; - } - - /** - * Checks if `value` is classified as a `WeakMap` object. - * - * @static - * @memberOf _ - * @since 4.3.0 - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is a weak map, else `false`. 
- * @example - * - * _.isWeakMap(new WeakMap); - * // => true - * - * _.isWeakMap(new Map); - * // => false - */ - function isWeakMap(value) { - return isObjectLike(value) && getTag(value) == weakMapTag; - } - - /** - * Checks if `value` is classified as a `WeakSet` object. - * - * @static - * @memberOf _ - * @since 4.3.0 - * @category Lang - * @param {*} value The value to check. - * @returns {boolean} Returns `true` if `value` is a weak set, else `false`. - * @example - * - * _.isWeakSet(new WeakSet); - * // => true - * - * _.isWeakSet(new Set); - * // => false - */ - function isWeakSet(value) { - return isObjectLike(value) && baseGetTag(value) == weakSetTag; - } - - /** - * Checks if `value` is less than `other`. - * - * @static - * @memberOf _ - * @since 3.9.0 - * @category Lang - * @param {*} value The value to compare. - * @param {*} other The other value to compare. - * @returns {boolean} Returns `true` if `value` is less than `other`, - * else `false`. - * @see _.gt - * @example - * - * _.lt(1, 3); - * // => true - * - * _.lt(3, 3); - * // => false - * - * _.lt(3, 1); - * // => false - */ - var lt = createRelationalOperation(baseLt); - - /** - * Checks if `value` is less than or equal to `other`. - * - * @static - * @memberOf _ - * @since 3.9.0 - * @category Lang - * @param {*} value The value to compare. - * @param {*} other The other value to compare. - * @returns {boolean} Returns `true` if `value` is less than or equal to - * `other`, else `false`. - * @see _.gte - * @example - * - * _.lte(1, 3); - * // => true - * - * _.lte(3, 3); - * // => true - * - * _.lte(3, 1); - * // => false - */ - var lte = createRelationalOperation(function(value, other) { - return value <= other; - }); - - /** - * Converts `value` to an array. - * - * @static - * @since 0.1.0 - * @memberOf _ - * @category Lang - * @param {*} value The value to convert. - * @returns {Array} Returns the converted array. - * @example - * - * _.toArray({ 'a': 1, 'b': 2 }); - * // => [1, 2] - * - * _.toArray('abc'); - * // => ['a', 'b', 'c'] - * - * _.toArray(1); - * // => [] - * - * _.toArray(null); - * // => [] - */ - function toArray(value) { - if (!value) { - return []; - } - if (isArrayLike(value)) { - return isString(value) ? stringToArray(value) : copyArray(value); - } - if (symIterator && value[symIterator]) { - return iteratorToArray(value[symIterator]()); - } - var tag = getTag(value), - func = tag == mapTag ? mapToArray : (tag == setTag ? setToArray : values); - - return func(value); - } - - /** - * Converts `value` to a finite number. - * - * @static - * @memberOf _ - * @since 4.12.0 - * @category Lang - * @param {*} value The value to convert. - * @returns {number} Returns the converted number. - * @example - * - * _.toFinite(3.2); - * // => 3.2 - * - * _.toFinite(Number.MIN_VALUE); - * // => 5e-324 - * - * _.toFinite(Infinity); - * // => 1.7976931348623157e+308 - * - * _.toFinite('3.2'); - * // => 3.2 - */ - function toFinite(value) { - if (!value) { - return value === 0 ? value : 0; - } - value = toNumber(value); - if (value === INFINITY || value === -INFINITY) { - var sign = (value < 0 ? -1 : 1); - return sign * MAX_INTEGER; - } - return value === value ? value : 0; - } - - /** - * Converts `value` to an integer. - * - * **Note:** This method is loosely based on - * [`ToInteger`](http://www.ecma-international.org/ecma-262/7.0/#sec-tointeger). - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Lang - * @param {*} value The value to convert. - * @returns {number} Returns the converted integer. 
- * @example - * - * _.toInteger(3.2); - * // => 3 - * - * _.toInteger(Number.MIN_VALUE); - * // => 0 - * - * _.toInteger(Infinity); - * // => 1.7976931348623157e+308 - * - * _.toInteger('3.2'); - * // => 3 - */ - function toInteger(value) { - var result = toFinite(value), - remainder = result % 1; - - return result === result ? (remainder ? result - remainder : result) : 0; - } - - /** - * Converts `value` to an integer suitable for use as the length of an - * array-like object. - * - * **Note:** This method is based on - * [`ToLength`](http://ecma-international.org/ecma-262/7.0/#sec-tolength). - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Lang - * @param {*} value The value to convert. - * @returns {number} Returns the converted integer. - * @example - * - * _.toLength(3.2); - * // => 3 - * - * _.toLength(Number.MIN_VALUE); - * // => 0 - * - * _.toLength(Infinity); - * // => 4294967295 - * - * _.toLength('3.2'); - * // => 3 - */ - function toLength(value) { - return value ? baseClamp(toInteger(value), 0, MAX_ARRAY_LENGTH) : 0; - } - - /** - * Converts `value` to a number. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Lang - * @param {*} value The value to process. - * @returns {number} Returns the number. - * @example - * - * _.toNumber(3.2); - * // => 3.2 - * - * _.toNumber(Number.MIN_VALUE); - * // => 5e-324 - * - * _.toNumber(Infinity); - * // => Infinity - * - * _.toNumber('3.2'); - * // => 3.2 - */ - function toNumber(value) { - if (typeof value == 'number') { - return value; - } - if (isSymbol(value)) { - return NAN; - } - if (isObject(value)) { - var other = typeof value.valueOf == 'function' ? value.valueOf() : value; - value = isObject(other) ? (other + '') : other; - } - if (typeof value != 'string') { - return value === 0 ? value : +value; - } - value = value.replace(reTrim, ''); - var isBinary = reIsBinary.test(value); - return (isBinary || reIsOctal.test(value)) - ? freeParseInt(value.slice(2), isBinary ? 2 : 8) - : (reIsBadHex.test(value) ? NAN : +value); - } - - /** - * Converts `value` to a plain object flattening inherited enumerable string - * keyed properties of `value` to own properties of the plain object. - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category Lang - * @param {*} value The value to convert. - * @returns {Object} Returns the converted plain object. - * @example - * - * function Foo() { - * this.b = 2; - * } - * - * Foo.prototype.c = 3; - * - * _.assign({ 'a': 1 }, new Foo); - * // => { 'a': 1, 'b': 2 } - * - * _.assign({ 'a': 1 }, _.toPlainObject(new Foo)); - * // => { 'a': 1, 'b': 2, 'c': 3 } - */ - function toPlainObject(value) { - return copyObject(value, keysIn(value)); - } - - /** - * Converts `value` to a safe integer. A safe integer can be compared and - * represented correctly. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Lang - * @param {*} value The value to convert. - * @returns {number} Returns the converted integer. - * @example - * - * _.toSafeInteger(3.2); - * // => 3 - * - * _.toSafeInteger(Number.MIN_VALUE); - * // => 0 - * - * _.toSafeInteger(Infinity); - * // => 9007199254740991 - * - * _.toSafeInteger('3.2'); - * // => 3 - */ - function toSafeInteger(value) { - return value - ? baseClamp(toInteger(value), -MAX_SAFE_INTEGER, MAX_SAFE_INTEGER) - : (value === 0 ? value : 0); - } - - /** - * Converts `value` to a string. An empty string is returned for `null` - * and `undefined` values. The sign of `-0` is preserved. 
- * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Lang - * @param {*} value The value to convert. - * @returns {string} Returns the converted string. - * @example - * - * _.toString(null); - * // => '' - * - * _.toString(-0); - * // => '-0' - * - * _.toString([1, 2, 3]); - * // => '1,2,3' - */ - function toString(value) { - return value == null ? '' : baseToString(value); - } - - /*------------------------------------------------------------------------*/ - - /** - * Assigns own enumerable string keyed properties of source objects to the - * destination object. Source objects are applied from left to right. - * Subsequent sources overwrite property assignments of previous sources. - * - * **Note:** This method mutates `object` and is loosely based on - * [`Object.assign`](https://mdn.io/Object/assign). - * - * @static - * @memberOf _ - * @since 0.10.0 - * @category Object - * @param {Object} object The destination object. - * @param {...Object} [sources] The source objects. - * @returns {Object} Returns `object`. - * @see _.assignIn - * @example - * - * function Foo() { - * this.a = 1; - * } - * - * function Bar() { - * this.c = 3; - * } - * - * Foo.prototype.b = 2; - * Bar.prototype.d = 4; - * - * _.assign({ 'a': 0 }, new Foo, new Bar); - * // => { 'a': 1, 'c': 3 } - */ - var assign = createAssigner(function(object, source) { - if (isPrototype(source) || isArrayLike(source)) { - copyObject(source, keys(source), object); - return; - } - for (var key in source) { - if (hasOwnProperty.call(source, key)) { - assignValue(object, key, source[key]); - } - } - }); - - /** - * This method is like `_.assign` except that it iterates over own and - * inherited source properties. - * - * **Note:** This method mutates `object`. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @alias extend - * @category Object - * @param {Object} object The destination object. - * @param {...Object} [sources] The source objects. - * @returns {Object} Returns `object`. - * @see _.assign - * @example - * - * function Foo() { - * this.a = 1; - * } - * - * function Bar() { - * this.c = 3; - * } - * - * Foo.prototype.b = 2; - * Bar.prototype.d = 4; - * - * _.assignIn({ 'a': 0 }, new Foo, new Bar); - * // => { 'a': 1, 'b': 2, 'c': 3, 'd': 4 } - */ - var assignIn = createAssigner(function(object, source) { - copyObject(source, keysIn(source), object); - }); - - /** - * This method is like `_.assignIn` except that it accepts `customizer` - * which is invoked to produce the assigned values. If `customizer` returns - * `undefined`, assignment is handled by the method instead. The `customizer` - * is invoked with five arguments: (objValue, srcValue, key, object, source). - * - * **Note:** This method mutates `object`. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @alias extendWith - * @category Object - * @param {Object} object The destination object. - * @param {...Object} sources The source objects. - * @param {Function} [customizer] The function to customize assigned values. - * @returns {Object} Returns `object`. - * @see _.assignWith - * @example - * - * function customizer(objValue, srcValue) { - * return _.isUndefined(objValue) ? 
srcValue : objValue; - * } - * - * var defaults = _.partialRight(_.assignInWith, customizer); - * - * defaults({ 'a': 1 }, { 'b': 2 }, { 'a': 3 }); - * // => { 'a': 1, 'b': 2 } - */ - var assignInWith = createAssigner(function(object, source, srcIndex, customizer) { - copyObject(source, keysIn(source), object, customizer); - }); - - /** - * This method is like `_.assign` except that it accepts `customizer` - * which is invoked to produce the assigned values. If `customizer` returns - * `undefined`, assignment is handled by the method instead. The `customizer` - * is invoked with five arguments: (objValue, srcValue, key, object, source). - * - * **Note:** This method mutates `object`. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Object - * @param {Object} object The destination object. - * @param {...Object} sources The source objects. - * @param {Function} [customizer] The function to customize assigned values. - * @returns {Object} Returns `object`. - * @see _.assignInWith - * @example - * - * function customizer(objValue, srcValue) { - * return _.isUndefined(objValue) ? srcValue : objValue; - * } - * - * var defaults = _.partialRight(_.assignWith, customizer); - * - * defaults({ 'a': 1 }, { 'b': 2 }, { 'a': 3 }); - * // => { 'a': 1, 'b': 2 } - */ - var assignWith = createAssigner(function(object, source, srcIndex, customizer) { - copyObject(source, keys(source), object, customizer); - }); - - /** - * Creates an array of values corresponding to `paths` of `object`. - * - * @static - * @memberOf _ - * @since 1.0.0 - * @category Object - * @param {Object} object The object to iterate over. - * @param {...(string|string[])} [paths] The property paths to pick. - * @returns {Array} Returns the picked values. - * @example - * - * var object = { 'a': [{ 'b': { 'c': 3 } }, 4] }; - * - * _.at(object, ['a[0].b.c', 'a[1]']); - * // => [3, 4] - */ - var at = flatRest(baseAt); - - /** - * Creates an object that inherits from the `prototype` object. If a - * `properties` object is given, its own enumerable string keyed properties - * are assigned to the created object. - * - * @static - * @memberOf _ - * @since 2.3.0 - * @category Object - * @param {Object} prototype The object to inherit from. - * @param {Object} [properties] The properties to assign to the object. - * @returns {Object} Returns the new object. - * @example - * - * function Shape() { - * this.x = 0; - * this.y = 0; - * } - * - * function Circle() { - * Shape.call(this); - * } - * - * Circle.prototype = _.create(Shape.prototype, { - * 'constructor': Circle - * }); - * - * var circle = new Circle; - * circle instanceof Circle; - * // => true - * - * circle instanceof Shape; - * // => true - */ - function create(prototype, properties) { - var result = baseCreate(prototype); - return properties == null ? result : baseAssign(result, properties); - } - - /** - * Assigns own and inherited enumerable string keyed properties of source - * objects to the destination object for all destination properties that - * resolve to `undefined`. Source objects are applied from left to right. - * Once a property is set, additional values of the same property are ignored. - * - * **Note:** This method mutates `object`. - * - * @static - * @since 0.1.0 - * @memberOf _ - * @category Object - * @param {Object} object The destination object. - * @param {...Object} [sources] The source objects. - * @returns {Object} Returns `object`. 
- * @see _.defaultsDeep - * @example - * - * _.defaults({ 'a': 1 }, { 'b': 2 }, { 'a': 3 }); - * // => { 'a': 1, 'b': 2 } - */ - var defaults = baseRest(function(object, sources) { - object = Object(object); - - var index = -1; - var length = sources.length; - var guard = length > 2 ? sources[2] : undefined; - - if (guard && isIterateeCall(sources[0], sources[1], guard)) { - length = 1; - } - - while (++index < length) { - var source = sources[index]; - var props = keysIn(source); - var propsIndex = -1; - var propsLength = props.length; - - while (++propsIndex < propsLength) { - var key = props[propsIndex]; - var value = object[key]; - - if (value === undefined || - (eq(value, objectProto[key]) && !hasOwnProperty.call(object, key))) { - object[key] = source[key]; - } - } - } - - return object; - }); - - /** - * This method is like `_.defaults` except that it recursively assigns - * default properties. - * - * **Note:** This method mutates `object`. - * - * @static - * @memberOf _ - * @since 3.10.0 - * @category Object - * @param {Object} object The destination object. - * @param {...Object} [sources] The source objects. - * @returns {Object} Returns `object`. - * @see _.defaults - * @example - * - * _.defaultsDeep({ 'a': { 'b': 2 } }, { 'a': { 'b': 1, 'c': 3 } }); - * // => { 'a': { 'b': 2, 'c': 3 } } - */ - var defaultsDeep = baseRest(function(args) { - args.push(undefined, customDefaultsMerge); - return apply(mergeWith, undefined, args); - }); - - /** - * This method is like `_.find` except that it returns the key of the first - * element `predicate` returns truthy for instead of the element itself. - * - * @static - * @memberOf _ - * @since 1.1.0 - * @category Object - * @param {Object} object The object to inspect. - * @param {Function} [predicate=_.identity] The function invoked per iteration. - * @returns {string|undefined} Returns the key of the matched element, - * else `undefined`. - * @example - * - * var users = { - * 'barney': { 'age': 36, 'active': true }, - * 'fred': { 'age': 40, 'active': false }, - * 'pebbles': { 'age': 1, 'active': true } - * }; - * - * _.findKey(users, function(o) { return o.age < 40; }); - * // => 'barney' (iteration order is not guaranteed) - * - * // The `_.matches` iteratee shorthand. - * _.findKey(users, { 'age': 1, 'active': true }); - * // => 'pebbles' - * - * // The `_.matchesProperty` iteratee shorthand. - * _.findKey(users, ['active', false]); - * // => 'fred' - * - * // The `_.property` iteratee shorthand. - * _.findKey(users, 'active'); - * // => 'barney' - */ - function findKey(object, predicate) { - return baseFindKey(object, getIteratee(predicate, 3), baseForOwn); - } - - /** - * This method is like `_.findKey` except that it iterates over elements of - * a collection in the opposite order. - * - * @static - * @memberOf _ - * @since 2.0.0 - * @category Object - * @param {Object} object The object to inspect. - * @param {Function} [predicate=_.identity] The function invoked per iteration. - * @returns {string|undefined} Returns the key of the matched element, - * else `undefined`. - * @example - * - * var users = { - * 'barney': { 'age': 36, 'active': true }, - * 'fred': { 'age': 40, 'active': false }, - * 'pebbles': { 'age': 1, 'active': true } - * }; - * - * _.findLastKey(users, function(o) { return o.age < 40; }); - * // => returns 'pebbles' assuming `_.findKey` returns 'barney' - * - * // The `_.matches` iteratee shorthand. 
- * _.findLastKey(users, { 'age': 36, 'active': true }); - * // => 'barney' - * - * // The `_.matchesProperty` iteratee shorthand. - * _.findLastKey(users, ['active', false]); - * // => 'fred' - * - * // The `_.property` iteratee shorthand. - * _.findLastKey(users, 'active'); - * // => 'pebbles' - */ - function findLastKey(object, predicate) { - return baseFindKey(object, getIteratee(predicate, 3), baseForOwnRight); - } - - /** - * Iterates over own and inherited enumerable string keyed properties of an - * object and invokes `iteratee` for each property. The iteratee is invoked - * with three arguments: (value, key, object). Iteratee functions may exit - * iteration early by explicitly returning `false`. - * - * @static - * @memberOf _ - * @since 0.3.0 - * @category Object - * @param {Object} object The object to iterate over. - * @param {Function} [iteratee=_.identity] The function invoked per iteration. - * @returns {Object} Returns `object`. - * @see _.forInRight - * @example - * - * function Foo() { - * this.a = 1; - * this.b = 2; - * } - * - * Foo.prototype.c = 3; - * - * _.forIn(new Foo, function(value, key) { - * console.log(key); - * }); - * // => Logs 'a', 'b', then 'c' (iteration order is not guaranteed). - */ - function forIn(object, iteratee) { - return object == null - ? object - : baseFor(object, getIteratee(iteratee, 3), keysIn); - } - - /** - * This method is like `_.forIn` except that it iterates over properties of - * `object` in the opposite order. - * - * @static - * @memberOf _ - * @since 2.0.0 - * @category Object - * @param {Object} object The object to iterate over. - * @param {Function} [iteratee=_.identity] The function invoked per iteration. - * @returns {Object} Returns `object`. - * @see _.forIn - * @example - * - * function Foo() { - * this.a = 1; - * this.b = 2; - * } - * - * Foo.prototype.c = 3; - * - * _.forInRight(new Foo, function(value, key) { - * console.log(key); - * }); - * // => Logs 'c', 'b', then 'a' assuming `_.forIn` logs 'a', 'b', then 'c'. - */ - function forInRight(object, iteratee) { - return object == null - ? object - : baseForRight(object, getIteratee(iteratee, 3), keysIn); - } - - /** - * Iterates over own enumerable string keyed properties of an object and - * invokes `iteratee` for each property. The iteratee is invoked with three - * arguments: (value, key, object). Iteratee functions may exit iteration - * early by explicitly returning `false`. - * - * @static - * @memberOf _ - * @since 0.3.0 - * @category Object - * @param {Object} object The object to iterate over. - * @param {Function} [iteratee=_.identity] The function invoked per iteration. - * @returns {Object} Returns `object`. - * @see _.forOwnRight - * @example - * - * function Foo() { - * this.a = 1; - * this.b = 2; - * } - * - * Foo.prototype.c = 3; - * - * _.forOwn(new Foo, function(value, key) { - * console.log(key); - * }); - * // => Logs 'a' then 'b' (iteration order is not guaranteed). - */ - function forOwn(object, iteratee) { - return object && baseForOwn(object, getIteratee(iteratee, 3)); - } - - /** - * This method is like `_.forOwn` except that it iterates over properties of - * `object` in the opposite order. - * - * @static - * @memberOf _ - * @since 2.0.0 - * @category Object - * @param {Object} object The object to iterate over. - * @param {Function} [iteratee=_.identity] The function invoked per iteration. - * @returns {Object} Returns `object`. 
- * @see _.forOwn - * @example - * - * function Foo() { - * this.a = 1; - * this.b = 2; - * } - * - * Foo.prototype.c = 3; - * - * _.forOwnRight(new Foo, function(value, key) { - * console.log(key); - * }); - * // => Logs 'b' then 'a' assuming `_.forOwn` logs 'a' then 'b'. - */ - function forOwnRight(object, iteratee) { - return object && baseForOwnRight(object, getIteratee(iteratee, 3)); - } - - /** - * Creates an array of function property names from own enumerable properties - * of `object`. - * - * @static - * @since 0.1.0 - * @memberOf _ - * @category Object - * @param {Object} object The object to inspect. - * @returns {Array} Returns the function names. - * @see _.functionsIn - * @example - * - * function Foo() { - * this.a = _.constant('a'); - * this.b = _.constant('b'); - * } - * - * Foo.prototype.c = _.constant('c'); - * - * _.functions(new Foo); - * // => ['a', 'b'] - */ - function functions(object) { - return object == null ? [] : baseFunctions(object, keys(object)); - } - - /** - * Creates an array of function property names from own and inherited - * enumerable properties of `object`. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Object - * @param {Object} object The object to inspect. - * @returns {Array} Returns the function names. - * @see _.functions - * @example - * - * function Foo() { - * this.a = _.constant('a'); - * this.b = _.constant('b'); - * } - * - * Foo.prototype.c = _.constant('c'); - * - * _.functionsIn(new Foo); - * // => ['a', 'b', 'c'] - */ - function functionsIn(object) { - return object == null ? [] : baseFunctions(object, keysIn(object)); - } - - /** - * Gets the value at `path` of `object`. If the resolved value is - * `undefined`, the `defaultValue` is returned in its place. - * - * @static - * @memberOf _ - * @since 3.7.0 - * @category Object - * @param {Object} object The object to query. - * @param {Array|string} path The path of the property to get. - * @param {*} [defaultValue] The value returned for `undefined` resolved values. - * @returns {*} Returns the resolved value. - * @example - * - * var object = { 'a': [{ 'b': { 'c': 3 } }] }; - * - * _.get(object, 'a[0].b.c'); - * // => 3 - * - * _.get(object, ['a', '0', 'b', 'c']); - * // => 3 - * - * _.get(object, 'a.b.c', 'default'); - * // => 'default' - */ - function get(object, path, defaultValue) { - var result = object == null ? undefined : baseGet(object, path); - return result === undefined ? defaultValue : result; - } - - /** - * Checks if `path` is a direct property of `object`. - * - * @static - * @since 0.1.0 - * @memberOf _ - * @category Object - * @param {Object} object The object to query. - * @param {Array|string} path The path to check. - * @returns {boolean} Returns `true` if `path` exists, else `false`. - * @example - * - * var object = { 'a': { 'b': 2 } }; - * var other = _.create({ 'a': _.create({ 'b': 2 }) }); - * - * _.has(object, 'a'); - * // => true - * - * _.has(object, 'a.b'); - * // => true - * - * _.has(object, ['a', 'b']); - * // => true - * - * _.has(other, 'a'); - * // => false - */ - function has(object, path) { - return object != null && hasPath(object, path, baseHas); - } - - /** - * Checks if `path` is a direct or inherited property of `object`. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Object - * @param {Object} object The object to query. - * @param {Array|string} path The path to check. - * @returns {boolean} Returns `true` if `path` exists, else `false`. 
- * @example - * - * var object = _.create({ 'a': _.create({ 'b': 2 }) }); - * - * _.hasIn(object, 'a'); - * // => true - * - * _.hasIn(object, 'a.b'); - * // => true - * - * _.hasIn(object, ['a', 'b']); - * // => true - * - * _.hasIn(object, 'b'); - * // => false - */ - function hasIn(object, path) { - return object != null && hasPath(object, path, baseHasIn); - } - - /** - * Creates an object composed of the inverted keys and values of `object`. - * If `object` contains duplicate values, subsequent values overwrite - * property assignments of previous values. - * - * @static - * @memberOf _ - * @since 0.7.0 - * @category Object - * @param {Object} object The object to invert. - * @returns {Object} Returns the new inverted object. - * @example - * - * var object = { 'a': 1, 'b': 2, 'c': 1 }; - * - * _.invert(object); - * // => { '1': 'c', '2': 'b' } - */ - var invert = createInverter(function(result, value, key) { - if (value != null && - typeof value.toString != 'function') { - value = nativeObjectToString.call(value); - } - - result[value] = key; - }, constant(identity)); - - /** - * This method is like `_.invert` except that the inverted object is generated - * from the results of running each element of `object` thru `iteratee`. The - * corresponding inverted value of each inverted key is an array of keys - * responsible for generating the inverted value. The iteratee is invoked - * with one argument: (value). - * - * @static - * @memberOf _ - * @since 4.1.0 - * @category Object - * @param {Object} object The object to invert. - * @param {Function} [iteratee=_.identity] The iteratee invoked per element. - * @returns {Object} Returns the new inverted object. - * @example - * - * var object = { 'a': 1, 'b': 2, 'c': 1 }; - * - * _.invertBy(object); - * // => { '1': ['a', 'c'], '2': ['b'] } - * - * _.invertBy(object, function(value) { - * return 'group' + value; - * }); - * // => { 'group1': ['a', 'c'], 'group2': ['b'] } - */ - var invertBy = createInverter(function(result, value, key) { - if (value != null && - typeof value.toString != 'function') { - value = nativeObjectToString.call(value); - } - - if (hasOwnProperty.call(result, value)) { - result[value].push(key); - } else { - result[value] = [key]; - } - }, getIteratee); - - /** - * Invokes the method at `path` of `object`. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Object - * @param {Object} object The object to query. - * @param {Array|string} path The path of the method to invoke. - * @param {...*} [args] The arguments to invoke the method with. - * @returns {*} Returns the result of the invoked method. - * @example - * - * var object = { 'a': [{ 'b': { 'c': [1, 2, 3, 4] } }] }; - * - * _.invoke(object, 'a[0].b.c.slice', 1, 3); - * // => [2, 3] - */ - var invoke = baseRest(baseInvoke); - - /** - * Creates an array of the own enumerable property names of `object`. - * - * **Note:** Non-object values are coerced to objects. See the - * [ES spec](http://ecma-international.org/ecma-262/7.0/#sec-object.keys) - * for more details. - * - * @static - * @since 0.1.0 - * @memberOf _ - * @category Object - * @param {Object} object The object to query. - * @returns {Array} Returns the array of property names. - * @example - * - * function Foo() { - * this.a = 1; - * this.b = 2; - * } - * - * Foo.prototype.c = 3; - * - * _.keys(new Foo); - * // => ['a', 'b'] (iteration order is not guaranteed) - * - * _.keys('hi'); - * // => ['0', '1'] - */ - function keys(object) { - return isArrayLike(object) ? 
arrayLikeKeys(object) : baseKeys(object); - } - - /** - * Creates an array of the own and inherited enumerable property names of `object`. - * - * **Note:** Non-object values are coerced to objects. - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category Object - * @param {Object} object The object to query. - * @returns {Array} Returns the array of property names. - * @example - * - * function Foo() { - * this.a = 1; - * this.b = 2; - * } - * - * Foo.prototype.c = 3; - * - * _.keysIn(new Foo); - * // => ['a', 'b', 'c'] (iteration order is not guaranteed) - */ - function keysIn(object) { - return isArrayLike(object) ? arrayLikeKeys(object, true) : baseKeysIn(object); - } - - /** - * The opposite of `_.mapValues`; this method creates an object with the - * same values as `object` and keys generated by running each own enumerable - * string keyed property of `object` thru `iteratee`. The iteratee is invoked - * with three arguments: (value, key, object). - * - * @static - * @memberOf _ - * @since 3.8.0 - * @category Object - * @param {Object} object The object to iterate over. - * @param {Function} [iteratee=_.identity] The function invoked per iteration. - * @returns {Object} Returns the new mapped object. - * @see _.mapValues - * @example - * - * _.mapKeys({ 'a': 1, 'b': 2 }, function(value, key) { - * return key + value; - * }); - * // => { 'a1': 1, 'b2': 2 } - */ - function mapKeys(object, iteratee) { - var result = {}; - iteratee = getIteratee(iteratee, 3); - - baseForOwn(object, function(value, key, object) { - baseAssignValue(result, iteratee(value, key, object), value); - }); - return result; - } - - /** - * Creates an object with the same keys as `object` and values generated - * by running each own enumerable string keyed property of `object` thru - * `iteratee`. The iteratee is invoked with three arguments: - * (value, key, object). - * - * @static - * @memberOf _ - * @since 2.4.0 - * @category Object - * @param {Object} object The object to iterate over. - * @param {Function} [iteratee=_.identity] The function invoked per iteration. - * @returns {Object} Returns the new mapped object. - * @see _.mapKeys - * @example - * - * var users = { - * 'fred': { 'user': 'fred', 'age': 40 }, - * 'pebbles': { 'user': 'pebbles', 'age': 1 } - * }; - * - * _.mapValues(users, function(o) { return o.age; }); - * // => { 'fred': 40, 'pebbles': 1 } (iteration order is not guaranteed) - * - * // The `_.property` iteratee shorthand. - * _.mapValues(users, 'age'); - * // => { 'fred': 40, 'pebbles': 1 } (iteration order is not guaranteed) - */ - function mapValues(object, iteratee) { - var result = {}; - iteratee = getIteratee(iteratee, 3); - - baseForOwn(object, function(value, key, object) { - baseAssignValue(result, key, iteratee(value, key, object)); - }); - return result; - } - - /** - * This method is like `_.assign` except that it recursively merges own and - * inherited enumerable string keyed properties of source objects into the - * destination object. Source properties that resolve to `undefined` are - * skipped if a destination value exists. Array and plain object properties - * are merged recursively. Other objects and value types are overridden by - * assignment. Source objects are applied from left to right. Subsequent - * sources overwrite property assignments of previous sources. - * - * **Note:** This method mutates `object`. - * - * @static - * @memberOf _ - * @since 0.5.0 - * @category Object - * @param {Object} object The destination object. 
- * @param {...Object} [sources] The source objects. - * @returns {Object} Returns `object`. - * @example - * - * var object = { - * 'a': [{ 'b': 2 }, { 'd': 4 }] - * }; - * - * var other = { - * 'a': [{ 'c': 3 }, { 'e': 5 }] - * }; - * - * _.merge(object, other); - * // => { 'a': [{ 'b': 2, 'c': 3 }, { 'd': 4, 'e': 5 }] } - */ - var merge = createAssigner(function(object, source, srcIndex) { - baseMerge(object, source, srcIndex); - }); - - /** - * This method is like `_.merge` except that it accepts `customizer` which - * is invoked to produce the merged values of the destination and source - * properties. If `customizer` returns `undefined`, merging is handled by the - * method instead. The `customizer` is invoked with six arguments: - * (objValue, srcValue, key, object, source, stack). - * - * **Note:** This method mutates `object`. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Object - * @param {Object} object The destination object. - * @param {...Object} sources The source objects. - * @param {Function} customizer The function to customize assigned values. - * @returns {Object} Returns `object`. - * @example - * - * function customizer(objValue, srcValue) { - * if (_.isArray(objValue)) { - * return objValue.concat(srcValue); - * } - * } - * - * var object = { 'a': [1], 'b': [2] }; - * var other = { 'a': [3], 'b': [4] }; - * - * _.mergeWith(object, other, customizer); - * // => { 'a': [1, 3], 'b': [2, 4] } - */ - var mergeWith = createAssigner(function(object, source, srcIndex, customizer) { - baseMerge(object, source, srcIndex, customizer); - }); - - /** - * The opposite of `_.pick`; this method creates an object composed of the - * own and inherited enumerable property paths of `object` that are not omitted. - * - * **Note:** This method is considerably slower than `_.pick`. - * - * @static - * @since 0.1.0 - * @memberOf _ - * @category Object - * @param {Object} object The source object. - * @param {...(string|string[])} [paths] The property paths to omit. - * @returns {Object} Returns the new object. - * @example - * - * var object = { 'a': 1, 'b': '2', 'c': 3 }; - * - * _.omit(object, ['a', 'c']); - * // => { 'b': '2' } - */ - var omit = flatRest(function(object, paths) { - var result = {}; - if (object == null) { - return result; - } - var isDeep = false; - paths = arrayMap(paths, function(path) { - path = castPath(path, object); - isDeep || (isDeep = path.length > 1); - return path; - }); - copyObject(object, getAllKeysIn(object), result); - if (isDeep) { - result = baseClone(result, CLONE_DEEP_FLAG | CLONE_FLAT_FLAG | CLONE_SYMBOLS_FLAG, customOmitClone); - } - var length = paths.length; - while (length--) { - baseUnset(result, paths[length]); - } - return result; - }); - - /** - * The opposite of `_.pickBy`; this method creates an object composed of - * the own and inherited enumerable string keyed properties of `object` that - * `predicate` doesn't return truthy for. The predicate is invoked with two - * arguments: (value, key). - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Object - * @param {Object} object The source object. - * @param {Function} [predicate=_.identity] The function invoked per property. - * @returns {Object} Returns the new object. 
- * @example - * - * var object = { 'a': 1, 'b': '2', 'c': 3 }; - * - * _.omitBy(object, _.isNumber); - * // => { 'b': '2' } - */ - function omitBy(object, predicate) { - return pickBy(object, negate(getIteratee(predicate))); - } - - /** - * Creates an object composed of the picked `object` properties. - * - * @static - * @since 0.1.0 - * @memberOf _ - * @category Object - * @param {Object} object The source object. - * @param {...(string|string[])} [paths] The property paths to pick. - * @returns {Object} Returns the new object. - * @example - * - * var object = { 'a': 1, 'b': '2', 'c': 3 }; - * - * _.pick(object, ['a', 'c']); - * // => { 'a': 1, 'c': 3 } - */ - var pick = flatRest(function(object, paths) { - return object == null ? {} : basePick(object, paths); - }); - - /** - * Creates an object composed of the `object` properties `predicate` returns - * truthy for. The predicate is invoked with two arguments: (value, key). - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Object - * @param {Object} object The source object. - * @param {Function} [predicate=_.identity] The function invoked per property. - * @returns {Object} Returns the new object. - * @example - * - * var object = { 'a': 1, 'b': '2', 'c': 3 }; - * - * _.pickBy(object, _.isNumber); - * // => { 'a': 1, 'c': 3 } - */ - function pickBy(object, predicate) { - if (object == null) { - return {}; - } - var props = arrayMap(getAllKeysIn(object), function(prop) { - return [prop]; - }); - predicate = getIteratee(predicate); - return basePickBy(object, props, function(value, path) { - return predicate(value, path[0]); - }); - } - - /** - * This method is like `_.get` except that if the resolved value is a - * function it's invoked with the `this` binding of its parent object and - * its result is returned. - * - * @static - * @since 0.1.0 - * @memberOf _ - * @category Object - * @param {Object} object The object to query. - * @param {Array|string} path The path of the property to resolve. - * @param {*} [defaultValue] The value returned for `undefined` resolved values. - * @returns {*} Returns the resolved value. - * @example - * - * var object = { 'a': [{ 'b': { 'c1': 3, 'c2': _.constant(4) } }] }; - * - * _.result(object, 'a[0].b.c1'); - * // => 3 - * - * _.result(object, 'a[0].b.c2'); - * // => 4 - * - * _.result(object, 'a[0].b.c3', 'default'); - * // => 'default' - * - * _.result(object, 'a[0].b.c3', _.constant('default')); - * // => 'default' - */ - function result(object, path, defaultValue) { - path = castPath(path, object); - - var index = -1, - length = path.length; - - // Ensure the loop is entered when path is empty. - if (!length) { - length = 1; - object = undefined; - } - while (++index < length) { - var value = object == null ? undefined : object[toKey(path[index])]; - if (value === undefined) { - index = length; - value = defaultValue; - } - object = isFunction(value) ? value.call(object) : value; - } - return object; - } - - /** - * Sets the value at `path` of `object`. If a portion of `path` doesn't exist, - * it's created. Arrays are created for missing index properties while objects - * are created for all other missing properties. Use `_.setWith` to customize - * `path` creation. - * - * **Note:** This method mutates `object`. - * - * @static - * @memberOf _ - * @since 3.7.0 - * @category Object - * @param {Object} object The object to modify. - * @param {Array|string} path The path of the property to set. - * @param {*} value The value to set. - * @returns {Object} Returns `object`. 
- * @example - * - * var object = { 'a': [{ 'b': { 'c': 3 } }] }; - * - * _.set(object, 'a[0].b.c', 4); - * console.log(object.a[0].b.c); - * // => 4 - * - * _.set(object, ['x', '0', 'y', 'z'], 5); - * console.log(object.x[0].y.z); - * // => 5 - */ - function set(object, path, value) { - return object == null ? object : baseSet(object, path, value); - } - - /** - * This method is like `_.set` except that it accepts `customizer` which is - * invoked to produce the objects of `path`. If `customizer` returns `undefined` - * path creation is handled by the method instead. The `customizer` is invoked - * with three arguments: (nsValue, key, nsObject). - * - * **Note:** This method mutates `object`. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Object - * @param {Object} object The object to modify. - * @param {Array|string} path The path of the property to set. - * @param {*} value The value to set. - * @param {Function} [customizer] The function to customize assigned values. - * @returns {Object} Returns `object`. - * @example - * - * var object = {}; - * - * _.setWith(object, '[0][1]', 'a', Object); - * // => { '0': { '1': 'a' } } - */ - function setWith(object, path, value, customizer) { - customizer = typeof customizer == 'function' ? customizer : undefined; - return object == null ? object : baseSet(object, path, value, customizer); - } - - /** - * Creates an array of own enumerable string keyed-value pairs for `object` - * which can be consumed by `_.fromPairs`. If `object` is a map or set, its - * entries are returned. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @alias entries - * @category Object - * @param {Object} object The object to query. - * @returns {Array} Returns the key-value pairs. - * @example - * - * function Foo() { - * this.a = 1; - * this.b = 2; - * } - * - * Foo.prototype.c = 3; - * - * _.toPairs(new Foo); - * // => [['a', 1], ['b', 2]] (iteration order is not guaranteed) - */ - var toPairs = createToPairs(keys); - - /** - * Creates an array of own and inherited enumerable string keyed-value pairs - * for `object` which can be consumed by `_.fromPairs`. If `object` is a map - * or set, its entries are returned. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @alias entriesIn - * @category Object - * @param {Object} object The object to query. - * @returns {Array} Returns the key-value pairs. - * @example - * - * function Foo() { - * this.a = 1; - * this.b = 2; - * } - * - * Foo.prototype.c = 3; - * - * _.toPairsIn(new Foo); - * // => [['a', 1], ['b', 2], ['c', 3]] (iteration order is not guaranteed) - */ - var toPairsIn = createToPairs(keysIn); - - /** - * An alternative to `_.reduce`; this method transforms `object` to a new - * `accumulator` object which is the result of running each of its own - * enumerable string keyed properties thru `iteratee`, with each invocation - * potentially mutating the `accumulator` object. If `accumulator` is not - * provided, a new object with the same `[[Prototype]]` will be used. The - * iteratee is invoked with four arguments: (accumulator, value, key, object). - * Iteratee functions may exit iteration early by explicitly returning `false`. - * - * @static - * @memberOf _ - * @since 1.3.0 - * @category Object - * @param {Object} object The object to iterate over. - * @param {Function} [iteratee=_.identity] The function invoked per iteration. - * @param {*} [accumulator] The custom accumulator value. - * @returns {*} Returns the accumulated value. 
- * @example - * - * _.transform([2, 3, 4], function(result, n) { - * result.push(n *= n); - * return n % 2 == 0; - * }, []); - * // => [4, 9] - * - * _.transform({ 'a': 1, 'b': 2, 'c': 1 }, function(result, value, key) { - * (result[value] || (result[value] = [])).push(key); - * }, {}); - * // => { '1': ['a', 'c'], '2': ['b'] } - */ - function transform(object, iteratee, accumulator) { - var isArr = isArray(object), - isArrLike = isArr || isBuffer(object) || isTypedArray(object); - - iteratee = getIteratee(iteratee, 4); - if (accumulator == null) { - var Ctor = object && object.constructor; - if (isArrLike) { - accumulator = isArr ? new Ctor : []; - } - else if (isObject(object)) { - accumulator = isFunction(Ctor) ? baseCreate(getPrototype(object)) : {}; - } - else { - accumulator = {}; - } - } - (isArrLike ? arrayEach : baseForOwn)(object, function(value, index, object) { - return iteratee(accumulator, value, index, object); - }); - return accumulator; - } - - /** - * Removes the property at `path` of `object`. - * - * **Note:** This method mutates `object`. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Object - * @param {Object} object The object to modify. - * @param {Array|string} path The path of the property to unset. - * @returns {boolean} Returns `true` if the property is deleted, else `false`. - * @example - * - * var object = { 'a': [{ 'b': { 'c': 7 } }] }; - * _.unset(object, 'a[0].b.c'); - * // => true - * - * console.log(object); - * // => { 'a': [{ 'b': {} }] }; - * - * _.unset(object, ['a', '0', 'b', 'c']); - * // => true - * - * console.log(object); - * // => { 'a': [{ 'b': {} }] }; - */ - function unset(object, path) { - return object == null ? true : baseUnset(object, path); - } - - /** - * This method is like `_.set` except that accepts `updater` to produce the - * value to set. Use `_.updateWith` to customize `path` creation. The `updater` - * is invoked with one argument: (value). - * - * **Note:** This method mutates `object`. - * - * @static - * @memberOf _ - * @since 4.6.0 - * @category Object - * @param {Object} object The object to modify. - * @param {Array|string} path The path of the property to set. - * @param {Function} updater The function to produce the updated value. - * @returns {Object} Returns `object`. - * @example - * - * var object = { 'a': [{ 'b': { 'c': 3 } }] }; - * - * _.update(object, 'a[0].b.c', function(n) { return n * n; }); - * console.log(object.a[0].b.c); - * // => 9 - * - * _.update(object, 'x[0].y.z', function(n) { return n ? n + 1 : 0; }); - * console.log(object.x[0].y.z); - * // => 0 - */ - function update(object, path, updater) { - return object == null ? object : baseUpdate(object, path, castFunction(updater)); - } - - /** - * This method is like `_.update` except that it accepts `customizer` which is - * invoked to produce the objects of `path`. If `customizer` returns `undefined` - * path creation is handled by the method instead. The `customizer` is invoked - * with three arguments: (nsValue, key, nsObject). - * - * **Note:** This method mutates `object`. - * - * @static - * @memberOf _ - * @since 4.6.0 - * @category Object - * @param {Object} object The object to modify. - * @param {Array|string} path The path of the property to set. - * @param {Function} updater The function to produce the updated value. - * @param {Function} [customizer] The function to customize assigned values. - * @returns {Object} Returns `object`. 
- * @example - * - * var object = {}; - * - * _.updateWith(object, '[0][1]', _.constant('a'), Object); - * // => { '0': { '1': 'a' } } - */ - function updateWith(object, path, updater, customizer) { - customizer = typeof customizer == 'function' ? customizer : undefined; - return object == null ? object : baseUpdate(object, path, castFunction(updater), customizer); - } - - /** - * Creates an array of the own enumerable string keyed property values of `object`. - * - * **Note:** Non-object values are coerced to objects. - * - * @static - * @since 0.1.0 - * @memberOf _ - * @category Object - * @param {Object} object The object to query. - * @returns {Array} Returns the array of property values. - * @example - * - * function Foo() { - * this.a = 1; - * this.b = 2; - * } - * - * Foo.prototype.c = 3; - * - * _.values(new Foo); - * // => [1, 2] (iteration order is not guaranteed) - * - * _.values('hi'); - * // => ['h', 'i'] - */ - function values(object) { - return object == null ? [] : baseValues(object, keys(object)); - } - - /** - * Creates an array of the own and inherited enumerable string keyed property - * values of `object`. - * - * **Note:** Non-object values are coerced to objects. - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category Object - * @param {Object} object The object to query. - * @returns {Array} Returns the array of property values. - * @example - * - * function Foo() { - * this.a = 1; - * this.b = 2; - * } - * - * Foo.prototype.c = 3; - * - * _.valuesIn(new Foo); - * // => [1, 2, 3] (iteration order is not guaranteed) - */ - function valuesIn(object) { - return object == null ? [] : baseValues(object, keysIn(object)); - } - - /*------------------------------------------------------------------------*/ - - /** - * Clamps `number` within the inclusive `lower` and `upper` bounds. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category Number - * @param {number} number The number to clamp. - * @param {number} [lower] The lower bound. - * @param {number} upper The upper bound. - * @returns {number} Returns the clamped number. - * @example - * - * _.clamp(-10, -5, 5); - * // => -5 - * - * _.clamp(10, -5, 5); - * // => 5 - */ - function clamp(number, lower, upper) { - if (upper === undefined) { - upper = lower; - lower = undefined; - } - if (upper !== undefined) { - upper = toNumber(upper); - upper = upper === upper ? upper : 0; - } - if (lower !== undefined) { - lower = toNumber(lower); - lower = lower === lower ? lower : 0; - } - return baseClamp(toNumber(number), lower, upper); - } - - /** - * Checks if `n` is between `start` and up to, but not including, `end`. If - * `end` is not specified, it's set to `start` with `start` then set to `0`. - * If `start` is greater than `end` the params are swapped to support - * negative ranges. - * - * @static - * @memberOf _ - * @since 3.3.0 - * @category Number - * @param {number} number The number to check. - * @param {number} [start=0] The start of the range. - * @param {number} end The end of the range. - * @returns {boolean} Returns `true` if `number` is in the range, else `false`. 
- * @see _.range, _.rangeRight - * @example - * - * _.inRange(3, 2, 4); - * // => true - * - * _.inRange(4, 8); - * // => true - * - * _.inRange(4, 2); - * // => false - * - * _.inRange(2, 2); - * // => false - * - * _.inRange(1.2, 2); - * // => true - * - * _.inRange(5.2, 4); - * // => false - * - * _.inRange(-3, -2, -6); - * // => true - */ - function inRange(number, start, end) { - start = toFinite(start); - if (end === undefined) { - end = start; - start = 0; - } else { - end = toFinite(end); - } - number = toNumber(number); - return baseInRange(number, start, end); - } - - /** - * Produces a random number between the inclusive `lower` and `upper` bounds. - * If only one argument is provided a number between `0` and the given number - * is returned. If `floating` is `true`, or either `lower` or `upper` are - * floats, a floating-point number is returned instead of an integer. - * - * **Note:** JavaScript follows the IEEE-754 standard for resolving - * floating-point values which can produce unexpected results. - * - * @static - * @memberOf _ - * @since 0.7.0 - * @category Number - * @param {number} [lower=0] The lower bound. - * @param {number} [upper=1] The upper bound. - * @param {boolean} [floating] Specify returning a floating-point number. - * @returns {number} Returns the random number. - * @example - * - * _.random(0, 5); - * // => an integer between 0 and 5 - * - * _.random(5); - * // => also an integer between 0 and 5 - * - * _.random(5, true); - * // => a floating-point number between 0 and 5 - * - * _.random(1.2, 5.2); - * // => a floating-point number between 1.2 and 5.2 - */ - function random(lower, upper, floating) { - if (floating && typeof floating != 'boolean' && isIterateeCall(lower, upper, floating)) { - upper = floating = undefined; - } - if (floating === undefined) { - if (typeof upper == 'boolean') { - floating = upper; - upper = undefined; - } - else if (typeof lower == 'boolean') { - floating = lower; - lower = undefined; - } - } - if (lower === undefined && upper === undefined) { - lower = 0; - upper = 1; - } - else { - lower = toFinite(lower); - if (upper === undefined) { - upper = lower; - lower = 0; - } else { - upper = toFinite(upper); - } - } - if (lower > upper) { - var temp = lower; - lower = upper; - upper = temp; - } - if (floating || lower % 1 || upper % 1) { - var rand = nativeRandom(); - return nativeMin(lower + (rand * (upper - lower + freeParseFloat('1e-' + ((rand + '').length - 1)))), upper); - } - return baseRandom(lower, upper); - } - - /*------------------------------------------------------------------------*/ - - /** - * Converts `string` to [camel case](https://en.wikipedia.org/wiki/CamelCase). - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category String - * @param {string} [string=''] The string to convert. - * @returns {string} Returns the camel cased string. - * @example - * - * _.camelCase('Foo Bar'); - * // => 'fooBar' - * - * _.camelCase('--foo-bar--'); - * // => 'fooBar' - * - * _.camelCase('__FOO_BAR__'); - * // => 'fooBar' - */ - var camelCase = createCompounder(function(result, word, index) { - word = word.toLowerCase(); - return result + (index ? capitalize(word) : word); - }); - - /** - * Converts the first character of `string` to upper case and the remaining - * to lower case. - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category String - * @param {string} [string=''] The string to capitalize. - * @returns {string} Returns the capitalized string. 
- * @example - * - * _.capitalize('FRED'); - * // => 'Fred' - */ - function capitalize(string) { - return upperFirst(toString(string).toLowerCase()); - } - - /** - * Deburrs `string` by converting - * [Latin-1 Supplement](https://en.wikipedia.org/wiki/Latin-1_Supplement_(Unicode_block)#Character_table) - * and [Latin Extended-A](https://en.wikipedia.org/wiki/Latin_Extended-A) - * letters to basic Latin letters and removing - * [combining diacritical marks](https://en.wikipedia.org/wiki/Combining_Diacritical_Marks). - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category String - * @param {string} [string=''] The string to deburr. - * @returns {string} Returns the deburred string. - * @example - * - * _.deburr('déjà vu'); - * // => 'deja vu' - */ - function deburr(string) { - string = toString(string); - return string && string.replace(reLatin, deburrLetter).replace(reComboMark, ''); - } - - /** - * Checks if `string` ends with the given target string. - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category String - * @param {string} [string=''] The string to inspect. - * @param {string} [target] The string to search for. - * @param {number} [position=string.length] The position to search up to. - * @returns {boolean} Returns `true` if `string` ends with `target`, - * else `false`. - * @example - * - * _.endsWith('abc', 'c'); - * // => true - * - * _.endsWith('abc', 'b'); - * // => false - * - * _.endsWith('abc', 'b', 2); - * // => true - */ - function endsWith(string, target, position) { - string = toString(string); - target = baseToString(target); - - var length = string.length; - position = position === undefined - ? length - : baseClamp(toInteger(position), 0, length); - - var end = position; - position -= target.length; - return position >= 0 && string.slice(position, end) == target; - } - - /** - * Converts the characters "&", "<", ">", '"', and "'" in `string` to their - * corresponding HTML entities. - * - * **Note:** No other characters are escaped. To escape additional - * characters use a third-party library like [_he_](https://mths.be/he). - * - * Though the ">" character is escaped for symmetry, characters like - * ">" and "/" don't need escaping in HTML and have no special meaning - * unless they're part of a tag or unquoted attribute value. See - * [Mathias Bynens's article](https://mathiasbynens.be/notes/ambiguous-ampersands) - * (under "semi-related fun fact") for more details. - * - * When working with HTML you should always - * [quote attribute values](http://wonko.com/post/html-escaping) to reduce - * XSS vectors. - * - * @static - * @since 0.1.0 - * @memberOf _ - * @category String - * @param {string} [string=''] The string to escape. - * @returns {string} Returns the escaped string. - * @example - * - * _.escape('fred, barney, & pebbles'); - * // => 'fred, barney, & pebbles' - */ - function escape(string) { - string = toString(string); - return (string && reHasUnescapedHtml.test(string)) - ? string.replace(reUnescapedHtml, escapeHtmlChar) - : string; - } - - /** - * Escapes the `RegExp` special characters "^", "$", "\", ".", "*", "+", - * "?", "(", ")", "[", "]", "{", "}", and "|" in `string`. - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category String - * @param {string} [string=''] The string to escape. - * @returns {string} Returns the escaped string. 
- * @example - * - * _.escapeRegExp('[lodash](https://lodash.com/)'); - * // => '\[lodash\]\(https://lodash\.com/\)' - */ - function escapeRegExp(string) { - string = toString(string); - return (string && reHasRegExpChar.test(string)) - ? string.replace(reRegExpChar, '\\$&') - : string; - } - - /** - * Converts `string` to - * [kebab case](https://en.wikipedia.org/wiki/Letter_case#Special_case_styles). - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category String - * @param {string} [string=''] The string to convert. - * @returns {string} Returns the kebab cased string. - * @example - * - * _.kebabCase('Foo Bar'); - * // => 'foo-bar' - * - * _.kebabCase('fooBar'); - * // => 'foo-bar' - * - * _.kebabCase('__FOO_BAR__'); - * // => 'foo-bar' - */ - var kebabCase = createCompounder(function(result, word, index) { - return result + (index ? '-' : '') + word.toLowerCase(); - }); - - /** - * Converts `string`, as space separated words, to lower case. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category String - * @param {string} [string=''] The string to convert. - * @returns {string} Returns the lower cased string. - * @example - * - * _.lowerCase('--Foo-Bar--'); - * // => 'foo bar' - * - * _.lowerCase('fooBar'); - * // => 'foo bar' - * - * _.lowerCase('__FOO_BAR__'); - * // => 'foo bar' - */ - var lowerCase = createCompounder(function(result, word, index) { - return result + (index ? ' ' : '') + word.toLowerCase(); - }); - - /** - * Converts the first character of `string` to lower case. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category String - * @param {string} [string=''] The string to convert. - * @returns {string} Returns the converted string. - * @example - * - * _.lowerFirst('Fred'); - * // => 'fred' - * - * _.lowerFirst('FRED'); - * // => 'fRED' - */ - var lowerFirst = createCaseFirst('toLowerCase'); - - /** - * Pads `string` on the left and right sides if it's shorter than `length`. - * Padding characters are truncated if they can't be evenly divided by `length`. - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category String - * @param {string} [string=''] The string to pad. - * @param {number} [length=0] The padding length. - * @param {string} [chars=' '] The string used as padding. - * @returns {string} Returns the padded string. - * @example - * - * _.pad('abc', 8); - * // => ' abc ' - * - * _.pad('abc', 8, '_-'); - * // => '_-abc_-_' - * - * _.pad('abc', 3); - * // => 'abc' - */ - function pad(string, length, chars) { - string = toString(string); - length = toInteger(length); - - var strLength = length ? stringSize(string) : 0; - if (!length || strLength >= length) { - return string; - } - var mid = (length - strLength) / 2; - return ( - createPadding(nativeFloor(mid), chars) + - string + - createPadding(nativeCeil(mid), chars) - ); - } - - /** - * Pads `string` on the right side if it's shorter than `length`. Padding - * characters are truncated if they exceed `length`. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category String - * @param {string} [string=''] The string to pad. - * @param {number} [length=0] The padding length. - * @param {string} [chars=' '] The string used as padding. - * @returns {string} Returns the padded string. - * @example - * - * _.padEnd('abc', 6); - * // => 'abc ' - * - * _.padEnd('abc', 6, '_-'); - * // => 'abc_-_' - * - * _.padEnd('abc', 3); - * // => 'abc' - */ - function padEnd(string, length, chars) { - string = toString(string); - length = toInteger(length); - - var strLength = length ? 
stringSize(string) : 0; - return (length && strLength < length) - ? (string + createPadding(length - strLength, chars)) - : string; - } - - /** - * Pads `string` on the left side if it's shorter than `length`. Padding - * characters are truncated if they exceed `length`. - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category String - * @param {string} [string=''] The string to pad. - * @param {number} [length=0] The padding length. - * @param {string} [chars=' '] The string used as padding. - * @returns {string} Returns the padded string. - * @example - * - * _.padStart('abc', 6); - * // => ' abc' - * - * _.padStart('abc', 6, '_-'); - * // => '_-_abc' - * - * _.padStart('abc', 3); - * // => 'abc' - */ - function padStart(string, length, chars) { - string = toString(string); - length = toInteger(length); - - var strLength = length ? stringSize(string) : 0; - return (length && strLength < length) - ? (createPadding(length - strLength, chars) + string) - : string; - } - - /** - * Converts `string` to an integer of the specified radix. If `radix` is - * `undefined` or `0`, a `radix` of `10` is used unless `value` is a - * hexadecimal, in which case a `radix` of `16` is used. - * - * **Note:** This method aligns with the - * [ES5 implementation](https://es5.github.io/#x15.1.2.2) of `parseInt`. - * - * @static - * @memberOf _ - * @since 1.1.0 - * @category String - * @param {string} string The string to convert. - * @param {number} [radix=10] The radix to interpret `value` by. - * @param- {Object} [guard] Enables use as an iteratee for methods like `_.map`. - * @returns {number} Returns the converted integer. - * @example - * - * _.parseInt('08'); - * // => 8 - * - * _.map(['6', '08', '10'], _.parseInt); - * // => [6, 8, 10] - */ - function parseInt(string, radix, guard) { - if (guard || radix == null) { - radix = 0; - } else if (radix) { - radix = +radix; - } - return nativeParseInt(toString(string).replace(reTrimStart, ''), radix || 0); - } - - /** - * Repeats the given string `n` times. - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category String - * @param {string} [string=''] The string to repeat. - * @param {number} [n=1] The number of times to repeat the string. - * @param- {Object} [guard] Enables use as an iteratee for methods like `_.map`. - * @returns {string} Returns the repeated string. - * @example - * - * _.repeat('*', 3); - * // => '***' - * - * _.repeat('abc', 2); - * // => 'abcabc' - * - * _.repeat('abc', 0); - * // => '' - */ - function repeat(string, n, guard) { - if ((guard ? isIterateeCall(string, n, guard) : n === undefined)) { - n = 1; - } else { - n = toInteger(n); - } - return baseRepeat(toString(string), n); - } - - /** - * Replaces matches for `pattern` in `string` with `replacement`. - * - * **Note:** This method is based on - * [`String#replace`](https://mdn.io/String/replace). - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category String - * @param {string} [string=''] The string to modify. - * @param {RegExp|string} pattern The pattern to replace. - * @param {Function|string} replacement The match replacement. - * @returns {string} Returns the modified string. - * @example - * - * _.replace('Hi Fred', 'Fred', 'Barney'); - * // => 'Hi Barney' - */ - function replace() { - var args = arguments, - string = toString(args[0]); - - return args.length < 3 ? string : string.replace(args[1], args[2]); - } - - /** - * Converts `string` to - * [snake case](https://en.wikipedia.org/wiki/Snake_case). 
- * - * @static - * @memberOf _ - * @since 3.0.0 - * @category String - * @param {string} [string=''] The string to convert. - * @returns {string} Returns the snake cased string. - * @example - * - * _.snakeCase('Foo Bar'); - * // => 'foo_bar' - * - * _.snakeCase('fooBar'); - * // => 'foo_bar' - * - * _.snakeCase('--FOO-BAR--'); - * // => 'foo_bar' - */ - var snakeCase = createCompounder(function(result, word, index) { - return result + (index ? '_' : '') + word.toLowerCase(); - }); - - /** - * Splits `string` by `separator`. - * - * **Note:** This method is based on - * [`String#split`](https://mdn.io/String/split). - * - * @static - * @memberOf _ - * @since 4.0.0 - * @category String - * @param {string} [string=''] The string to split. - * @param {RegExp|string} separator The separator pattern to split by. - * @param {number} [limit] The length to truncate results to. - * @returns {Array} Returns the string segments. - * @example - * - * _.split('a-b-c', '-', 2); - * // => ['a', 'b'] - */ - function split(string, separator, limit) { - if (limit && typeof limit != 'number' && isIterateeCall(string, separator, limit)) { - separator = limit = undefined; - } - limit = limit === undefined ? MAX_ARRAY_LENGTH : limit >>> 0; - if (!limit) { - return []; - } - string = toString(string); - if (string && ( - typeof separator == 'string' || - (separator != null && !isRegExp(separator)) - )) { - separator = baseToString(separator); - if (!separator && hasUnicode(string)) { - return castSlice(stringToArray(string), 0, limit); - } - } - return string.split(separator, limit); - } - - /** - * Converts `string` to - * [start case](https://en.wikipedia.org/wiki/Letter_case#Stylistic_or_specialised_usage). - * - * @static - * @memberOf _ - * @since 3.1.0 - * @category String - * @param {string} [string=''] The string to convert. - * @returns {string} Returns the start cased string. - * @example - * - * _.startCase('--foo-bar--'); - * // => 'Foo Bar' - * - * _.startCase('fooBar'); - * // => 'Foo Bar' - * - * _.startCase('__FOO_BAR__'); - * // => 'FOO BAR' - */ - var startCase = createCompounder(function(result, word, index) { - return result + (index ? ' ' : '') + upperFirst(word); - }); - - /** - * Checks if `string` starts with the given target string. - * - * @static - * @memberOf _ - * @since 3.0.0 - * @category String - * @param {string} [string=''] The string to inspect. - * @param {string} [target] The string to search for. - * @param {number} [position=0] The position to search from. - * @returns {boolean} Returns `true` if `string` starts with `target`, - * else `false`. - * @example - * - * _.startsWith('abc', 'a'); - * // => true - * - * _.startsWith('abc', 'b'); - * // => false - * - * _.startsWith('abc', 'b', 1); - * // => true - */ - function startsWith(string, target, position) { - string = toString(string); - position = position == null - ? 0 - : baseClamp(toInteger(position), 0, string.length); - - target = baseToString(target); - return string.slice(position, position + target.length) == target; - } - - /** - * Creates a compiled template function that can interpolate data properties - * in "interpolate" delimiters, HTML-escape interpolated data properties in - * "escape" delimiters, and execute JavaScript in "evaluate" delimiters. Data - * properties may be accessed as free variables in the template. If a setting - * object is given, it takes precedence over `_.templateSettings` values. 
- *
- * **Note:** In the development build `_.template` utilizes
- * [sourceURLs](http://www.html5rocks.com/en/tutorials/developertools/sourcemaps/#toc-sourceurl)
- * for easier debugging.
- *
- * For more information on precompiling templates see
- * [lodash's custom builds documentation](https://lodash.com/custom-builds).
- *
- * For more information on Chrome extension sandboxes see
- * [Chrome's extensions documentation](https://developer.chrome.com/extensions/sandboxingEval).
- *
- * @static
- * @since 0.1.0
- * @memberOf _
- * @category String
- * @param {string} [string=''] The template string.
- * @param {Object} [options={}] The options object.
- * @param {RegExp} [options.escape=_.templateSettings.escape]
- * The HTML "escape" delimiter.
- * @param {RegExp} [options.evaluate=_.templateSettings.evaluate]
- * The "evaluate" delimiter.
- * @param {Object} [options.imports=_.templateSettings.imports]
- * An object to import into the template as free variables.
- * @param {RegExp} [options.interpolate=_.templateSettings.interpolate]
- * The "interpolate" delimiter.
- * @param {string} [options.sourceURL='lodash.templateSources[n]']
- * The sourceURL of the compiled template.
- * @param {string} [options.variable='obj']
- * The data object variable name.
- * @param- {Object} [guard] Enables use as an iteratee for methods like `_.map`.
- * @returns {Function} Returns the compiled template function.
- * @example
- *
- * // Use the "interpolate" delimiter to create a compiled template.
- * var compiled = _.template('hello <%= user %>!');
- * compiled({ 'user': 'fred' });
- * // => 'hello fred!'
- *
- * // Use the HTML "escape" delimiter to escape data property values.
- * var compiled = _.template('<b><%- value %></b>');
- * compiled({ 'value': '