Skip to content

Commit

Permalink
WX-728 Add configurable WSM client to Cromwell (#6948)
Browse files Browse the repository at this point in the history
* Dependencies

* Compiles but no tests

* Formatting

* Moar exclusions

* Update to latest WSM

* Add additional dependency

* We need some UUID here to make the request

* Formatting

* Clarify what is fake

* Formatting

* Use our own version of Jersey and Jackson stuff

* Port-in Khalid's changes (thank you!)

Co-authored-by: Khalid Shakir <kshakir@broadinstitute.org>

* Test longevity

Don't break the test if someone decides to add a cert to `ws.org`

* Cleanup

* Cleanup

* Cleanup

* Adjust TES config file for CI

Co-authored-by: Janet Gainer-Dewar <jdewar@broadinstitute.org>
Co-authored-by: Khalid Shakir <kshakir@broadinstitute.org>
  • Loading branch information
3 people authored Nov 21, 2022
1 parent 432e72f commit 95e4449
Show file tree
Hide file tree
Showing 10 changed files with 275 additions and 28 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"fileChecksum.inputFile": "https://<storage-account>.blob.core.windows.net/cromwell/user-inputs/inputFile.txt"
}
20 changes: 20 additions & 0 deletions centaur/src/main/resources/standardTestCases/blob_md5/blob_md5.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
task md5 {
File inputFile
command {
echo "`date`: Running checksum on ${inputFile}..."
md5sum ${inputFile} > md5sum.txt
echo "`date`: Checksum is complete."
}
output {
File result = "md5sum.txt"
}
runtime {
docker: 'ubuntu:18.04'
preemptible: true
}
}

workflow fileChecksum {
File inputFile
call md5 { input: inputFile=inputFile}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ import scala.util.{Failure, Success, Try}
import com.azure.resourcemanager.storage.models.StorageAccountKey
import com.typesafe.scalalogging.LazyLogging

import java.util.UUID

case class FileSystemAPI() {
def getFileSystem(uri: URI): Try[FileSystem] = Try(FileSystems.getFileSystem(uri))
def newFileSystem(uri: URI, config: Map[String, Object]): FileSystem = FileSystems.newFileSystem(uri, config.asJava)
Expand Down Expand Up @@ -82,31 +84,32 @@ case class BlobFileSystemManager(
sealed trait BlobTokenGenerator {def generateAccessToken: Try[AzureSasCredential]}
object BlobTokenGenerator {
def createBlobTokenGenerator(container: BlobContainerName, endpoint: EndpointURL, subscription: Option[SubscriptionId]): BlobTokenGenerator = {
createBlobTokenGenerator(container, endpoint, None, None, subscription)
NativeBlobTokenGenerator(container, endpoint, subscription)
}
def createBlobTokenGenerator(container: BlobContainerName,
endpoint: EndpointURL,
workspaceId: Option[WorkspaceId],
workspaceManagerURL: Option[WorkspaceManagerURL],
subscription: Option[SubscriptionId]
): BlobTokenGenerator = {
(container: BlobContainerName, endpoint: EndpointURL, workspaceId, workspaceManagerURL) match {
case (container, endpoint, None, None) =>
NativeBlobTokenGenerator(container, endpoint, subscription)
case (container, endpoint, Some(workspaceId), Some(workspaceManagerURL)) =>
WSMBlobTokenGenerator(container, endpoint, workspaceId, workspaceManagerURL)
case _ =>
throw new Exception("Arguments provided do not match any available BlobTokenGenerator implementation.")
}
def createBlobTokenGenerator(container: BlobContainerName, endpoint: EndpointURL, workspaceId: WorkspaceId, workspaceManagerClient: WorkspaceManagerApiClientProvider): BlobTokenGenerator = {
WSMBlobTokenGenerator(container, endpoint, workspaceId, workspaceManagerClient)
}
def createBlobTokenGenerator(container: BlobContainerName, endpoint: EndpointURL): BlobTokenGenerator = createBlobTokenGenerator(container, endpoint, None)
def createBlobTokenGenerator(container: BlobContainerName, endpoint: EndpointURL, workspaceId: Option[WorkspaceId], workspaceManagerURL: Option[WorkspaceManagerURL]): BlobTokenGenerator =
createBlobTokenGenerator(container, endpoint, workspaceId, workspaceManagerURL, None)

}

case class WSMBlobTokenGenerator(container: BlobContainerName, endpoint: EndpointURL, workspaceId: WorkspaceId, workspaceManagerURL: WorkspaceManagerURL) extends BlobTokenGenerator {
def generateAccessToken: Try[AzureSasCredential] = Failure(new NotImplementedError)
case class WSMBlobTokenGenerator(
container: BlobContainerName,
endpoint: EndpointURL,
workspaceId: WorkspaceId,
wsmClient: WorkspaceManagerApiClientProvider) extends BlobTokenGenerator {

def generateAccessToken: Try[AzureSasCredential] = Try {
val token = wsmClient.getControlledAzureResourceApi.createAzureStorageContainerSasToken(
UUID.fromString(workspaceId.value),
UUID.fromString("00001111-2222-3333-aaaa-bbbbccccdddd"),
null,
null,
null,
null
).getToken // TODO `null` items may be required, investigate in WX-696

new AzureSasCredential(token) // TODO Does `signature` actually mean token? save for WX-696
}
}

case class NativeBlobTokenGenerator(container: BlobContainerName, endpoint: EndpointURL, subscription: Option[SubscriptionId] = None) extends BlobTokenGenerator {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,17 @@ final case class BlobPathBuilderFactory(globalConfig: Config, instanceConfig: Co
val workspaceId: Option[WorkspaceId] = instanceConfig.as[Option[String]]("workspace-id").map(WorkspaceId)
val expiryBufferMinutes: Long = instanceConfig.as[Option[Long]]("expiry-buffer-minutes").getOrElse(10)
val workspaceManagerURL: Option[WorkspaceManagerURL] = singletonConfig.config.as[Option[String]]("workspace-manager-url").map(WorkspaceManagerURL)
val b2cToken: Option[String] = instanceConfig.as[Option[String]]("b2cToken")

val blobTokenGenerator: BlobTokenGenerator = (workspaceManagerURL, b2cToken, workspaceId) match {
case (Some(url), Some(token), Some(workspaceId)) =>
val wsmClient: WorkspaceManagerApiClientProvider = new HttpWorkspaceManagerClientProvider(url, token)
// parameterizing client instead of URL to make injecting mock client possible
BlobTokenGenerator.createBlobTokenGenerator(container, endpoint, workspaceId, wsmClient)
case _ =>
BlobTokenGenerator.createBlobTokenGenerator(container, endpoint, subscription)
}

val blobTokenGenerator: BlobTokenGenerator = BlobTokenGenerator.createBlobTokenGenerator(
container, endpoint, workspaceId, workspaceManagerURL, subscription)
val fsm: BlobFileSystemManager = BlobFileSystemManager(container, endpoint, expiryBufferMinutes, blobTokenGenerator)

override def withOptions(options: WorkflowOptions)(implicit as: ActorSystem, ec: ExecutionContext): Future[BlobPathBuilder] = {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package cromwell.filesystems.blob

import bio.terra.workspace.api.ControlledAzureResourceApi
import bio.terra.workspace.client.ApiClient

/**
* Represents a way to get various workspace manager clients
*
* Pared down from `org.broadinstitute.dsde.rawls.dataaccess.workspacemanager.WorkspaceManagerApiClientProvider`
*
* For testing, create an anonymous subclass as in `org.broadinstitute.dsde.rawls.dataaccess.workspacemanager.HttpWorkspaceManagerDAOSpec`
*/
trait WorkspaceManagerApiClientProvider {
def getApiClient: ApiClient

def getControlledAzureResourceApi: ControlledAzureResourceApi

}

class HttpWorkspaceManagerClientProvider(baseWorkspaceManagerUrl: WorkspaceManagerURL, token: String) extends WorkspaceManagerApiClientProvider {
def getApiClient: ApiClient = {
val client: ApiClient = new ApiClient()
client.setBasePath(baseWorkspaceManagerUrl.value)
client.setAccessToken(token)

client
}

def getControlledAzureResourceApi: ControlledAzureResourceApi =
new ControlledAzureResourceApi(getApiClient)

}
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ class BlobPathBuilderFactorySpec extends AnyFlatSpec with Matchers with MockSuga
it should "parse configs for a functioning factory" in {
val endpoint = BlobPathBuilderSpec.buildEndpoint("storageAccount")
val container = BlobContainerName("storageContainer")
val workspaceId = WorkspaceId("mockWorkspaceId")
val workspaceManagerURL = WorkspaceManagerURL("https://test.ws.org")
val workspaceId = WorkspaceId("B0BAFE77-0000-0000-0000-000000000000")
val workspaceManagerURL = WorkspaceManagerURL("https://wsm.example.com")
val instanceConfig = ConfigFactory.parseString(
s"""
|container = "$container"
Expand Down
112 changes: 108 additions & 4 deletions project/Dependencies.scala
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,12 @@ object Dependencies {
private val hsqldbV = "2.6.1"
private val http4sV = "0.21.31" // this release is EOL. We need to upgrade further for cats3. https://http4s.org/versions/
private val jacksonV = "2.13.3"
private val jakartaActivationV = "1.2.1"
private val jakartaAnnotationV = "1.3.5"
private val jakartaInjectV = "2.6.1"
private val jakartaXmlBindApiV = "2.3.2"
private val janinoV = "3.1.7"
private val jerseyV = "2.32" // Use a jersey compatible with WSM. See notes in wsmDependencies below.
private val jsr305V = "3.0.2"
private val junitV = "4.13.2"
private val kindProjectorV = "0.13.2"
Expand Down Expand Up @@ -172,6 +177,16 @@ object Dependencies {
"com.google.api-client" % "google-api-client-jackson2" % googleApiClientV
exclude("com.google.guava", "guava-jdk5"),
"com.google.cloud" % "google-cloud-resourcemanager" % googleCloudResourceManagerV,
/*
The google-cloud-java dependencies have similar issues with using an older javax.* vs. jakarta.* as guice.
google-cloud-java is still using javax.annotation and guice is sticking with javax.inject:
- https://github.com/google/guice/issues/1383
- https://github.com/googleapis/google-cloud-java/blob/v0.201.0/google-cloud-jar-parent/pom.xml#L131-L136
Globally use of jakarta instead of javax until Google does themselves.
The javax.annotation exclusion is below in cromwellExcludeDependencies.
*/
"jakarta.annotation" % "jakarta.annotation-api" % jakartaAnnotationV,
)

val spiDependencies: List[ModuleID] = List(
Expand All @@ -197,6 +212,36 @@ object Dependencies {
"com.azure.resourcemanager" % "azure-resourcemanager" % "2.18.0"
)

val wsmDependencies: List[ModuleID] = List(
"bio.terra" % "workspace-manager-client" % "0.254.452-SNAPSHOT"
exclude("com.sun.activation", "jakarta.activation"),
/*
1. WSM is looking for the rs-api under javax.*.
Jersey 3.x switched to jakarta.ws.rs-api 3.x. If one uses jakarta's rs-api, 3.x will automatically evict 2.x.
However, jakarta's rs-api 2.x provides packages javax.* while 3.x provides jakarta.* instead.
- https://javadoc.io/doc/jakarta.ws.rs/jakarta.ws.rs-api/2.1.6/javax/ws/rs/package-summary.html
- https://javadoc.io/doc/jakarta.ws.rs/jakarta.ws.rs-api/3.1.0/jakarta.ws.rs/module-summary.html
TODO: Perhaps coordinate with the WSM team to use the jakarta 3.x rs-api and jakarta.* instead of javax.*.
2. Use the exact version of jersey that WSM is using.
Jersey libraries cannot be mixed and matched as the various modules cannot be mixed and matched.
For example jersey-client 2.32 is not compatible with jersey-common 2.37.
If needed one may also explicitly enumerate the list of jersey artifacts and explicitly set the versions similar to
catsDepeendencies, akkaHttpDependencies, etc.
- https://broadinstitute.jfrog.io/ui/repos/tree/PomView/libs-snapshot-local/bio/terra/workspace-manager-client/0.254.452-SNAPSHOT/workspace-manager-client-0.254.452-20221114.190249-1.pom
- https://github.com/eclipse-ee4j/jersey/blob/2.32/core-client/src/main/java/org/glassfish/jersey/client/ClientExecutorProvidersConfigurator.java#L139
- https://github.com/eclipse-ee4j/jersey/blob/2.37/core-client/src/main/java/org/glassfish/jersey/client/ClientExecutorProvidersConfigurator.java#L136-L137
*/
"org.glassfish.jersey.inject" % "jersey-hk2" % jerseyV
exclude("com.sun.activation", "jakarta.activation"),
"jakarta.activation" % "jakarta.activation-api" % jakartaActivationV,
)

val implFtpDependencies = List(
"commons-net" % "commons-net" % commonNetV,
"io.github.andrebeat" %% "scala-pool" % scalaPoolV,
Expand Down Expand Up @@ -256,7 +301,11 @@ object Dependencies {
)

private val liquibaseDependencies = List(
// The XML bind API replacement below may be removed when this ticket is addressed:
// https://github.com/liquibase/liquibase/issues/2991
"org.liquibase" % "liquibase-core" % liquibaseV
exclude("javax.xml.bind", "jaxb-api"),
"jakarta.xml.bind" % "jakarta.xml.bind-api" % jakartaXmlBindApiV,
)

private val akkaDependencies = List(
Expand Down Expand Up @@ -323,15 +372,24 @@ object Dependencies {
private val googleCloudDependencies = List(
"io.grpc" % "grpc-core" % grpcV,
"com.google.guava" % "guava" % guavaV,
/*
The google-cloud-nio has the same problems with an ancient inject as guice:
- https://github.com/google/guice/issues/1383
- https://github.com/googleapis/java-storage-nio/blob/v0.124.20/google-cloud-nio/pom.xml#L49-L53
Force use of jakarta instead of javax until Google does themselves.
*/
"com.google.cloud" % "google-cloud-nio" % googleCloudNioV
exclude("com.google.api.grpc", "grpc-google-common-protos")
exclude("com.google.cloud.datastore", "datastore-v1-protos")
exclude("javax.inject", "javax.inject")
exclude("org.apache.httpcomponents", "httpclient"),
"org.broadinstitute.dsde.workbench" %% "workbench-google" % workbenchGoogleV
exclude("com.google.apis", "google-api-services-genomics"),
"org.apache.httpcomponents" % "httpclient" % apacheHttpClientV,
"com.google.apis" % "google-api-services-cloudkms" % googleCloudKmsV
exclude("com.google.guava", "guava-jdk5")
exclude("com.google.guava", "guava-jdk5"),
"org.glassfish.hk2.external" % "jakarta.inject" % jakartaInjectV,
) ++ googleGenomicsV2Alpha1Dependency ++ googleLifeSciencesV2BetaDependency

private val dbmsDependencies = List(
Expand Down Expand Up @@ -407,7 +465,7 @@ object Dependencies {
List("scalatest", "mysql", "mariadb", "postgresql")
.map(name => "com.dimafeng" %% s"testcontainers-scala-$name" % testContainersScalaV % Test)

val blobFileSystemDependencies: List[ModuleID] = azureDependencies
val blobFileSystemDependencies: List[ModuleID] = azureDependencies ++ wsmDependencies

val s3FileSystemDependencies: List[ModuleID] = junitDependencies

Expand All @@ -431,6 +489,8 @@ object Dependencies {
val languageFactoryDependencies = List(
"com.softwaremill.sttp" %% "core" % sttpV,
"com.softwaremill.sttp" %% "async-http-client-backend-cats" % sttpV
exclude("com.sun.activation", "javax.activation"),
"jakarta.activation" % "jakarta.activation-api" % jakartaActivationV,
)

val draft2LanguageFactoryDependencies = List(
Expand All @@ -452,10 +512,16 @@ object Dependencies {
- https://www.slf4j.org/legacy.html#jclOverSLF4J
*/
val owlApiDependencies = List(
// This whole section (incl. javax->jakarta issues) is probably going to be removed when CWL support is removed
// For now, replace javax usage with jakarta.
"net.sourceforge.owlapi" % "owlapi-distribution" % owlApiV
exclude("javax.inject", "javax.inject")
exclude("javax.xml.bind", "jaxb-api")
exclude("org.apache.httpcomponents", "httpclient-osgi")
exclude("org.apache.httpcomponents", "httpcore-osgi")
exclude("org.slf4j", "jcl-over-slf4j"),
"org.glassfish.hk2.external" % "jakarta.inject" % jakartaInjectV,
"jakarta.xml.bind" % "jakarta.xml.bind-api" % jakartaXmlBindApiV,
"org.apache.httpcomponents" % "httpclient-cache" % apacheHttpClientV,
"org.apache.httpcomponents" % "httpclient" % apacheHttpClientV
)
Expand All @@ -480,7 +546,9 @@ object Dependencies {
val coreDependencies: List[ModuleID] = List(
"com.google.auth" % "google-auth-library-oauth2-http" % googleOauth2V,
"com.chuusai" %% "shapeless" % shapelessV,
// NOTE: See scalameter comment under engineDependencies
"com.storm-enroute" %% "scalameter" % scalameterV % Test
exclude("com.fasterxml.jackson.module", "jackson-module-scala_2.13")
exclude("org.scala-lang.modules", "scala-xml_2.13"),
"com.github.scopt" %% "scopt" % scoptV,
) ++ akkaStreamDependencies ++ configDependencies ++ catsDependencies ++ circeDependencies ++
Expand All @@ -507,9 +575,22 @@ object Dependencies {
val engineDependencies: List[ModuleID] = List(
"commons-codec" % "commons-codec" % commonsCodecV,
"commons-io" % "commons-io" % commonsIoV,
/*
Maybe ScalaMeter should be used, but is anyone?
For now keep its dependencies from breaking jackson for other libraries. If someone wants to use it they can
re-fight with dependency-hell at that point.
Avoid:
"com.fasterxml.jackson.databind.JsonMappingException: Scala module 2.11.3 requires Jackson Databind
version >= 2.11.0 and < 2.12.0":
- https://scalameter.github.io/home/gettingstarted/0.7/sbt/index.html
- https://github.com/FasterXML/jackson-module-scala/blob/jackson-module-scala-2.11.3/src/main/scala/com/fasterxml/jackson/module/scala/JacksonModule.scala#L53-L62
*/
"com.storm-enroute" %% "scalameter" % scalameterV
exclude("com.fasterxml.jackson.core", "jackson-databind")
exclude("com.fasterxml.jackson.module", "jackson-module-scala")
exclude("com.fasterxml.jackson.module", "jackson-module-scala_2.13")
exclude("org.scala-tools.testing", "test-interface")
exclude("org.scala-lang.modules", "scala-xml_2.13"),
"com.fasterxml.jackson.core" % "jackson-databind" % jacksonV,
Expand All @@ -525,11 +606,24 @@ object Dependencies {
val serverDependencies: List[ModuleID] = slf4jBindingDependencies

val cromiamDependencies: List[ModuleID] = List(
/*
sttp 1.x was last released in 2019
See above comment regarding "cats-effect, fs2, http4s, and sttp" all needing to update together.
For now, replace sttp 1.x's com.sun.activation usage with the jakarta version.
NOTE when upgrading: sttp 3.x no longer requires an async-http-client-backend-future so jakarta.activation can
probably be removed from the dependencies:
- https://sttp.softwaremill.com/en/v3/backends/future.html#using-async-http-client
- https://sttp.softwaremill.com/en/v2/backends/future.html#using-async-http-client
- https://sttp.softwaremill.com/en/v1/backends/asynchttpclient.html
*/
"com.softwaremill.sttp" %% "core" % sttpV,
"com.softwaremill.sttp" %% "async-http-client-backend-future" % sttpV,
"com.softwaremill.sttp" %% "async-http-client-backend-future" % sttpV
exclude("com.sun.activation", "javax.activation"),
"com.typesafe.scala-logging" %% "scala-logging" % scalaLoggingV,
"org.broadinstitute.dsde.workbench" %% "workbench-model" % workbenchModelV,
"org.broadinstitute.dsde.workbench" %% "workbench-util" % workbenchUtilV
"org.broadinstitute.dsde.workbench" %% "workbench-util" % workbenchUtilV,
"jakarta.activation" % "jakarta.activation-api" % jakartaActivationV,
) ++ akkaHttpDependencies ++ swaggerUiDependencies ++ slf4jBindingDependencies

val wes2cromwellDependencies: List[ModuleID] = coreDependencies ++ akkaHttpDependencies
Expand Down Expand Up @@ -736,5 +830,15 @@ object Dependencies {
val cromwellExcludeDependencies: List[ExclusionRule] = List(
// Replaced with jcl-over-slf4j
ExclusionRule("commons-logging", "commons-logging"),
/*
The google-cloud-java dependencies have similar issues with using an older javax.* vs. jakarta.* as guice.
google-cloud-java is still using javax.annotation and guice is sticking with javax.inject:
- https://github.com/google/guice/issues/1383
- https://github.com/googleapis/google-cloud-java/blob/v0.201.0/google-cloud-jar-parent/pom.xml#L131-L136
Globally use of jakarta instead of javax until Google does themselves.
The jakarta.annotation inclusion is above in googleApiClientDependencies.
*/
ExclusionRule("javax.annotation", "javax.annotation-api"),
)
}
5 changes: 5 additions & 0 deletions project/Publishing.scala
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,10 @@ object Publishing {
"Broad Artifactory" at
"https://broadinstitute.jfrog.io/broadinstitute/libs-release/"

private val broadArtifactoryResolverSnap: Resolver =
"Broad Artifactory Snapshots" at
"https://broadinstitute.jfrog.io/broadinstitute/libs-snapshot-local/"

// https://stackoverflow.com/questions/9819965/artifactory-snapshot-filename-handling
private val buildTimestamp = System.currentTimeMillis() / 1000

Expand All @@ -159,6 +163,7 @@ object Publishing {

val additionalResolvers = List(
broadArtifactoryResolver,
broadArtifactoryResolverSnap,
Resolver.sonatypeRepo("releases")
)

Expand Down
Loading

0 comments on commit 95e4449

Please sign in to comment.