Merge branch 'master' into feat/gen3-discovery-ai
Avantol13 authored Jan 22, 2024
2 parents 415889d + 87704ae commit cb560fc
Showing 21 changed files with 265 additions and 27 deletions.
2 changes: 1 addition & 1 deletion Docker/jenkins/Jenkins-CI-Worker/Dockerfile
@@ -1,4 +1,4 @@
FROM jenkins/inbound-agent:jdk11
FROM jenkins/inbound-agent:jdk21

USER root

2 changes: 1 addition & 1 deletion Docker/jenkins/Jenkins-Worker/Dockerfile
@@ -1,4 +1,4 @@
FROM jenkins/inbound-agent:jdk11
FROM jenkins/inbound-agent:jdk21

USER root

2 changes: 1 addition & 1 deletion Docker/jenkins/Jenkins/Dockerfile
@@ -1,4 +1,4 @@
FROM jenkins/jenkins:2.415-jdk11
FROM jenkins/jenkins:2.439-jdk21

USER root

68 changes: 68 additions & 0 deletions doc/s3-to-google-replication.md
@@ -0,0 +1,68 @@
# S3 to Google Cloud Storage Replication Pipeline

This document guides you through setting up a replication pipeline from AWS S3 to Google Cloud Storage (GCS) using VPC Service Controls and the Storage Transfer Service. The approach follows security best practices, keeping data transfer between AWS S3 and GCS both secure and efficient.

## Table of Contents

- [Prerequisites](#prerequisites)
- [Step-by-step Guide](#step-by-step-guide)
- [Setup VPC Service Controls](#setup-vpc-service-controls)
- [Initiate Storage Transfer Service](#initiate-storage-transfer-service)
- [Compliance Benefits](#compliance-benefits)
- [Cost Benefit Analysis](#cost-benefit-analysis)

## Prerequisites

1. **AWS account** with access to the S3 bucket.
2. **Google Cloud account** with permissions to create buckets in GCS and set up VPC Service Controls and Storage Transfer Service.
3. Familiarity with AWS IAM for S3 bucket access and Google Cloud IAM for GCS access (a policy sketch follows this list).
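
A minimal sketch of the S3-side policy is below. The user name, policy name, and bucket name are placeholders introduced here for illustration; the listed S3 actions are the read-level permissions the Storage Transfer Service needs on a source bucket.

```bash
# Sketch: grant the AWS credentials used by Storage Transfer Service read access
# to the source bucket. The bucket and user names are placeholders.
cat > sts-s3-read-policy.json <<'EOF'
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Action": ["s3:GetBucketLocation", "s3:ListBucket"],
      "Resource": "arn:aws:s3:::my-source-bucket"
    },
    {
      "Effect": "Allow",
      "Action": ["s3:GetObject", "s3:GetObjectVersion"],
      "Resource": "arn:aws:s3:::my-source-bucket/*"
    }
  ]
}
EOF

aws iam put-user-policy \
  --user-name storage-transfer-user \
  --policy-name sts-s3-read \
  --policy-document file://sts-s3-read-policy.json
```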

## Step-by-step Guide

### Setup VPC Service Controls

1. **Access the VPC Service Controls** in the Google Cloud Console.
2. **Create a new VPC Service Control perimeter**.
- Name the perimeter and choose the desired region.
   - Add the necessary GCP services. Be sure to include `storagetransfer.googleapis.com` for the Storage Transfer Service.
3. **Set up a VPC Service Controls policy** to allow connections from AWS.
   - Follow the [documentation](https://cloud.google.com/vpc-service-controls/docs/set-up) for detailed setup steps. A CLI sketch of the perimeter creation follows this list.
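
If you prefer the CLI, the perimeter can also be created with `gcloud`. This is a minimal sketch: the access policy ID, project number, and perimeter name are placeholders, and your organization may require additional restricted services.

```bash
# Sketch: create a service perimeter that includes the destination project and
# restricts Cloud Storage plus Storage Transfer Service. All IDs are placeholders.
POLICY_ID="123456789"          # access policy ID for your organization
PROJECT_NUMBER="987654321098"  # project that owns the destination GCS bucket

gcloud access-context-manager perimeters create s3_replication_perimeter \
  --policy="$POLICY_ID" \
  --title="s3-replication-perimeter" \
  --resources="projects/$PROJECT_NUMBER" \
  --restricted-services="storage.googleapis.com,storagetransfer.googleapis.com"
```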

### Initiate Storage Transfer Service

1. Navigate to **Storage Transfer Service** in the Google Cloud Console.
2. Click **Create Transfer Job**.
3. **Select Source**: Choose the Amazon S3 bucket and provide the necessary details.
   - Make sure the AWS IAM credentials you supply have the necessary permissions on the S3 bucket.
4. **Select Destination**: Choose your GCS bucket.
5. **Schedule & Advanced Settings**: Set the frequency and conditions for the transfer. Consider setting up notifications for job completion or errors.
6. **Review & Create**: Confirm the details and initiate the transfer job. An equivalent CLI sketch follows this list.
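
The same transfer job can be created from the CLI. This is a minimal sketch, assuming the AWS access key is stored in a local JSON credentials file; bucket names are placeholders, and flag names may vary slightly between gcloud releases (check `gcloud transfer jobs create --help`).

```bash
# Sketch: create a daily S3 -> GCS transfer job. Bucket names and the
# credentials below are placeholders; adjust to your environment.
cat > aws-creds.json <<'EOF'
{"accessKeyId": "AKIA...", "secretAccessKey": "..."}
EOF

gcloud transfer jobs create \
  s3://my-source-bucket gs://my-destination-bucket \
  --source-creds-file=aws-creds.json \
  --description="s3-to-gcs-replication" \
  --schedule-repeats-every=1d
```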

## Compliance Benefits

Setting up a secure replication pipeline from AWS S3 to GCS using VPC Service Controls and Storage Transfer Service offers the following compliance benefits:

1. **Data Security**: The VPC Service Controls provide an additional layer of security by ensuring that the transferred data remains within a defined security perimeter, reducing potential data leak risks.
2. **Auditability**: Both AWS and GCS offer logging and monitoring tools that provide audit trails for data transfers, which helps in meeting regulatory compliance requirements. A monitoring sketch follows this list.
3. **Consistent Data Replication**: The Storage Transfer Service ensures that data in GCS is up to date with the source S3 bucket, which is essential for consistent backup and disaster recovery strategies.
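
For the auditability point above, transfer activity can also be inspected from the CLI. A minimal sketch; the job name and bucket name are placeholders:

```bash
# Check recent operations for a transfer job (use the name reported by
# `gcloud transfer jobs list`), then pull audit log entries for the bucket.
gcloud transfer operations list --job-names=s3-to-gcs-replication --limit=10

gcloud logging read \
  'resource.type="gcs_bucket" AND resource.labels.bucket_name="my-destination-bucket"' \
  --limit=20
```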

## Cost Benefit Analysis

**Benefits**:

1. **Data Redundancy**: Having data stored in multiple cloud providers can be a part of a robust disaster recovery strategy.
2. **Flexibility**: Replicating data to GCS provides flexibility in multi-cloud strategies, enabling seamless migrations or usage of GCP tools and services.
3. **Security**: Utilizing VPC Service Controls strengthens the security posture.

**Costs**:

1. **Data Transfer Costs**: Both AWS and Google Cloud may charge for data transfer, with AWS egress typically the largest component. Analyze these costs carefully, especially for large transfers; a rough estimation sketch follows this list.
2. **Storage Costs**: Storing data redundantly incurs additional storage costs in GCS.
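
As a rough way to frame the trade-off: the dominant recurring costs are AWS egress for the transfer and GCS storage for the extra copy. The sketch below is purely illustrative; the rates are placeholders, not quoted prices, and actual pricing depends on region, storage class, and volume.

```bash
# Back-of-the-envelope estimate; the rates below are placeholders, not actual
# AWS/GCP prices.
DATA_GB=5000                 # data replicated per month (assumed)
AWS_EGRESS_PER_GB=0.09       # assumed AWS egress rate, USD/GB
GCS_STORAGE_PER_GB=0.02      # assumed GCS storage rate, USD/GB-month

transfer_cost=$(echo "$DATA_GB * $AWS_EGRESS_PER_GB" | bc)
storage_cost=$(echo "$DATA_GB * $GCS_STORAGE_PER_GB" | bc)
echo "Estimated egress cost: \$${transfer_cost}"
echo "Estimated added storage: \$${storage_cost} per month"
```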

**Analysis**:

To stay in compliance, we require multiple copies of our data in separate data centers or clouds. Our security audit highlighted the importance of not keeping data in a single cloud. Transferring data from AWS to GCP and storing it in two clouds simultaneously can be expensive, but when required, this solution provides a straightforward way to achieve compliance.

---

Please note that while this guide is based on Google Cloud documentation, you should refer to the original [documentation](https://cloud.google.com/architecture/transferring-data-from-amazon-s3-to-cloud-storage-using-vpc-service-controls-and-storage-transfer-service) for the most accurate and up-to-date information.
2 changes: 2 additions & 0 deletions files/scripts/healdata/heal-cedar-data-ingest.py
@@ -85,6 +85,8 @@ def update_filter_metadata(metadata_to_update):
]
# Add any new tags from advSearchFilters
for f in metadata_to_update["advSearchFilters"]:
if f["key"] == "Gender":
continue
tag = {"name": f["value"], "category": f["key"]}
if tag not in tags:
tags.append(tag)
1 change: 1 addition & 0 deletions files/squid_whitelist/web_whitelist
@@ -77,6 +77,7 @@ golang.org
gopkg.in
grafana.com
grafana.github.io
helm.elastic.co
http.us.debian.org
ifconfig.io
ingress.coralogix.us
3 changes: 2 additions & 1 deletion gen3/bin/create-es7-cluster.sh
@@ -40,6 +40,7 @@ else
--vpc-options "SubnetIds=${subnet_ids[*]},SecurityGroupIds=${security_groups[*]}" \
--access-policies "$access_policies" \
--encryption-at-rest-options "Enabled=true,KmsKeyId=$kms_key_id"\
--node-to-node-encryption-options "Enabled=true"
> /dev/null 2>&1

# Wait for the new cluster to be available
@@ -60,4 +61,4 @@ else
if [ $retry_count -eq $max_retries ]; then
echo "New cluster creation may still be in progress. Please check the AWS Management Console for the status."
fi
fi
fi
2 changes: 1 addition & 1 deletion gen3/bin/kube-setup-system-services.sh
@@ -19,7 +19,7 @@ gen3_load "gen3/gen3setup"
kubeproxy=${kubeproxy:-1.24.7}
coredns=${coredns:-1.8.7}
kubednsautoscaler=${kubednsautoscaler:-1.8.6}
cni=${cni:-1.12.2}
cni=${cni:-1.14.1}
calico=${calico:-1.7.8}


51 changes: 35 additions & 16 deletions kube/services/argo-events/workflows/configmap.yaml
@@ -8,7 +8,7 @@ data:
apiVersion: karpenter.sh/v1alpha5
kind: Provisioner
metadata:
name: workflow-$WORKFLOW_NAME
name: workflow-WORKFLOW_NAME
spec:
requirements:
- key: karpenter.sh/capacity-type
@@ -18,23 +18,42 @@ data:
operator: In
values:
- amd64
- key: karpenter.k8s.aws/instance-family
- key: node.kubernetes.io/instance-type
operator: In
values:
- c6i
- c7i
- m7i
- c6a.large
- c6a.xlarge
- c6a.2xlarge
- c6a.4xlarge
- c6a.8xlarge
- c6a.12xlarge
- c6i.large
- c6i.xlarge
- c6i.2xlarge
- c6i.4xlarge
- c6i.8xlarge
- c6i.12xlarge
- m6a.2xlarge
- m6a.4xlarge
- m6a.8xlarge
- m6a.12xlarge
- m6a.16xlarge
- m6i.2xlarge
- m6i.4xlarge
- m6i.8xlarge
- m6i.12xlarge
- m6i.16xlarge
taints:
- key: role
value: $WORKFLOW_NAME
value: WORKFLOW_NAME
effect: NoSchedule
labels:
role: $WORKFLOW_NAME
role: WORKFLOW_NAME
limits:
resources:
cpu: 2000
providerRef:
name: workflow-$WORKFLOW_NAME
name: workflow-WORKFLOW_NAME
# Kill nodes after 30 days to ensure they stay up to date
ttlSecondsUntilExpired: 2592000
ttlSecondsAfterEmpty: 10
@@ -43,18 +62,18 @@ data:
apiVersion: karpenter.k8s.aws/v1alpha1
kind: AWSNodeTemplate
metadata:
name: workflow-$WORKFLOW_NAME
name: workflow-WORKFLOW_NAME
spec:
subnetSelector:
karpenter.sh/discovery: $ENVIRONMENT
karpenter.sh/discovery: ENVIRONMENT
securityGroupSelector:
karpenter.sh/discovery: $ENVIRONMENT-workflow
karpenter.sh/discovery: ENVIRONMENT-workflow
tags:
Environment: $ENVIRONMENT
Name: eks-$ENVIRONMENT-workflow-karpenter
karpenter.sh/discovery: $ENVIRONMENT
workflowname: $WORKFLOW_NAME
gen3username: $GEN3_USERNAME
Environment: ENVIRONMENT
Name: eks-ENVIRONMENT-workflow-karpenter
karpenter.sh/discovery: ENVIRONMENT
workflowname: WORKFLOW_NAME
gen3username: GEN3_USERNAME
gen3service: argo-workflows
purpose: workflow
metadataOptions:
4 changes: 2 additions & 2 deletions kube/services/argo-events/workflows/sensor-created.yaml
@@ -60,11 +60,11 @@ spec:
- "-c"
- |
if ! kubectl get awsnodetemplate workflow-$WORKFLOW_NAME >/dev/null 2>&1; then
envsubst < /home/manifests/nodetemplate.yaml | kubectl apply -f -
            sed -e "s/WORKFLOW_NAME/$workflow_name/" -e "s/GEN3_USERNAME/$workflow_user/" -e "s/ENVIRONMENT/$ENVIRONMENT/" /home/manifests/nodetemplate.yaml | kubectl apply -f -
fi
if ! kubectl get provisioner workflow-$WORKFLOW_NAME >/dev/null 2>&1; then
envsubst < /home/manifests/provisioner.yaml | kubectl apply -f -
            sed -e "s/WORKFLOW_NAME/$workflow_name/" -e "s/GEN3_USERNAME/$workflow_user/" -e "s/ENVIRONMENT/$ENVIRONMENT/" /home/manifests/provisioner.yaml | kubectl apply -f -
fi
env:
- name: WORKFLOW_NAME
4 changes: 2 additions & 2 deletions kube/services/guppy/guppy-deploy.yaml
@@ -155,6 +155,6 @@ spec:
resources:
requests:
cpu: 100m
memory: 128Mi
memory: 256Mi
limits:
memory: 1200Mi
memory: 2000Mi
22 changes: 22 additions & 0 deletions kube/services/karpenter-reconciler/application.yaml
@@ -0,0 +1,22 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: karpenter-reconciler-application
namespace: argocd
spec:
destination:
namespace: kube-system
server: https://kubernetes.default.svc
project: default
source:
    repoURL: https://github.com/uc-cdis/cloud-automation.git
targetRevision: master
path: kube/services/karpenter-reconciler
directory:
exclude: "application.yaml"
syncPolicy:
automated:
prune: true
selfHeal: true
syncOptions:
- CreateNamespace=true
44 changes: 44 additions & 0 deletions kube/services/karpenter-reconciler/auth.yaml
@@ -0,0 +1,44 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: karpenter-reconciler
namespace: argo-events
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: karpenter-admin-binding-reconciler
subjects:
- kind: ServiceAccount
name: karpenter-reconciler
namespace: argo-events
roleRef:
kind: ClusterRole
name: karpenter-admin
apiGroup: rbac.authorization.k8s.io
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: workflow-viewer-reconciler
subjects:
- kind: ServiceAccount
name: karpenter-reconciler
namespace: argo-events
roleRef:
kind: ClusterRole
name: argo-argo-workflows-view
apiGroup: rbac.authorization.k8s.io
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: viewer-reconciler
subjects:
- kind: ServiceAccount
name: karpenter-reconciler
namespace: argo-events
roleRef:
kind: ClusterRole
name: system:aggregate-to-view
apiGroup: rbac.authorization.k8s.io
@@ -0,0 +1,72 @@
apiVersion: batch/v1
kind: CronJob
metadata:
name: karpenter-reconciler-cronjob
namespace: argo-events
spec:
schedule: "*/5 * * * *"
jobTemplate:
spec:
template:
metadata:
labels:
app: gen3job
spec:
serviceAccount: karpenter-reconciler
volumes:
- name: karpenter-templates-volume
configMap:
name: karpenter-templates
containers:
- name: karpenter-reconciler
image: quay.io/cdis/awshelper
volumeMounts:
- name: karpenter-templates-volume
mountPath: /manifests
env:
- name: PROVISIONER_TEMPLATE
value: /manifests/provisioner.yaml
- name: AWSNODETEMPLATE_TEMPLATE
value: /manifests/nodetemplate.yaml
command: ["/bin/bash"]
args:
- "-c"
- |
#!/bin/bash
if [ -z "$PROVISIONER_TEMPLATE" ]; then
PROVISIONER_TEMPLATE="provisioner.yaml"
fi
if [ -z "$AWSNODETEMPLATE_TEMPLATE" ]; then
AWSNODETEMPLATE_TEMPLATE="nodetemplate.yaml"
fi
ENVIRONMENT=$(kubectl -n default get configmap global -o jsonpath="{.data.environment}")
RAW_WORKFLOWS=$(kubectl get workflows -n argo -o yaml)
WORKFLOWS=$(echo "${RAW_WORKFLOWS}" | yq -r '.items[] | [.metadata.name, .metadata.labels.gen3username] | join(" ")')
WORKFLOW_ARRAY=()
while IFS= read -r line; do
WORKFLOW_ARRAY+=("$line")
done <<< "$WORKFLOWS"
for workflow in "${WORKFLOW_ARRAY[@]}"
do
workflow_name=$(echo "$workflow" | awk '{print $1}')
workflow_user=$(echo "$workflow" | awk '{print $2}')
if ! kubectl get awsnodetemplate workflow-$workflow_name >/dev/null 2>&1; then
echo "No awsnodetemplate found for ${workflow_name}, creating one"
sed -e "s/WORKFLOW_NAME/$workflow_name/" -e "s/GEN3_USERNAME/$workflow_user/" -e "s/ENVIRONMENT/$ENVIRONMENT/" "$AWSNODETEMPLATE_TEMPLATE" | kubectl apply -f -
fi
if ! kubectl get provisioner workflow-$workflow_name >/dev/null 2>&1; then
echo "No provisioner found for ${workflow_name}, creating one"
sed -e "s/WORKFLOW_NAME/$workflow_name/" -e "s/GEN3_USERNAME/$workflow_user/" -e "s/ENVIRONMENT/$ENVIRONMENT/" "$PROVISIONER_TEMPLATE" | kubectl apply -f -
fi
done
restartPolicy: OnFailure
1 change: 1 addition & 0 deletions kube/services/karpenter/nodeTemplateDefault.yaml
@@ -37,6 +37,7 @@ spec:
sudo dracut -f
# configure grub
sudo /sbin/grubby --update-kernel=ALL --args="fips=1"
sudo mount -t bpf bpffs /sys/fs/bpf
--BOUNDARY
Content-Type: text/cloud-config; charset="us-ascii"
1 change: 1 addition & 0 deletions kube/services/karpenter/nodeTemplateGPU.yaml
@@ -37,6 +37,7 @@ spec:
sudo dracut -f
# configure grub
sudo /sbin/grubby --update-kernel=ALL --args="fips=1"
sudo mount -t bpf bpffs /sys/fs/bpf
--BOUNDARY
Content-Type: text/cloud-config; charset="us-ascii"