diff --git a/.circleci/config.yml b/.circleci/config.yml
index b14ed81..492ca1a 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -80,8 +80,8 @@ jobs:
           when: always
           command: |
             helm upgrade --wait --install \
-              binder-${CIRCLE_BRANCH} pangeo-binder \
-              --namespace=binder-${CIRCLE_BRANCH} --version=v0.2.0 \
+              ${CIRCLE_BRANCH} pangeo-binder \
+              --namespace=${CIRCLE_BRANCH} --version=v0.2.0 \
               -f ./deploy-aws/${CIRCLE_BRANCH}.yaml \
               -f ./secrets-aws/${CIRCLE_BRANCH}.yaml
       - run:
diff --git a/deploy-aws/staging-install.yaml b/deploy-aws/staging-install.yaml
new file mode 100644
index 0000000..8bef793
--- /dev/null
+++ b/deploy-aws/staging-install.yaml
@@ -0,0 +1,28 @@
+projectName: pangeo-binder-aws-staging
+binderhub:
+  config:
+    BinderHub:
+      use_registry: true
+      image_prefix: pangeoaccess/binder-staging-
+  ingress:
+    https:
+      enabled: false
+
+  jupyterhub:
+    ingress:
+      enabled: false
+
+  dind:
+    enabled: false
+    daemonset:
+      image:
+        name: docker
+        tag: 19.03.5-dind
+  imageCleaner:
+    enabled: true
+    # when 80% of inodes are used,
+    # cull images until only 40% are used.
+    imageGCThresholdHigh: 80
+    imageGCThresholdLow: 40
+    host:
+      enabled: true
diff --git a/deploy-aws/staging.yaml b/deploy-aws/staging.yaml
index b6a9a6f..82e4b06 100644
--- a/deploy-aws/staging.yaml
+++ b/deploy-aws/staging.yaml
@@ -1,34 +1,70 @@
-projectName: pangeo-binder-staging
+projectName: pangeo-binder-aws-staging
+
 binderhub:
   config:
     BinderHub:
-      hub_url: https://hub.aws-uswest2-binder.pangeo.io
+      build_node_selector:
+        hub.jupyter.org/node-purpose: user
+      hub_url: https://hub.staging.aws-uswest2-binder.pangeo.io
+      badge_base_url: https://staging.aws-uswest2-binder.pangeo.io
+      image_prefix: pangeoaccess/binder-staging-
+      use_registry: true
+  nodeSelector:
+    hub.jupyter.org/node-purpose: core
+  ingress:
+    enabled: true
     hosts:
-    - aws-uswest2-binder.pangeo.io
-  dind:
-    hostLibDir: /var/lib/dind/staging
-    hostSocketDir: /var/run/dind/staging
-
-# Comment this section for first deployment w/o https
-  jupyterhub:
-    proxy:
+    - staging.aws-uswest2-binder.pangeo.io
+    annotations:
+      kubernetes.io/ingress.class: nginx
+      kubernetes.io/tls-acme: "true"
+      cert-manager.io/issuer: letsencrypt-production
     https:
+      enabled: true
+      type: nginx
+    tls:
+    - secretName: staging-aws-uswest2-binder-pangeo-io-tls
       hosts:
-      - hub.aws-uswest2-binder.pangeo.io
-      letsencrypt:
-        contactEmail: scottyh@uw.edu
+      - staging.aws-uswest2-binder.pangeo.io
+
+# uncomment to use dind
+# dind:
+#   enabled: true
+#   hostLibDir: /var/lib/dind/stage
+#   hostSocketDir: /var/run/dind/stage
+# imageCleaner:
+#   enabled: true
+#   host:
+#     enabled: false
+
+  jupyterhub:
+    proxy:
+      nodeSelector:
+        hub.jupyter.org/node-purpose: core
     ingress:
       enabled: true
+      hosts:
+      - hub.staging.aws-uswest2-binder.pangeo.io
       annotations:
-        ingress.kubernetes.io/proxy-body-size: 64m
         kubernetes.io/ingress.class: nginx
-        kubernetes.io/tls-acme: 'true'
-      hosts:
-      - hub.aws-uswest2-binder.pangeo.io
+        kubernetes.io/tls-acme: "true"
+        cert-manager.io/issuer: letsencrypt-production
+      https:
+        enabled: true
+        type: nginx
       tls:
-      - secretName: kubelego-tls-jupyterhub-staging
-        hosts:
-        - hub.aws-uswest2-binder.pangeo.io
+      - secretName: hub-staging-aws-uswest2-binder-pangeo-io-tls
+        hosts:
+        - hub.staging.aws-uswest2-binder.pangeo.io
+    singleuser:
+      extraEnv:
+        DASK_GATEWAY__ADDRESS: "https://hub.staging.aws-uswest2-binder.pangeo.io/services/dask-gateway/"
+        DASK_GATEWAY__PROXY_ADDRESS: "tls://scheduler-public-staging-dask-gateway:8786"
+    hub:
+      services:
+        dask-gateway:
+          # This makes the gateway available at ${HUB_URL}/services/dask-gateway
+          url: http://web-public-staging-dask-gateway
+
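The staging values above assume an nginx ingress controller and a dask-gateway release named `staging` (services `web-public-staging-dask-gateway` and `scheduler-public-staging-dask-gateway`) already exist in the namespace; neither is installed by this chart. A minimal sketch for sanity-checking those assumptions after a deploy:
```
# the binder and hub ingress objects should carry the nginx / cert-manager annotations
kubectl get ingress -n staging

# the dask-gateway services referenced by the hub config should exist
kubectl get svc -n staging | grep dask-gateway
```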
diff --git a/k8s-aws/binderhub-issuer-prod.yaml b/k8s-aws/binderhub-issuer-prod.yaml
new file mode 100644
index 0000000..6ea04a6
--- /dev/null
+++ b/k8s-aws/binderhub-issuer-prod.yaml
@@ -0,0 +1,19 @@
+apiVersion: cert-manager.io/v1alpha2
+kind: Issuer
+metadata:
+  name: letsencrypt-production
+  namespace: prod
+spec:
+  acme:
+    # You must replace this email address with your own.
+    # Let's Encrypt will use this to contact you about expiring
+    # certificates, and issues related to your account.
+    email: scottyh@uw.edu
+    server: https://acme-v02.api.letsencrypt.org/directory
+    privateKeySecretRef:
+      # Secret resource used to store the account's private key.
+      name: letsencrypt-production
+    solvers:
+    - http01:
+        ingress:
+          class: nginx
diff --git a/k8s-aws/binderhub-issuer-staging.yaml b/k8s-aws/binderhub-issuer-staging.yaml
new file mode 100644
index 0000000..67e3322
--- /dev/null
+++ b/k8s-aws/binderhub-issuer-staging.yaml
@@ -0,0 +1,19 @@
+apiVersion: cert-manager.io/v1alpha2
+kind: Issuer
+metadata:
+  name: letsencrypt-production
+  namespace: staging
+spec:
+  acme:
+    # You must replace this email address with your own.
+    # Let's Encrypt will use this to contact you about expiring
+    # certificates, and issues related to your account.
+    email: scottyh@uw.edu
+    server: https://acme-v02.api.letsencrypt.org/directory
+    privateKeySecretRef:
+      # Secret resource used to store the account's private key.
+      name: letsencrypt-production
+    solvers:
+    - http01:
+        ingress:
+          class: nginx
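Both Issuer manifests assume the cert-manager v0.11 CRDs are already installed (the readme below covers that step); `letsencrypt-production` is the name the `cert-manager.io/issuer` annotations in deploy-aws/staging.yaml point at. A small sketch for confirming the ACME account registered once the manifests are applied:
```
# the Issuer should report READY=True once the ACME account is registered
kubectl get issuer -n staging
kubectl describe issuer letsencrypt-production -n staging
```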
resources: ["configmaps"] + verbs: ["create","list","watch"] + - apiGroups: [""] + resources: ["configmaps"] + resourceNames: ["cluster-autoscaler-status", "cluster-autoscaler-priority-expander"] + verbs: ["delete", "get", "update", "watch"] + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: cluster-autoscaler + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cluster-autoscaler +subjects: + - kind: ServiceAccount + name: cluster-autoscaler + namespace: kube-system + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: cluster-autoscaler + namespace: kube-system + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: cluster-autoscaler +subjects: + - kind: ServiceAccount + name: cluster-autoscaler + namespace: kube-system + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: cluster-autoscaler + namespace: kube-system + labels: + app: cluster-autoscaler +spec: + replicas: 1 + selector: + matchLabels: + app: cluster-autoscaler + template: + metadata: + labels: + app: cluster-autoscaler + spec: + serviceAccountName: cluster-autoscaler + containers: + - image: k8s.gcr.io/cluster-autoscaler:v1.14.7 + name: cluster-autoscaler + resources: + limits: + cpu: 100m + memory: 300Mi + requests: + cpu: 100m + memory: 300Mi + command: + - ./cluster-autoscaler + - --v=4 + - --stderrthreshold=info + - --cloud-provider=aws + - --skip-nodes-with-local-storage=false + - --expander=least-waste + - --node-group-auto-discovery=asg:tag=k8s.io/cluster-autoscaler/enabled,k8s.io/cluster-autoscaler/pangeo-binder + volumeMounts: + - name: ssl-certs + mountPath: /etc/ssl/certs/ca-certificates.crt + readOnly: true + imagePullPolicy: "Always" + volumes: + - name: ssl-certs + hostPath: + path: "/etc/ssl/certs/ca-bundle.crt" diff --git a/k8s-aws/eksctl-config.yml b/k8s-aws/eksctl-config.yml new file mode 100644 index 0000000..766af14 --- /dev/null +++ b/k8s-aws/eksctl-config.yml @@ -0,0 +1,159 @@ +apiVersion: eksctl.io/v1alpha5 +kind: ClusterConfig + +metadata: + name: pangeo-binder + region: us-west-2 + +cloudWatch: + clusterLogging: + enableTypes: ["*"] + +iam: + withOIDC: true + serviceAccounts: + - metadata: + name: cluster-autoscaler + namespace: kube-system + labels: + aws-usage: "cluster-ops" + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler + attachPolicyARNs: + - "arn:aws:iam::783380859522:policy/cluster-autoscaler" + - metadata: + name: pangeo + namespace: prod + labels: + aws-usage: "application" + attachPolicyARNs: + - "arn:aws:iam::783380859522:policy/pangeo-data-s3" + - metadata: + name: pangeo + namespace: staging + labels: + aws-usage: "application" + attachPolicyARNs: + - "arn:aws:iam::783380859522:policy/pangeo-data-s3" + +nodeGroups: + - name: core-spot + minSize: 1 + maxSize: 2 + desiredCapacity: 1 + privateNetworking: true + volumeSize: 100 + volumeType: gp2 + labels: + node-role.kubernetes.io/core: core + hub.jupyter.org/node-purpose: core + instancesDistribution: + instanceTypes: + - t3a.large + - t3.large + spotInstancePools: 2 + onDemandBaseCapacity: 0 + onDemandPercentageAboveBaseCapacity: 0 # all spot + ami: auto + amiFamily: AmazonLinux2 + iam: + withAddonPolicies: + autoScaler: true + - name: user-spot + minSize: 0 + maxSize: 10 + desiredCapacity: 0 + privateNetworking: true + 
diff --git a/k8s-aws/eksctl-config.yml b/k8s-aws/eksctl-config.yml
new file mode 100644
index 0000000..766af14
--- /dev/null
+++ b/k8s-aws/eksctl-config.yml
@@ -0,0 +1,159 @@
+apiVersion: eksctl.io/v1alpha5
+kind: ClusterConfig
+
+metadata:
+  name: pangeo-binder
+  region: us-west-2
+
+cloudWatch:
+  clusterLogging:
+    enableTypes: ["*"]
+
+iam:
+  withOIDC: true
+  serviceAccounts:
+    - metadata:
+        name: cluster-autoscaler
+        namespace: kube-system
+        labels:
+          aws-usage: "cluster-ops"
+          k8s-addon: cluster-autoscaler.addons.k8s.io
+          k8s-app: cluster-autoscaler
+      attachPolicyARNs:
+        - "arn:aws:iam::783380859522:policy/cluster-autoscaler"
+    - metadata:
+        name: pangeo
+        namespace: prod
+        labels:
+          aws-usage: "application"
+      attachPolicyARNs:
+        - "arn:aws:iam::783380859522:policy/pangeo-data-s3"
+    - metadata:
+        name: pangeo
+        namespace: staging
+        labels:
+          aws-usage: "application"
+      attachPolicyARNs:
+        - "arn:aws:iam::783380859522:policy/pangeo-data-s3"
+
+nodeGroups:
+  - name: core-spot
+    minSize: 1
+    maxSize: 2
+    desiredCapacity: 1
+    privateNetworking: true
+    volumeSize: 100
+    volumeType: gp2
+    labels:
+      node-role.kubernetes.io/core: core
+      hub.jupyter.org/node-purpose: core
+    instancesDistribution:
+      instanceTypes:
+        - t3a.large
+        - t3.large
+      spotInstancePools: 2
+      onDemandBaseCapacity: 0
+      onDemandPercentageAboveBaseCapacity: 0 # all spot
+    ami: auto
+    amiFamily: AmazonLinux2
+    iam:
+      withAddonPolicies:
+        autoScaler: true
+  - name: user-spot
+    minSize: 0
+    maxSize: 10
+    desiredCapacity: 0
+    privateNetworking: true
+    instancesDistribution:
+      instanceTypes:
+        - m5.2xlarge
+        - m5a.2xlarge
+        - m5n.2xlarge
+      spotInstancePools: 3
+      onDemandBaseCapacity: 0
+      onDemandPercentageAboveBaseCapacity: 0 # all spot
+    volumeSize: 100
+    volumeType: gp2
+    labels:
+      node-role.kubernetes.io/user: user
+      hub.jupyter.org/node-purpose: user
+    taints:
+      hub.jupyter.org/dedicated: 'user:NoSchedule'
+    tags:
+      k8s.io/cluster-autoscaler/node-template/label/hub.jupyter.org/node-purpose: user
+      k8s.io/cluster-autoscaler/node-template/taint/hub.jupyter.org/dedicated: 'user:NoSchedule'
+    ami: auto
+    amiFamily: AmazonLinux2
+    iam:
+      withAddonPolicies:
+        autoScaler: true
+    preBootstrapCommands: # see https://github.com/weaveworks/eksctl/issues/1310
+      - yum install -y iptables-services
+      - iptables --insert FORWARD 1 --in-interface eni+ --destination 169.254.169.254/32 --jump DROP
+      - iptables-save | tee /etc/sysconfig/iptables
+      - systemctl enable --now iptables
+  - name: worker-spot
+    minSize: 0
+    maxSize: 10
+    desiredCapacity: 0
+    privateNetworking: true
+    instancesDistribution:
+      instanceTypes:
+        - r5.2xlarge
+        - r5a.2xlarge
+        - r5n.2xlarge
+      spotInstancePools: 3
+      onDemandBaseCapacity: 0
+      onDemandPercentageAboveBaseCapacity: 0
+    volumeSize: 100
+    volumeType: gp2
+    labels:
+      node-role.kubernetes.io/worker: worker
+      k8s.dask.org/node-purpose: worker
+    taints:
+      k8s.dask.org/dedicated: 'worker:NoSchedule'
+    tags:
+      k8s.io/cluster-autoscaler/node-template/label/k8s.dask.org/node-purpose: worker
+      k8s.io/cluster-autoscaler/node-template/taint/k8s.dask.org/dedicated: "worker:NoSchedule"
+    ami: auto
+    amiFamily: AmazonLinux2
+    iam:
+      withAddonPolicies:
+        autoScaler: true
+    preBootstrapCommands: # see https://github.com/weaveworks/eksctl/issues/1310
+      - yum install -y iptables-services
+      - iptables --insert FORWARD 1 --in-interface eni+ --destination 169.254.169.254/32 --jump DROP
+      - iptables-save | tee /etc/sysconfig/iptables
+      - systemctl enable --now iptables
+  - name: scheduler-spot
+    minSize: 0
+    maxSize: 20
+    desiredCapacity: 0
+    privateNetworking: true
+    instancesDistribution:
+      instanceTypes:
+        - t3.large
+        - t3a.large
+      spotInstancePools: 2
+      onDemandBaseCapacity: 0
+      onDemandPercentageAboveBaseCapacity: 0
+    volumeSize: 100
+    volumeType: gp2
+    labels:
+      node-role.kubernetes.io/scheduler: scheduler
+      k8s.dask.org/node-purpose: scheduler
+    taints:
+      k8s.dask.org/dedicated: 'scheduler:NoSchedule'
+    tags:
+      k8s.io/cluster-autoscaler/node-template/label/k8s.dask.org/node-purpose: scheduler
+      k8s.io/cluster-autoscaler/node-template/taint/k8s.dask.org/dedicated: "scheduler:NoSchedule"
+    ami: auto
+    amiFamily: AmazonLinux2
+    iam:
+      withAddonPolicies:
+        autoScaler: true
+    preBootstrapCommands: # see https://github.com/weaveworks/eksctl/issues/1310
+      - yum install -y iptables-services
+      - iptables --insert FORWARD 1 --in-interface eni+ --destination 169.254.169.254/32 --jump DROP
+      - iptables-save | tee /etc/sysconfig/iptables
+      - systemctl enable --now iptables
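The user, worker and scheduler groups all scale from zero, so right after cluster creation only the core group should have nodes; the taints and `k8s.io/cluster-autoscaler/node-template/...` tags are what let the autoscaler above pick the right group for pending user, worker and scheduler pods. A quick sketch for verifying the nodegroups and labels (cluster name and region as configured above):
```
# list the nodegroups eksctl created for this cluster
eksctl get nodegroup --cluster pangeo-binder --region us-west-2

# core nodes should carry hub.jupyter.org/node-purpose=core
kubectl get nodes --show-labels
```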
diff --git a/k8s-aws/readme.md b/k8s-aws/readme.md
new file mode 100644
index 0000000..b148a12
--- /dev/null
+++ b/k8s-aws/readme.md
@@ -0,0 +1,58 @@
+# Deploy k8s cluster on AWS EKS
+
+
+##### Deploy cluster and nodegroups
+```
+eksctl create cluster -f eksctl-config.yml
+```
+
+
+##### Patch the default AWS storage class (https://github.com/jupyterhub/zero-to-jupyterhub-k8s/issues/1413)
+```
+kubectl apply -f storage-class.yml
+kubectl patch storageclass gp2 -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"false"}}}'
+```
+
+
+##### Deploy cluster autoscaler
+```
+kubectl apply -f cluster-autoscaler.yml
+```
+
+
+##### Deploy binderhub
+```
+export CIRCLE_BRANCH=staging
+helm upgrade --wait --install ${CIRCLE_BRANCH} pangeo-binder --namespace=${CIRCLE_BRANCH} --version=v0.2.0 -f ./deploy-aws/${CIRCLE_BRANCH}-install.yaml -f ./secrets-aws/${CIRCLE_BRANCH}-install.yaml --cleanup-on-fail
+```
+NOTE: confirm the non-https deployment is working with `kubectl get pods -A` and by browsing to the EXTERNAL-IP from `kubectl get svc binder -n $CIRCLE_BRANCH`
+
+
+##### Upgrade binderhub w/ manually edited https settings
+NOTE: add loadbalancerIPs to secrets-aws/staging.yaml and update DNS settings for the domain name
+```
+export CIRCLE_BRANCH=staging
+helm upgrade --wait --install ${CIRCLE_BRANCH} pangeo-binder --namespace=${CIRCLE_BRANCH} --version=v0.2.0 -f ./deploy-aws/${CIRCLE_BRANCH}.yaml -f ./secrets-aws/${CIRCLE_BRANCH}.yaml --cleanup-on-fail
+```
+
+
+##### Set up HTTPS (https://binderhub.readthedocs.io/en/latest/https.html)
+NOTE: edit binderhub-issuer-staging.yaml with your email
+```
+kubectl apply -f https://github.com/jetstack/cert-manager/releases/download/v0.11.0/cert-manager.yaml --validate=false
+# Wait about 2 minutes for 'webhook' to start running before running this command:
+kubectl apply -f binderhub-issuer-${CIRCLE_BRANCH}.yaml
+```
+You should now have a functioning binderhub at https://staging.aws-uswest2-binder.pangeo.io !!!
+
+
+##### Removing things:
+You can remove resources created with `kubectl apply` using `kubectl delete`:
+```
+kubectl delete -f https://github.com/jetstack/cert-manager/releases/download/v0.11.0/cert-manager.yaml
+```
+
+Or tear everything down with
+```
+helm delete staging -n staging
+```
diff --git a/k8s-aws/storage-class.yml b/k8s-aws/storage-class.yml
new file mode 100644
index 0000000..c4b7424
--- /dev/null
+++ b/k8s-aws/storage-class.yml
@@ -0,0 +1,13 @@
+apiVersion: storage.k8s.io/v1
+kind: StorageClass
+metadata:
+  name: mygp2
+  annotations:
+    storageclass.kubernetes.io/is-default-class: "true"
+parameters:
+  type: gp2
+  fsType: ext4
+provisioner: kubernetes.io/aws-ebs
+reclaimPolicy: Delete
+volumeBindingMode: WaitForFirstConsumer
+allowVolumeExpansion: true
diff --git a/secrets-aws/staging-install.yaml b/secrets-aws/staging-install.yaml
new file mode 100644
index 0000000..712cdd1
Binary files /dev/null and b/secrets-aws/staging-install.yaml differ
diff --git a/secrets-aws/staging.yaml b/secrets-aws/staging.yaml
index 6948fad..cf6f26b 100644
Binary files a/secrets-aws/staging.yaml and b/secrets-aws/staging.yaml differ
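Once the issuer from k8s-aws/binderhub-issuer-staging.yaml is applied, cert-manager should create Certificate resources for the two TLS secrets named in deploy-aws/staging.yaml. A hedged end-to-end check, using the hostnames configured above:
```
# certificates should eventually report READY=True
kubectl get certificate -n staging

# both endpoints should answer over https once the certificates are issued
curl -sI https://staging.aws-uswest2-binder.pangeo.io | head -n 1
curl -sI https://hub.staging.aws-uswest2-binder.pangeo.io | head -n 1
```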