From 971241444f1b174265d2ee5f479d15979f66f424 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Fri, 14 Jul 2023 10:38:18 -0500 Subject: [PATCH 01/72] feat(gen3-openai): rough initial testing, no automation for rolling --- files/squid_whitelist/web_whitelist | 1 + .../gen3-openai/gen3-openai-deploy.yaml | 46 +++++++++++++++++++ .../gen3-openai/gen3-openai-service.yaml | 30 ++++++++++++ .../gen3.nginx.conf/gen3-openai-service.conf | 17 +++++++ 4 files changed, 94 insertions(+) create mode 100644 kube/services/gen3-openai/gen3-openai-deploy.yaml create mode 100644 kube/services/gen3-openai/gen3-openai-service.yaml create mode 100644 kube/services/revproxy/gen3.nginx.conf/gen3-openai-service.conf diff --git a/files/squid_whitelist/web_whitelist b/files/squid_whitelist/web_whitelist index c36194765..a3d74e76c 100644 --- a/files/squid_whitelist/web_whitelist +++ b/files/squid_whitelist/web_whitelist @@ -14,6 +14,7 @@ ctds-planx.atlassian.net data.cityofchicago.org dataguids.org api.login.yahoo.com +api.openai.com api.snapcraft.io apt.kubernetes.io argoproj.github.io diff --git a/kube/services/gen3-openai/gen3-openai-deploy.yaml b/kube/services/gen3-openai/gen3-openai-deploy.yaml new file mode 100644 index 000000000..fc19be45e --- /dev/null +++ b/kube/services/gen3-openai/gen3-openai-deploy.yaml @@ -0,0 +1,46 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: gen3-openai-deployment +spec: + selector: + # Only select pods based on the 'app' label + matchLabels: + app: gen3-openai + release: production + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + template: + metadata: + labels: + app: gen3-openai + release: production + spec: + containers: + - name: gen3-openai + image: "quay.io/cdis/gen3-openai:latest" + imagePullPolicy: Always + ports: + - containerPort: 8080 + env: + - name: OPENAI_API_KEY + valueFrom: + secretKeyRef: + name: gen3-openai-g3auto + key: "openai_key" + - name: TOPICS + value: default,custom + - name: 
CUSTOM_SYSTEM_PROMPT + value: You answer questions about datasets that are available in BioData Catalyst. You'll be given relevant dataset descriptions for every dataset that's been ingested into BioData Catalyst. You are acting as a search assistant for a biomedical researcher (who will be asking you questions). The researcher is likely trying to find datasets of interest for a particular research question. You should recommend datasets that may be of interest to that researcher. + - name: CUSTOM_EMBEDDINGS_PATH + value: embeddings/embeddings.csv + imagePullPolicy: Always + resources: + requests: + cpu: 1 + limits: + cpu: 2 + memory: 512Mi diff --git a/kube/services/gen3-openai/gen3-openai-service.yaml b/kube/services/gen3-openai/gen3-openai-service.yaml new file mode 100644 index 000000000..7d3588835 --- /dev/null +++ b/kube/services/gen3-openai/gen3-openai-service.yaml @@ -0,0 +1,30 @@ +kind: Service +apiVersion: v1 +metadata: + name: gen3-openai-service + annotations: + getambassador.io/config: | + --- + apiVersion: ambassador/v1 + ambassador_id: "gen3" + kind: Mapping + name: gen3-openai_mapping + prefix: /index/ + service: http://gen3-openai-service:80 +spec: + selector: + app: gen3-openai + release: production + ports: + - protocol: TCP + port: 80 + targetPort: 80 + name: http + nodePort: null + - protocol: TCP + port: 443 + targetPort: 443 + name: https + nodePort: null + type: ClusterIP + diff --git a/kube/services/revproxy/gen3.nginx.conf/gen3-openai-service.conf b/kube/services/revproxy/gen3.nginx.conf/gen3-openai-service.conf new file mode 100644 index 000000000..1f3668ca4 --- /dev/null +++ b/kube/services/revproxy/gen3.nginx.conf/gen3-openai-service.conf @@ -0,0 +1,17 @@ + location /openai/ { + if ($csrf_check !~ ^ok-\S.+$) { + return 403 "failed csrf check"; + } + set $authz_resource "/mds_gateway"; + set $authz_method "access"; + set $authz_service "mds_gateway"; + # be careful - sub-request runs in same context as this request + auth_request 
/gen3-authz; + + set $proxy_service "gen3-openai-service"; + set $upstream http://gen3-openai-service$des_domain; + rewrite ^/openai/(.*) /$1 break; + proxy_pass $upstream; + proxy_redirect http://$host/ https://$host/openai/; + client_max_body_size 0; + } From f89c8a415e220b4ed58eb853d8363529ca7a71de Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Wed, 25 Oct 2023 11:54:06 -0500 Subject: [PATCH 02/72] feat(gen3-discovery-ai): initial deployment --- gen3/bin/kube-setup-gen3-discovery-ai.sh | 84 +++++++++++++++++++ .../gen3-discovery-ai/gen3-openai-deploy.yaml | 55 ++++++++++++ .../gen3-openai-service.yaml | 8 +- .../gen3-openai/gen3-openai-deploy.yaml | 46 ---------- .../gen3-discoveryai-service.conf | 12 +++ .../gen3.nginx.conf/gen3-openai-service.conf | 17 ---- 6 files changed, 155 insertions(+), 67 deletions(-) create mode 100644 gen3/bin/kube-setup-gen3-discovery-ai.sh create mode 100644 kube/services/gen3-discovery-ai/gen3-openai-deploy.yaml rename kube/services/{gen3-openai => gen3-discovery-ai}/gen3-openai-service.yaml (75%) delete mode 100644 kube/services/gen3-openai/gen3-openai-deploy.yaml create mode 100644 kube/services/revproxy/gen3.nginx.conf/gen3-discoveryai-service.conf delete mode 100644 kube/services/revproxy/gen3.nginx.conf/gen3-openai-service.conf diff --git a/gen3/bin/kube-setup-gen3-discovery-ai.sh b/gen3/bin/kube-setup-gen3-discovery-ai.sh new file mode 100644 index 000000000..ca8db5490 --- /dev/null +++ b/gen3/bin/kube-setup-gen3-discovery-ai.sh @@ -0,0 +1,84 @@ +#!/bin/bash +# +# Deploy the gen3-discovery-ai service. +# + +source "${GEN3_HOME}/gen3/lib/utils.sh" +gen3_load "gen3/gen3setup" + +# NOTE: no db for this service yet, but we'll likely need it in the future +setup_database() { + gen3_log_info "setting up gen3-discovery-ai service ..." + + if g3kubectl describe secret gen3-discovery-ai-g3auto > /dev/null 2>&1; then + gen3_log_info "gen3-discovery-ai-g3auto secret already configured" + return 0 + fi + if [[ -n "$JENKINS_HOME" || ! 
-f "$(gen3_secrets_folder)/creds.json" ]]; then + gen3_log_err "skipping db setup in non-adminvm environment" + return 0 + fi + # Setup .env file that gen3-discovery-ai service consumes + if [[ ! -f "$secretsFolder/gen3-discovery-ai.env" || ! -f "$secretsFolder/base64Authz.txt" ]]; then + local secretsFolder="$(gen3_secrets_folder)/g3auto/gen3-discovery-ai" + + if [[ ! -f "$secretsFolder/dbcreds.json" ]]; then + if ! gen3 db setup gen3-discovery-ai; then + gen3_log_err "Failed setting up database for gen3-discovery-ai service" + return 1 + fi + fi + if [[ ! -f "$secretsFolder/dbcreds.json" ]]; then + gen3_log_err "dbcreds not present in Gen3Secrets/" + return 1 + fi + + # go ahead and rotate the password whenever we regen this file + local password="$(gen3 random)" + cat - > "$secretsFolder/gen3-discovery-ai.env" < "$secretsFolder/base64Authz.txt" + fi + gen3 secrets sync 'setup gen3-discovery-ai-g3auto secrets' +} + +if ! g3k_manifest_lookup .versions.gen3-discovery-ai 2> /dev/null; then + gen3_log_info "kube-setup-gen3-discovery-ai exiting - gen3-discovery-ai service not in manifest" + exit 0 +fi + +# There's no db for this service *yet* +# +# if ! setup_database; then +# gen3_log_err "kube-setup-gen3-discovery-ai bailing out - database failed setup" +# exit 1 +# fi + + +if [ -d "$(dirname $(g3k_manifest_path))/gen3-discovery-ai/knowledge/chromadb" ]; then + g3kubectl delete configmap gen3-discovery-ai-knowledge-library + g3kubectl create configmap gen3-discovery-ai-knowledge-library --from-file "$(dirname $(g3k_manifest_path))/gen3-discovery-ai/knowledge/chromadb" +fi + +# Sync the manifest config from manifest.json (or manifests/gen3-discovery-ai.json) to the k8s config map. +# This may not actually create the manifest-gen3-discovery-ai config map if the user did not specify any gen3-discovery-ai +# keys in their manifest configuration. 
+[[ -z "$GEN3_ROLL_ALL" ]] && gen3 gitops configmaps + +gen3 roll gen3-discovery-ai +g3kubectl apply -f "${GEN3_HOME}/kube/services/gen3-discovery-ai/gen3-discovery-ai-service.yaml" + +if [[ -z "$GEN3_ROLL_ALL" ]]; then + gen3 kube-setup-networkpolicy + gen3 kube-setup-revproxy +fi + +gen3_log_info "The gen3-discovery-ai service has been deployed onto the kubernetes cluster" +gen3_log_info "test with: curl https://commons-host/ai" diff --git a/kube/services/gen3-discovery-ai/gen3-openai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-openai-deploy.yaml new file mode 100644 index 000000000..35e6746d2 --- /dev/null +++ b/kube/services/gen3-discovery-ai/gen3-openai-deploy.yaml @@ -0,0 +1,55 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: gen3-discovery-ai-deployment +spec: + selector: + # Only select pods based on the 'app' label + matchLabels: + app: gen3-discovery-ai + release: production + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + template: + metadata: + labels: + app: gen3-discovery-ai + release: production + spec: + volumes: + - name: config-volume-g3auto + secret: + secretName: gen3-discovery-ai-g3auto + - name: gen3-discovery-ai-knowledge-library + mountPath: / + containers: + - name: gen3-discovery-ai + image: "quay.io/cdis/gen3-discovery-ai:latest" + imagePullPolicy: Always + ports: + - containerPort: 8080 + env: + - name: GEN3_DEBUG + GEN3_DEBUG_FLAG|-value: "False"-| + volumeMounts: + - name: config-volume-g3auto + readOnly: true + mountPath: /.env + subPath: gen3-discovery-ai.env + - name: gen3-discovery-ai-knowledge-library + readOnly: false + configMap: + name: knowledge + items: + - key: knowledge + path: knowledge + imagePullPolicy: Always + resources: + requests: + cpu: 1 + limits: + cpu: 2 + memory: 512Mi diff --git a/kube/services/gen3-openai/gen3-openai-service.yaml b/kube/services/gen3-discovery-ai/gen3-openai-service.yaml similarity index 75% rename from 
kube/services/gen3-openai/gen3-openai-service.yaml rename to kube/services/gen3-discovery-ai/gen3-openai-service.yaml index 7d3588835..eae88bf9b 100644 --- a/kube/services/gen3-openai/gen3-openai-service.yaml +++ b/kube/services/gen3-discovery-ai/gen3-openai-service.yaml @@ -1,19 +1,19 @@ kind: Service apiVersion: v1 metadata: - name: gen3-openai-service + name: gen3-discovery-ai-service annotations: getambassador.io/config: | --- apiVersion: ambassador/v1 ambassador_id: "gen3" kind: Mapping - name: gen3-openai_mapping + name: gen3-discovery-ai_mapping prefix: /index/ - service: http://gen3-openai-service:80 + service: http://gen3-discovery-ai-service:80 spec: selector: - app: gen3-openai + app: gen3-discovery-ai release: production ports: - protocol: TCP diff --git a/kube/services/gen3-openai/gen3-openai-deploy.yaml b/kube/services/gen3-openai/gen3-openai-deploy.yaml deleted file mode 100644 index fc19be45e..000000000 --- a/kube/services/gen3-openai/gen3-openai-deploy.yaml +++ /dev/null @@ -1,46 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: gen3-openai-deployment -spec: - selector: - # Only select pods based on the 'app' label - matchLabels: - app: gen3-openai - release: production - strategy: - type: RollingUpdate - rollingUpdate: - maxSurge: 1 - maxUnavailable: 0 - template: - metadata: - labels: - app: gen3-openai - release: production - spec: - containers: - - name: gen3-openai - image: "quay.io/cdis/gen3-openai:latest" - imagePullPolicy: Always - ports: - - containerPort: 8080 - env: - - name: OPENAI_API_KEY - valueFrom: - secretKeyRef: - name: gen3-openai-g3auto - key: "openai_key" - - name: TOPICS - value: default,custom - - name: CUSTOM_SYSTEM_PROMPT - value: You answer questions about datasets that are available in BioData Catalyst. You'll be given relevant dataset descriptions for every dataset that's been ingested into BioData Catalyst. 
You are acting as a search assistant for a biomedical researcher (who will be asking you questions). The researcher is likely trying to find datasets of interest for a particular research question. You should recommend datasets that may be of interest to that researcher. - - name: CUSTOM_EMBEDDINGS_PATH - value: embeddings/embeddings.csv - imagePullPolicy: Always - resources: - requests: - cpu: 1 - limits: - cpu: 2 - memory: 512Mi diff --git a/kube/services/revproxy/gen3.nginx.conf/gen3-discoveryai-service.conf b/kube/services/revproxy/gen3.nginx.conf/gen3-discoveryai-service.conf new file mode 100644 index 000000000..cc7b361e2 --- /dev/null +++ b/kube/services/revproxy/gen3.nginx.conf/gen3-discoveryai-service.conf @@ -0,0 +1,12 @@ + location /ai { + if ($csrf_check !~ ^ok-\S.+$) { + return 403 "failed csrf check"; + } + + set $proxy_service "gen3-discovery-ai-service"; + set $upstream http://gen3-discovery-ai-service$des_domain; + rewrite ^/ai(.*) /$1 break; + proxy_pass $upstream; + proxy_redirect http://$host/ https://$host/ai; + client_max_body_size 0; + } diff --git a/kube/services/revproxy/gen3.nginx.conf/gen3-openai-service.conf b/kube/services/revproxy/gen3.nginx.conf/gen3-openai-service.conf deleted file mode 100644 index 1f3668ca4..000000000 --- a/kube/services/revproxy/gen3.nginx.conf/gen3-openai-service.conf +++ /dev/null @@ -1,17 +0,0 @@ - location /openai/ { - if ($csrf_check !~ ^ok-\S.+$) { - return 403 "failed csrf check"; - } - set $authz_resource "/mds_gateway"; - set $authz_method "access"; - set $authz_service "mds_gateway"; - # be careful - sub-request runs in same context as this request - auth_request /gen3-authz; - - set $proxy_service "gen3-openai-service"; - set $upstream http://gen3-openai-service$des_domain; - rewrite ^/openai/(.*) /$1 break; - proxy_pass $upstream; - proxy_redirect http://$host/ https://$host/openai/; - client_max_body_size 0; - } From 371e84cd05a5aef5ec598282463f4f4f180f8ab3 Mon Sep 17 00:00:00 2001 From: Alex VanTol 
Date: Wed, 25 Oct 2023 13:33:10 -0500 Subject: [PATCH 03/72] fix(ai): fix setup jq escaping --- gen3/bin/kube-setup-gen3-discovery-ai.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gen3/bin/kube-setup-gen3-discovery-ai.sh b/gen3/bin/kube-setup-gen3-discovery-ai.sh index ca8db5490..351d33721 100644 --- a/gen3/bin/kube-setup-gen3-discovery-ai.sh +++ b/gen3/bin/kube-setup-gen3-discovery-ai.sh @@ -49,7 +49,7 @@ EOM gen3 secrets sync 'setup gen3-discovery-ai-g3auto secrets' } -if ! g3k_manifest_lookup .versions.gen3-discovery-ai 2> /dev/null; then +if ! g3k_manifest_lookup '.versions."gen3-discovery-ai"' 2> /dev/null; then gen3_log_info "kube-setup-gen3-discovery-ai exiting - gen3-discovery-ai service not in manifest" exit 0 fi From 8f2c0605bbad17fae2c1dd4b03b43fe3216ad410 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Thu, 26 Oct 2023 10:42:12 -0500 Subject: [PATCH 04/72] fix(ai): fix file name --- .../{gen3-openai-deploy.yaml => gen3-discovery-ai-deploy.yaml} | 0 .../{gen3-openai-service.yaml => gen3-discovery-ai-service.yaml} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename kube/services/gen3-discovery-ai/{gen3-openai-deploy.yaml => gen3-discovery-ai-deploy.yaml} (100%) rename kube/services/gen3-discovery-ai/{gen3-openai-service.yaml => gen3-discovery-ai-service.yaml} (100%) diff --git a/kube/services/gen3-discovery-ai/gen3-openai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml similarity index 100% rename from kube/services/gen3-discovery-ai/gen3-openai-deploy.yaml rename to kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml diff --git a/kube/services/gen3-discovery-ai/gen3-openai-service.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-service.yaml similarity index 100% rename from kube/services/gen3-discovery-ai/gen3-openai-service.yaml rename to kube/services/gen3-discovery-ai/gen3-discovery-ai-service.yaml From 1e5b0791a2c84a201f7fb87a7683437ea3e60dc5 Mon Sep 17 00:00:00 2001 From: 
Alex VanTol Date: Thu, 26 Oct 2023 10:56:24 -0500 Subject: [PATCH 05/72] fix(ai): fix deployment configmap --- .../gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index 35e6746d2..c5100d511 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -24,7 +24,11 @@ spec: secret: secretName: gen3-discovery-ai-g3auto - name: gen3-discovery-ai-knowledge-library - mountPath: / + configMap: + name: knowledge + items: + - key: knowledge + path: knowledge containers: - name: gen3-discovery-ai image: "quay.io/cdis/gen3-discovery-ai:latest" @@ -41,11 +45,7 @@ spec: subPath: gen3-discovery-ai.env - name: gen3-discovery-ai-knowledge-library readOnly: false - configMap: - name: knowledge - items: - - key: knowledge - path: knowledge + mountPath: /knowledge imagePullPolicy: Always resources: requests: From 9b3e977c899f3e416a2113b246a5ad75c3c3206c Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Thu, 26 Oct 2023 11:32:15 -0500 Subject: [PATCH 06/72] fix(ai): fix configmap --- .../services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index c5100d511..8294bb60a 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -25,10 +25,7 @@ spec: secretName: gen3-discovery-ai-g3auto - name: gen3-discovery-ai-knowledge-library configMap: - name: knowledge - items: - - key: knowledge - path: knowledge + name: gen3-discovery-ai-knowledge-library containers: - name: gen3-discovery-ai image: 
"quay.io/cdis/gen3-discovery-ai:latest" From 38ca437f6a012405ae0bdc3885bd1f9d3c4b2d96 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Thu, 26 Oct 2023 11:41:55 -0500 Subject: [PATCH 07/72] fix(ai): env path --- kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index 8294bb60a..09cae65b0 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -39,7 +39,7 @@ spec: - name: config-volume-g3auto readOnly: true mountPath: /.env - subPath: gen3-discovery-ai.env + subPath: env - name: gen3-discovery-ai-knowledge-library readOnly: false mountPath: /knowledge From 3832fdaa15f97bdb5f9c9e41838418f4c5f4d339 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Thu, 26 Oct 2023 11:50:29 -0500 Subject: [PATCH 08/72] feat(image): use from manifest --- kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index 09cae65b0..2cd9596a2 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -18,6 +18,7 @@ spec: labels: app: gen3-discovery-ai release: production + GEN3_DATE_LABEL spec: volumes: - name: config-volume-g3auto @@ -28,7 +29,7 @@ spec: name: gen3-discovery-ai-knowledge-library containers: - name: gen3-discovery-ai - image: "quay.io/cdis/gen3-discovery-ai:latest" + GEN3_GEN3-DISCOVERY-AI_IMAGE imagePullPolicy: Always ports: - containerPort: 8080 From 177c859ff9f8a14c4c62ad856c88bc06d961f8c5 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Thu, 26 Oct 2023 12:45:34 -0500 Subject: [PATCH 09/72] chore(ai): 
better naming --- .../gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index 2cd9596a2..ca7cdd8b0 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -21,10 +21,10 @@ spec: GEN3_DATE_LABEL spec: volumes: - - name: config-volume-g3auto + - name: gen3-discovery-ai-g3auto-volume secret: secretName: gen3-discovery-ai-g3auto - - name: gen3-discovery-ai-knowledge-library + - name: gen3-discovery-ai-knowledge-library-volume configMap: name: gen3-discovery-ai-knowledge-library containers: @@ -37,11 +37,11 @@ spec: - name: GEN3_DEBUG GEN3_DEBUG_FLAG|-value: "False"-| volumeMounts: - - name: config-volume-g3auto + - name: gen3-discovery-ai-g3auto-volume readOnly: true mountPath: /.env subPath: env - - name: gen3-discovery-ai-knowledge-library + - name: gen3-discovery-ai-knowledge-library-volume readOnly: false mountPath: /knowledge imagePullPolicy: Always From fde3de87f99c169f1c3d360c0b0f0cbd5e5b1372 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Thu, 26 Oct 2023 12:51:59 -0500 Subject: [PATCH 10/72] fix(ai): fix mount path for cfg --- kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index ca7cdd8b0..d326f2781 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -39,11 +39,11 @@ spec: volumeMounts: - name: gen3-discovery-ai-g3auto-volume readOnly: true - mountPath: /.env + mountPath: /gen3discoveryai/.env subPath: env - name: gen3-discovery-ai-knowledge-library-volume readOnly: false - 
mountPath: /knowledge + mountPath: /gen3discoveryai/knowledge imagePullPolicy: Always resources: requests: From 38547527e8a88e0567b20ce08200d67236a09669 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Thu, 26 Oct 2023 14:32:40 -0500 Subject: [PATCH 11/72] fix(ai): first attempt to fix issue of needing write volume for chromadb persistence --- .../gen3-discovery-ai-deploy.yaml | 42 +++++++++++++++++-- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index d326f2781..9ca3db97e 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -24,10 +24,19 @@ spec: - name: gen3-discovery-ai-g3auto-volume secret: secretName: gen3-discovery-ai-g3auto - - name: gen3-discovery-ai-knowledge-library-volume + - name: tmp-readonly-configmap-knowledge-library configMap: name: gen3-discovery-ai-knowledge-library - containers: + - name: gen3-discovery-ai-knowledge-library-volume + emptyDir: {} + initContainers: + # configmaps mount to read-only volumes and chromadb's persisted disk + # excepts the ability to write. + # + # Solution: utilize emptyDir as a writable space.
+ # + # Procedure: in init container, mount configmap into readonly space, copy files to writable + # space in emptyDir, use files from writable in final container - name: gen3-discovery-ai GEN3_GEN3-DISCOVERY-AI_IMAGE imagePullPolicy: Always @@ -42,8 +51,9 @@ spec: mountPath: /gen3discoveryai/.env subPath: env - name: gen3-discovery-ai-knowledge-library-volume - readOnly: false mountPath: /gen3discoveryai/knowledge + - name: tmp-readonly-configmap-knowledge-library + mountPath: /tmp/readonly imagePullPolicy: Always resources: requests: @@ -51,3 +61,29 @@ spec: limits: cpu: 2 memory: 512Mi + commands: [ + 'sh', '-c', 'cp -R /tmp/readonly /gen3discoveryai/knowledge' + ] + containers: + - name: gen3-discovery-ai + GEN3_GEN3-DISCOVERY-AI_IMAGE + imagePullPolicy: Always + ports: + - containerPort: 8080 + env: + - name: GEN3_DEBUG + GEN3_DEBUG_FLAG|-value: "False"-| + volumeMounts: + - name: gen3-discovery-ai-g3auto-volume + readOnly: true + mountPath: /gen3discoveryai/.env + subPath: env + - name: gen3-discovery-ai-knowledge-library-volume + mountPath: /gen3discoveryai/knowledge + imagePullPolicy: Always + resources: + requests: + cpu: 1 + limits: + cpu: 2 + memory: 512Mi \ No newline at end of file From 44819fc1319ad61add80a57c4ab6aed00d261f75 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Thu, 26 Oct 2023 15:00:48 -0500 Subject: [PATCH 12/72] fix(ai): k8s deploy command --- kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index 9ca3db97e..e3c28160e 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -61,7 +61,7 @@ spec: limits: cpu: 2 memory: 512Mi - commands: [ + command: [ 'sh', '-c', 'cp -R /tmp/readonly /gen3discoveryai/knowledge' ] containers: From 
91232fea155f724ad1d953af49e984b3308d8187 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Thu, 26 Oct 2023 15:01:55 -0500 Subject: [PATCH 13/72] fix(ai): fix duplicate name --- kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index e3c28160e..c086f5c27 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -37,7 +37,7 @@ spec: # # Procedure: in init container, mount configmap into readonly space, copy files to writable # space in emptyDir, use files from writable in final container - - name: gen3-discovery-ai + - name: gen3-discovery-ai-init GEN3_GEN3-DISCOVERY-AI_IMAGE imagePullPolicy: Always ports: From c5ab539df047a5c863f17eaba9778ab77b7466cf Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Fri, 27 Oct 2023 10:02:45 -0500 Subject: [PATCH 14/72] chore(ai): don't send telemetry data --- kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index c086f5c27..b7d254baa 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -45,6 +45,8 @@ spec: env: - name: GEN3_DEBUG GEN3_DEBUG_FLAG|-value: "False"-| + - name: ANONYMIZED_TELEMETRY + value: False volumeMounts: - name: gen3-discovery-ai-g3auto-volume readOnly: true @@ -73,6 +75,8 @@ spec: env: - name: GEN3_DEBUG GEN3_DEBUG_FLAG|-value: "False"-| + - name: ANONYMIZED_TELEMETRY + value: False volumeMounts: - name: gen3-discovery-ai-g3auto-volume readOnly: true From b7719b4508a35428d29599ab75e13ca87352c91d Mon Sep 17 00:00:00 2001 From: Alex VanTol Date:
Fri, 27 Oct 2023 10:21:32 -0500 Subject: [PATCH 15/72] chore(ai): more logging in init --- .../gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index b7d254baa..52d8ed188 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -46,7 +46,7 @@ spec: - name: GEN3_DEBUG GEN3_DEBUG_FLAG|-value: "False"-| - name: ANONYMIZED_TELEMETRY - value: False + value: "False" volumeMounts: - name: gen3-discovery-ai-g3auto-volume readOnly: true @@ -64,7 +64,7 @@ spec: cpu: 2 memory: 512Mi command: [ - 'sh', '-c', 'cp -R /tmp/readonly /gen3discoveryai/knowledge' + 'sh', '-c', 'echo before && ls -la /tmp/readonly && ls -la /gen3discoveryai/knowledge && cp -R /tmp/readonly /gen3discoveryai/knowledge && echo after && ls -la /tmp/readonly && ls -la /gen3discoveryai/knowledge' ] containers: - name: gen3-discovery-ai @@ -76,7 +76,7 @@ spec: - name: GEN3_DEBUG GEN3_DEBUG_FLAG|-value: "False"-| - name: ANONYMIZED_TELEMETRY - value: False + value: "False" volumeMounts: - name: gen3-discovery-ai-g3auto-volume readOnly: true From e49805883b6cccbc60af77c044d45d8c154d2f92 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Fri, 27 Oct 2023 10:37:44 -0500 Subject: [PATCH 16/72] chore(logs): more --- kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index 52d8ed188..7e3c39431 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -64,7 +64,7 @@ spec: cpu: 2 memory: 512Mi command: [ - 'sh', '-c', 'echo before && ls -la 
/tmp/readonly && ls -la /gen3discoveryai/knowledge && cp -R /tmp/readonly /gen3discoveryai/knowledge && echo after && ls -la /tmp/readonly && ls -la /gen3discoveryai/knowledge' + 'sh', '-c', 'echo BEFORE /tmp/readonly && ls -Ra /tmp/readonly && echo BEFORE /gen3discoveryai/knowledge && ls -Ra /gen3discoveryai/knowledge && echo RUN cp -r /tmp/readonly /gen3discoveryai/knowledge && cp -R /tmp/readonly /gen3discoveryai/knowledge && echo AFTER /tmp/readonly && ls -Ra /tmp/readonly && echo AFTER /gen3discoveryai/knowledge && ls -Ra /gen3discoveryai/knowledge' ] containers: - name: gen3-discovery-ai From ddf47848ae42f22892be3cb49b899657ae5d2fed Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Fri, 27 Oct 2023 10:40:42 -0500 Subject: [PATCH 17/72] fix(ai): mv instead of cp --- kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index 7e3c39431..98436959a 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -64,7 +64,7 @@ spec: cpu: 2 memory: 512Mi command: [ - 'sh', '-c', 'echo BEFORE /tmp/readonly && ls -Ra /tmp/readonly && echo BEFORE /gen3discoveryai/knowledge && ls -Ra /gen3discoveryai/knowledge && echo RUN cp -r /tmp/readonly /gen3discoveryai/knowledge && cp -R /tmp/readonly /gen3discoveryai/knowledge && echo AFTER /tmp/readonly && ls -Ra /tmp/readonly && echo AFTER /gen3discoveryai/knowledge && ls -Ra /gen3discoveryai/knowledge' + 'sh', '-c', 'echo BEFORE /tmp/readonly && ls -Ra /tmp/readonly && echo BEFORE /gen3discoveryai/knowledge && ls -Ra /gen3discoveryai/knowledge && echo RUN mv /tmp/readonly /gen3discoveryai/knowledge && mv /tmp/readonly /gen3discoveryai/knowledge && echo AFTER /tmp/readonly && ls -Ra /tmp/readonly && echo AFTER /gen3discoveryai/knowledge && ls -Ra 
/gen3discoveryai/knowledge' ] containers: - name: gen3-discovery-ai From 0d05588540cdf5f40363dfd076895eb1033cd997 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Fri, 27 Oct 2023 10:42:43 -0500 Subject: [PATCH 18/72] fix(ai): back to cp, can't mv b/c of readonly --- kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index 98436959a..c634d847b 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -64,7 +64,7 @@ spec: cpu: 2 memory: 512Mi command: [ - 'sh', '-c', 'echo BEFORE /tmp/readonly && ls -Ra /tmp/readonly && echo BEFORE /gen3discoveryai/knowledge && ls -Ra /gen3discoveryai/knowledge && echo RUN mv /tmp/readonly /gen3discoveryai/knowledge && mv /tmp/readonly /gen3discoveryai/knowledge && echo AFTER /tmp/readonly && ls -Ra /tmp/readonly && echo AFTER /gen3discoveryai/knowledge && ls -Ra /gen3discoveryai/knowledge' + 'sh', '-c', 'echo BEFORE /tmp/readonly && ls -Ra /tmp/readonly && echo BEFORE /gen3discoveryai/knowledge && ls -Ra /gen3discoveryai/knowledge && echo RUN cp -r /tmp/readonly /gen3discoveryai/knowledge && cp -r /tmp/readonly /gen3discoveryai/knowledge && echo AFTER /tmp/readonly && ls -Ra /tmp/readonly && echo AFTER /gen3discoveryai/knowledge && ls -Ra /gen3discoveryai/knowledge' ] containers: - name: gen3-discovery-ai From 19cd76e529569cd83f75e91f13f346995b1f16a6 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Fri, 27 Oct 2023 13:44:34 -0500 Subject: [PATCH 19/72] feat(ai): use s3 and service account + role to handle persisted vectorstore data instead of configmap --- gen3/bin/kube-setup-gen3-discovery-ai.sh | 73 +++++++++++++++++-- .../gen3-discovery-ai-deploy.yaml | 30 ++++++-- 2 files changed, 88 insertions(+), 15 deletions(-) diff --git 
a/gen3/bin/kube-setup-gen3-discovery-ai.sh b/gen3/bin/kube-setup-gen3-discovery-ai.sh index 351d33721..f1ed3edc1 100644 --- a/gen3/bin/kube-setup-gen3-discovery-ai.sh +++ b/gen3/bin/kube-setup-gen3-discovery-ai.sh @@ -61,16 +61,75 @@ fi # exit 1 # fi +setup_storage() { + local saName="gen3-discovery-ai-sa" + g3kubectl create sa "$saName" > /dev/null 2>&1 || true -if [ -d "$(dirname $(g3k_manifest_path))/gen3-discovery-ai/knowledge/chromadb" ]; then - g3kubectl delete configmap gen3-discovery-ai-knowledge-library - g3kubectl create configmap gen3-discovery-ai-knowledge-library --from-file "$(dirname $(g3k_manifest_path))/gen3-discovery-ai/knowledge/chromadb" + local secret + local secretsFolder="$(gen3_secrets_folder)/g3auto/gen3-discovery-ai" + if ! secret="$(g3kubectl get secret gen3-discovery-ai-g3auto -o json 2> /dev/null)" \ + || "false" == "$(jq -r '.data | has("storage_config.json")' <<< "$secret")"; then + + gen3_log_info "setting up storage for gen3-discovery-ai service" + # + # gen3-discovery-ai-g3auto secret still does not exist + # we need to setup an S3 bucket and IAM creds + # let's avoid creating multiple buckets for different + # deployments to the same k8s cluseter (dev, etc) + # + local bucketName + local accountNumber + local environment + + if ! accountNumber="$(aws sts get-caller-identity --output text --query 'Account')"; then + gen3_log_err "could not determine account numer" + return 1 + fi + + if ! 
environment="$(g3kubectl get configmap manifest-global -o json | jq -r .data.environment)"; then + gen3_log_err "could not determine environment from manifest-global - bailing out of gen3-discovery-ai setup" + return 1 + fi + + # try to come up with a unique but composable bucket name + bucketName="gen3-discovery-ai-${accountNumber}-${environment//_/-}" + if aws s3 ls --page-size 1 "s3://${bucketName}" > /dev/null 2>&1; then + gen3_log_info "${bucketName} s3 bucket already exists - probably in use by another namespace - copy the creds from there to $(gen3_secrets_folder)/g3auto/gen3-discovery-ai" + # continue on ... + elif ! gen3 s3 create "${bucketName}"; then + gen3_log_err "maybe failed to create bucket ${bucketName}, but maybe not, because the terraform script is flaky" + fi + + local hostname + hostname="$(gen3 api hostname)" + jq -r -n --arg bucket "${bucketName}" --arg hostname "${hostname}" '.bucket=$bucket | .prefix=$hostname' > "${secretsFolder}/storage_config.json" + gen3 secrets sync 'setup gen3-discovery-ai credentials' + + local roleName + roleName="$(gen3 api safe-name gen3-discovery-ai)" || return 1 + + if ! gen3 awsrole info "$roleName" > /dev/null; then # setup role + bucketName="$( (gen3 secrets decode gen3-discovery-ai-g3auto storage_config.json || echo ERROR) | jq -r .bucket)" || return 1 + gen3 awsrole create "$roleName" "$saName" || return 1 + gen3 s3 attach-bucket-policy "$bucketName" --read-write --role-name "${roleName}" + # try to give the gitops role read/write permissions on the bucket + local gitopsRoleName + gitopsRoleName="$(gen3 api safe-name gitops)" + gen3 s3 attach-bucket-policy "$bucketName" --read-write --role-name "${gitopsRoleName}" + fi + fi +} + +if ! setup_storage; then + gen3_log_err "kube-setup-gen3-discovery-ai bailing out - storage failed setup" + exit 1 fi -# Sync the manifest config from manifest.json (or manifests/gen3-discovery-ai.json) to the k8s config map. 
-# This may not actually create the manifest-gen3-discovery-ai config map if the user did not specify any gen3-discovery-ai -# keys in their manifest configuration. -[[ -z "$GEN3_ROLL_ALL" ]] && gen3 gitops configmaps + +if [ -d "$(dirname $(g3k_manifest_path))/gen3-discovery-ai/knowledge/chromadb" ]; then + bucketName="$( (gen3 secrets decode gen3-discovery-ai-g3auto storage_config.json || echo ERROR) | jq -r .bucket)" || return 1 + aws s3 sync "$(dirname $(g3k_manifest_path))/gen3-discovery-ai/knowledge/" "s3://$(bucketName)/chromadb" +fi gen3 roll gen3-discovery-ai g3kubectl apply -f "${GEN3_HOME}/kube/services/gen3-discovery-ai/gen3-discovery-ai-service.yaml" diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index c634d847b..d48a36028 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -20,13 +20,11 @@ spec: release: production GEN3_DATE_LABEL spec: + serviceAccountName: gen3-discovery-ai-sa volumes: - name: gen3-discovery-ai-g3auto-volume secret: secretName: gen3-discovery-ai-g3auto - - name: tmp-readonly-configmap-knowledge-library - configMap: - name: gen3-discovery-ai-knowledge-library - name: gen3-discovery-ai-knowledge-library-volume emptyDir: {} initContainers: @@ -54,8 +52,6 @@ spec: subPath: env - name: gen3-discovery-ai-knowledge-library-volume mountPath: /gen3discoveryai/knowledge - - name: tmp-readonly-configmap-knowledge-library - mountPath: /tmp/readonly imagePullPolicy: Always resources: requests: @@ -63,9 +59,27 @@ spec: limits: cpu: 2 memory: 512Mi - command: [ - 'sh', '-c', 'echo BEFORE /tmp/readonly && ls -Ra /tmp/readonly && echo BEFORE /gen3discoveryai/knowledge && ls -Ra /gen3discoveryai/knowledge && echo RUN cp -r /tmp/readonly /gen3discoveryai/knowledge && cp -r /tmp/readonly /gen3discoveryai/knowledge && echo AFTER /tmp/readonly && ls -Ra /tmp/readonly && 
echo AFTER /gen3discoveryai/knowledge && ls -Ra /gen3discoveryai/knowledge' - ] + command: ["/bin/bash"] + args: + - "-c" + - | + bucketName="$( (gen3 secrets decode gen3-discovery-ai-g3auto storage_config.json || echo ERROR) | jq -r .bucket)" + echo "BUCKET: $bucketName + echo + echo BEFORE aws s3 ls $bucketName + aws s3 ls $bucketName + echo + echo BEFORE /gen3discoveryai/knowledge + ls -Ra /gen3discoveryai/knowledge + echo + echo RUN aws s3 sync "s3://$(bucketName)/chromadb" "/gen3discoveryai/knowledge" + aws s3 sync "s3://$(bucketName)/chromadb" "/gen3discoveryai/knowledge" + echo + echo AFTER aws s3 ls $bucketName + aws s3 ls $bucketName + echo + echo AFTER /gen3discoveryai/knowledge + ls -Ra /gen3discoveryai/knowledge containers: - name: gen3-discovery-ai GEN3_GEN3-DISCOVERY-AI_IMAGE From 00b6dcf8d4f37944dffa862a79b3a97544e5ab6b Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Fri, 27 Oct 2023 13:52:12 -0500 Subject: [PATCH 20/72] fix(ai): fix setup --- gen3/bin/kube-setup-gen3-discovery-ai.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gen3/bin/kube-setup-gen3-discovery-ai.sh b/gen3/bin/kube-setup-gen3-discovery-ai.sh index f1ed3edc1..4bee529d6 100644 --- a/gen3/bin/kube-setup-gen3-discovery-ai.sh +++ b/gen3/bin/kube-setup-gen3-discovery-ai.sh @@ -127,8 +127,8 @@ fi if [ -d "$(dirname $(g3k_manifest_path))/gen3-discovery-ai/knowledge/chromadb" ]; then - bucketName="$( (gen3 secrets decode gen3-discovery-ai-g3auto storage_config.json || echo ERROR) | jq -r .bucket)" || return 1 - aws s3 sync "$(dirname $(g3k_manifest_path))/gen3-discovery-ai/knowledge/" "s3://$(bucketName)/chromadb" + bucketName="$( (gen3 secrets decode gen3-discovery-ai-g3auto storage_config.json || echo ERROR) | jq -r .bucket)" || exit 1 + aws s3 sync "$(dirname $(g3k_manifest_path))/gen3-discovery-ai/knowledge/" "s3://$bucketName/chromadb" fi gen3 roll gen3-discovery-ai From a9c49b4d3ea363790848b67d06b71880361c8d30 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: 
Fri, 27 Oct 2023 14:01:10 -0500 Subject: [PATCH 21/72] fix(ai): fix setup --- gen3/bin/kube-setup-gen3-discovery-ai.sh | 7 +++++++ .../gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/gen3/bin/kube-setup-gen3-discovery-ai.sh b/gen3/bin/kube-setup-gen3-discovery-ai.sh index 4bee529d6..95016c5ba 100644 --- a/gen3/bin/kube-setup-gen3-discovery-ai.sh +++ b/gen3/bin/kube-setup-gen3-discovery-ai.sh @@ -86,13 +86,20 @@ setup_storage() { return 1 fi + gen3_log_info "accountNumber: ${accountNumber}" + if ! environment="$(g3kubectl get configmap manifest-global -o json | jq -r .data.environment)"; then gen3_log_err "could not determine environment from manifest-global - bailing out of gen3-discovery-ai setup" return 1 fi + gen3_log_info "environment: ${environment}" + # try to come up with a unique but composable bucket name bucketName="gen3-discovery-ai-${accountNumber}-${environment//_/-}" + + gen3_log_info "bucketName: ${bucketName}" + if aws s3 ls --page-size 1 "s3://${bucketName}" > /dev/null 2>&1; then gen3_log_info "${bucketName} s3 bucket already exists - probably in use by another namespace - copy the creds from there to $(gen3_secrets_folder)/g3auto/gen3-discovery-ai" # continue on ... 
diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index d48a36028..f188fba6f 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -72,8 +72,8 @@ spec: echo BEFORE /gen3discoveryai/knowledge ls -Ra /gen3discoveryai/knowledge echo - echo RUN aws s3 sync "s3://$(bucketName)/chromadb" "/gen3discoveryai/knowledge" - aws s3 sync "s3://$(bucketName)/chromadb" "/gen3discoveryai/knowledge" + echo RUN aws s3 sync "s3://${bucketName}/chromadb" "/gen3discoveryai/knowledge" + aws s3 sync "s3://${bucketName}/chromadb" "/gen3discoveryai/knowledge" echo echo AFTER aws s3 ls $bucketName aws s3 ls $bucketName From 04689b938aa91eee3ec0415db8319a109cd69a9f Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Fri, 27 Oct 2023 14:04:03 -0500 Subject: [PATCH 22/72] fix(ai): fix automation --- gen3/bin/kube-setup-gen3-discovery-ai.sh | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/gen3/bin/kube-setup-gen3-discovery-ai.sh b/gen3/bin/kube-setup-gen3-discovery-ai.sh index 95016c5ba..d5dcbfa9a 100644 --- a/gen3/bin/kube-setup-gen3-discovery-ai.sh +++ b/gen3/bin/kube-setup-gen3-discovery-ai.sh @@ -97,7 +97,7 @@ setup_storage() { # try to come up with a unique but composable bucket name bucketName="gen3-discovery-ai-${accountNumber}-${environment//_/-}" - + gen3_log_info "bucketName: ${bucketName}" if aws s3 ls --page-size 1 "s3://${bucketName}" > /dev/null 2>&1; then @@ -116,7 +116,7 @@ setup_storage() { roleName="$(gen3 api safe-name gen3-discovery-ai)" || return 1 if ! 
gen3 awsrole info "$roleName" > /dev/null; then # setup role - bucketName="$( (gen3 secrets decode gen3-discovery-ai-g3auto storage_config.json || echo ERROR) | jq -r .bucket)" || return 1 + bucketName="$( (gen3 secrets decode 'gen3-discovery-ai-g3auto' 'storage_config.json' || echo ERROR) | jq -r .bucket)" || return 1 gen3 awsrole create "$roleName" "$saName" || return 1 gen3 s3 attach-bucket-policy "$bucketName" --read-write --role-name "${roleName}" # try to give the gitops role read/write permissions on the bucket @@ -125,16 +125,18 @@ setup_storage() { gen3 s3 attach-bucket-policy "$bucketName" --read-write --role-name "${gitopsRoleName}" fi fi + + return 0 } -if ! setup_storage; then +if ! setup_storage(); then gen3_log_err "kube-setup-gen3-discovery-ai bailing out - storage failed setup" exit 1 fi if [ -d "$(dirname $(g3k_manifest_path))/gen3-discovery-ai/knowledge/chromadb" ]; then - bucketName="$( (gen3 secrets decode gen3-discovery-ai-g3auto storage_config.json || echo ERROR) | jq -r .bucket)" || exit 1 + bucketName="$( (gen3 secrets decode 'gen3-discovery-ai-g3auto' 'storage_config.json' || echo ERROR) | jq -r .bucket)" || exit 1 aws s3 sync "$(dirname $(g3k_manifest_path))/gen3-discovery-ai/knowledge/" "s3://$bucketName/chromadb" fi From 1bd41b626d51b9b9a02342cd9cbd3a1231b805b3 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Fri, 27 Oct 2023 14:06:04 -0500 Subject: [PATCH 23/72] fix(ai): automation --- gen3/bin/kube-setup-gen3-discovery-ai.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gen3/bin/kube-setup-gen3-discovery-ai.sh b/gen3/bin/kube-setup-gen3-discovery-ai.sh index d5dcbfa9a..4c1e36d1d 100644 --- a/gen3/bin/kube-setup-gen3-discovery-ai.sh +++ b/gen3/bin/kube-setup-gen3-discovery-ai.sh @@ -129,11 +129,12 @@ setup_storage() { return 0 } -if ! setup_storage(); then +if ! 
setup_storage; then gen3_log_err "kube-setup-gen3-discovery-ai bailing out - storage failed setup" exit 1 fi +gen3_log_info "Setup completed, syncing configuration to bucket" if [ -d "$(dirname $(g3k_manifest_path))/gen3-discovery-ai/knowledge/chromadb" ]; then bucketName="$( (gen3 secrets decode 'gen3-discovery-ai-g3auto' 'storage_config.json' || echo ERROR) | jq -r .bucket)" || exit 1 From 9a45888a2fdc594ac86876a980ce454792fb748e Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Fri, 27 Oct 2023 14:16:04 -0500 Subject: [PATCH 24/72] fix(ai): fix logic for setup --- gen3/bin/kube-setup-gen3-discovery-ai.sh | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/gen3/bin/kube-setup-gen3-discovery-ai.sh b/gen3/bin/kube-setup-gen3-discovery-ai.sh index 4c1e36d1d..5c2c91fa9 100644 --- a/gen3/bin/kube-setup-gen3-discovery-ai.sh +++ b/gen3/bin/kube-setup-gen3-discovery-ai.sh @@ -67,9 +67,12 @@ setup_storage() { local secret local secretsFolder="$(gen3_secrets_folder)/g3auto/gen3-discovery-ai" - if ! secret="$(g3kubectl get secret gen3-discovery-ai-g3auto -o json 2> /dev/null)" \ - || "false" == "$(jq -r '.data | has("storage_config.json")' <<< "$secret")"; then + secret="$(g3kubectl get secret gen3-discovery-ai-g3auto -o json 2> /dev/null)" + local hasStorageCfg + hasStorageCfg=$(jq -r '.data | has("storage_config.json")' <<< "$secret") + + if [ "$hasStorageCfg" = "false" ]; then gen3_log_info "setting up storage for gen3-discovery-ai service" # # gen3-discovery-ai-g3auto secret still does not exist @@ -134,7 +137,7 @@ if ! 
setup_storage; then exit 1 fi -gen3_log_info "Setup completed, syncing configuration to bucket" +gen3_log_info "Setup complete, syncing configuration to bucket" if [ -d "$(dirname $(g3k_manifest_path))/gen3-discovery-ai/knowledge/chromadb" ]; then bucketName="$( (gen3 secrets decode 'gen3-discovery-ai-g3auto' 'storage_config.json' || echo ERROR) | jq -r .bucket)" || exit 1 From 265398dde2a4cc62960a381b0a8a50ed27507608 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Fri, 27 Oct 2023 14:37:39 -0500 Subject: [PATCH 25/72] fix(ai): mount storage config and don't use gen3/jq since they're not available --- .../gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index f188fba6f..85f911b87 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -50,6 +50,10 @@ spec: readOnly: true mountPath: /gen3discoveryai/.env subPath: env + - name: gen3-discovery-ai-g3auto-volume + readOnly: true + mountPath: /gen3discoveryai/storage_config.json + subPath: /gen3discoveryai/storage_config.json - name: gen3-discovery-ai-knowledge-library-volume mountPath: /gen3discoveryai/knowledge imagePullPolicy: Always @@ -63,7 +67,7 @@ spec: args: - "-c" - | - bucketName="$( (gen3 secrets decode gen3-discovery-ai-g3auto storage_config.json || echo ERROR) | jq -r .bucket)" + bucketName=$(grep -o "\"bucket\": *\"[^\"]*\"" /gen3discoveryai/storage_config.json | awk -F'"' '{print $4}') echo "BUCKET: $bucketName echo echo BEFORE aws s3 ls $bucketName From 6b2da79792d6a6fed06cd2945d45f1e3c0031acf Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Fri, 27 Oct 2023 14:40:29 -0500 Subject: [PATCH 26/72] fix(ai): fix wrong path --- kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index 85f911b87..6770ef886 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -53,7 +53,7 @@ spec: - name: gen3-discovery-ai-g3auto-volume readOnly: true mountPath: /gen3discoveryai/storage_config.json - subPath: /gen3discoveryai/storage_config.json + subPath: storage_config.json - name: gen3-discovery-ai-knowledge-library-volume mountPath: /gen3discoveryai/knowledge imagePullPolicy: Always From 8a09f4ecc6af381976e7d2a11d5d0ba4dc579860 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Fri, 27 Oct 2023 14:45:27 -0500 Subject: [PATCH 27/72] fix(ai): quotes --- .../gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index 6770ef886..d771117c1 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -70,8 +70,8 @@ spec: bucketName=$(grep -o "\"bucket\": *\"[^\"]*\"" /gen3discoveryai/storage_config.json | awk -F'"' '{print $4}') echo "BUCKET: $bucketName echo - echo BEFORE aws s3 ls $bucketName - aws s3 ls $bucketName + echo BEFORE aws s3 ls "$bucketName" + aws s3 ls "$bucketName" echo echo BEFORE /gen3discoveryai/knowledge ls -Ra /gen3discoveryai/knowledge @@ -79,8 +79,8 @@ spec: echo RUN aws s3 sync "s3://${bucketName}/chromadb" "/gen3discoveryai/knowledge" aws s3 sync "s3://${bucketName}/chromadb" "/gen3discoveryai/knowledge" echo - echo AFTER aws s3 ls $bucketName - aws s3 ls $bucketName + echo AFTER aws s3 ls "$bucketName" + aws s3 ls "$bucketName" echo echo AFTER /gen3discoveryai/knowledge ls -Ra /gen3discoveryai/knowledge From 
fff20fe8d232fbe1c95bda9c276d46c640b16469 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Fri, 27 Oct 2023 14:48:20 -0500 Subject: [PATCH 28/72] fix(ai): quoting --- kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index d771117c1..587f07204 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -68,7 +68,7 @@ spec: - "-c" - | bucketName=$(grep -o "\"bucket\": *\"[^\"]*\"" /gen3discoveryai/storage_config.json | awk -F'"' '{print $4}') - echo "BUCKET: $bucketName + echo BUCKET: "$bucketName" echo echo BEFORE aws s3 ls "$bucketName" aws s3 ls "$bucketName" From 88cb769b5b7fa643d32a9df99abede15d64422cc Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Fri, 27 Oct 2023 14:51:30 -0500 Subject: [PATCH 29/72] fix(ai): use awshelper for access to aws commands --- .../gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index 587f07204..ca70e81db 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -28,15 +28,14 @@ spec: - name: gen3-discovery-ai-knowledge-library-volume emptyDir: {} initContainers: - # configmaps mount to read-only volumes and chromadb's persisted disk - # excepts the ability to write. + # chromadb's persisted disk expects the ability to write. # # Solution: utilize emptyDir as a writable space. 
# - # Procedure: in init container, mount configmap into readonly space, copy files to writable + # Procedure: in init container, copy files from s3 to writable # space in emptyDir, use files from writable in final container - name: gen3-discovery-ai-init - GEN3_GEN3-DISCOVERY-AI_IMAGE + GEN3_AWSHELPER_IMAGE|-image: quay.io/cdis/awshelper:master-| imagePullPolicy: Always ports: - containerPort: 8080 From e396dabce0822ab3a110a24c0336ccd04cbf1482 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Fri, 27 Oct 2023 14:57:25 -0500 Subject: [PATCH 30/72] fix(ai): move files to correct location --- .../services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index ca70e81db..1eb378122 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -75,8 +75,9 @@ spec: echo BEFORE /gen3discoveryai/knowledge ls -Ra /gen3discoveryai/knowledge echo - echo RUN aws s3 sync "s3://${bucketName}/chromadb" "/gen3discoveryai/knowledge" - aws s3 sync "s3://${bucketName}/chromadb" "/gen3discoveryai/knowledge" + aws s3 sync "s3://${bucketName}" "/gen3discoveryai/knowledge/tmp" + mv /gen3discoveryai/knowledge/tmp/chromadb/.* /gen3discoveryai/knowledge + rm -r /gen3discoveryai/knowledge/tmp echo echo AFTER aws s3 ls "$bucketName" aws s3 ls "$bucketName" From c2d5c1dc7733bcf8b0f17d3c5e6339c432349557 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Fri, 27 Oct 2023 15:02:57 -0500 Subject: [PATCH 31/72] fix(ai): only get folder --- .../services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index 1eb378122..ca70e81db 100644 
--- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -75,9 +75,8 @@ spec: echo BEFORE /gen3discoveryai/knowledge ls -Ra /gen3discoveryai/knowledge echo - aws s3 sync "s3://${bucketName}" "/gen3discoveryai/knowledge/tmp" - mv /gen3discoveryai/knowledge/tmp/chromadb/.* /gen3discoveryai/knowledge - rm -r /gen3discoveryai/knowledge/tmp + echo RUN aws s3 sync "s3://${bucketName}/chromadb" "/gen3discoveryai/knowledge" + aws s3 sync "s3://${bucketName}/chromadb" "/gen3discoveryai/knowledge" echo echo AFTER aws s3 ls "$bucketName" aws s3 ls "$bucketName" From 9c01535e156ce0ea196beb8e5cff9bfa458c801a Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Fri, 27 Oct 2023 15:05:39 -0500 Subject: [PATCH 32/72] fix(ai): fix sync --- gen3/bin/kube-setup-gen3-discovery-ai.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gen3/bin/kube-setup-gen3-discovery-ai.sh b/gen3/bin/kube-setup-gen3-discovery-ai.sh index 5c2c91fa9..3f1ebc5c5 100644 --- a/gen3/bin/kube-setup-gen3-discovery-ai.sh +++ b/gen3/bin/kube-setup-gen3-discovery-ai.sh @@ -141,7 +141,7 @@ gen3_log_info "Setup complete, syncing configuration to bucket" if [ -d "$(dirname $(g3k_manifest_path))/gen3-discovery-ai/knowledge/chromadb" ]; then bucketName="$( (gen3 secrets decode 'gen3-discovery-ai-g3auto' 'storage_config.json' || echo ERROR) | jq -r .bucket)" || exit 1 - aws s3 sync "$(dirname $(g3k_manifest_path))/gen3-discovery-ai/knowledge/" "s3://$bucketName/chromadb" + aws s3 sync "$(dirname $(g3k_manifest_path))/gen3-discovery-ai/knowledge/chromadb" "s3://$bucketName/chromadb" fi gen3 roll gen3-discovery-ai From d8a31718d98b3af89dc159cb16d8a3cc3acb344e Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Fri, 27 Oct 2023 15:40:01 -0500 Subject: [PATCH 33/72] fix(ai): clear folder before syncing --- gen3/bin/kube-setup-gen3-discovery-ai.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/gen3/bin/kube-setup-gen3-discovery-ai.sh b/gen3/bin/kube-setup-gen3-discovery-ai.sh index 3f1ebc5c5..c8338afbc 100644 --- a/gen3/bin/kube-setup-gen3-discovery-ai.sh +++ b/gen3/bin/kube-setup-gen3-discovery-ai.sh @@ -141,7 +141,7 @@ gen3_log_info "Setup complete, syncing configuration to bucket" if [ -d "$(dirname $(g3k_manifest_path))/gen3-discovery-ai/knowledge/chromadb" ]; then bucketName="$( (gen3 secrets decode 'gen3-discovery-ai-g3auto' 'storage_config.json' || echo ERROR) | jq -r .bucket)" || exit 1 - aws s3 sync "$(dirname $(g3k_manifest_path))/gen3-discovery-ai/knowledge/chromadb" "s3://$bucketName/chromadb" + aws s3 sync "$(dirname $(g3k_manifest_path))/gen3-discovery-ai/knowledge/chromadb" "s3://$bucketName/chromadb" --delete fi gen3 roll gen3-discovery-ai From 007542982f9cf2e77f40102a4f1f314ab5ab3579 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Mon, 30 Oct 2023 13:28:06 -0500 Subject: [PATCH 34/72] fix(ai): update bucket contents every roll for updates --- gen3/bin/kube-setup-gen3-discovery-ai.sh | 6 +-- kube/services/gen3-discovery-ai/README.md | 45 +++++++++++++++++++++++ 2 files changed, 47 insertions(+), 4 deletions(-) create mode 100644 kube/services/gen3-discovery-ai/README.md diff --git a/gen3/bin/kube-setup-gen3-discovery-ai.sh b/gen3/bin/kube-setup-gen3-discovery-ai.sh index c8338afbc..5769a5f48 100644 --- a/gen3/bin/kube-setup-gen3-discovery-ai.sh +++ b/gen3/bin/kube-setup-gen3-discovery-ai.sh @@ -139,10 +139,8 @@ fi gen3_log_info "Setup complete, syncing configuration to bucket" -if [ -d "$(dirname $(g3k_manifest_path))/gen3-discovery-ai/knowledge/chromadb" ]; then - bucketName="$( (gen3 secrets decode 'gen3-discovery-ai-g3auto' 'storage_config.json' || echo ERROR) | jq -r .bucket)" || exit 1 - aws s3 sync "$(dirname $(g3k_manifest_path))/gen3-discovery-ai/knowledge/chromadb" "s3://$bucketName/chromadb" --delete -fi +bucketName="$( (gen3 secrets decode 'gen3-discovery-ai-g3auto' 'storage_config.json' || echo ERROR) | jq -r .bucket)" || 
exit 1 +aws s3 sync "$(dirname $(g3k_manifest_path))/gen3-discovery-ai/knowledge/chromadb" "s3://$bucketName/chromadb" --delete gen3 roll gen3-discovery-ai g3kubectl apply -f "${GEN3_HOME}/kube/services/gen3-discovery-ai/gen3-discovery-ai-service.yaml" diff --git a/kube/services/gen3-discovery-ai/README.md b/kube/services/gen3-discovery-ai/README.md new file mode 100644 index 000000000..fb25d8201 --- /dev/null +++ b/kube/services/gen3-discovery-ai/README.md @@ -0,0 +1,45 @@ +# Gen3 Discovery AI + +## Populating Disk for In-Memory Vectordb Chromadb + +In order to set up pre-configured topics, we need to load a bunch of data +into Chromadb (which is an in-memory vector database with an option to persist to disk). + +To load topics consistently, we set up an S3 bucket to house the persisted +vectordb. + +### Getting data into S3 + +Run the service elsewhere, load the data, and persist it to disk. Then move those +files from disk into the VM. The expectation is that for Chromadb loading, the +files are placed in a `gen3-discovery-ai/knowledge/chromadb` folder relative to +where the `manifest.json` is. For example: +`~/cdis-manifest/avantol.planx-pla.net/gen3-discovery-ai/knowledge/chromadb` + +You can rsync from local if you've generated it locally. + +#### IMPORTANT: Use the same service image to generate the data locally as is used in the environment + +> IMPORTANT NOTE: There are some oddities with using the persist to disk across different OS's with different security packages. + +You should run the store knowledge commands that eventually create the persisted +disk from within the SAME IMAGE that gets deployed.
+ +One way to do this is as follows: + +* Use docker to build the image locally and run it with a volume mount +* exec into the running container +* run commands necessary to load the knowledge +* check the location of the volume mount on your host system for the persisted data +* rsync that data to the data commons (or check into cdis-manifest) + +See the Gen3 Discovery AI service repo README for more info. + +``` +rsync -re ssh --progress ~/repos/gen3-discovery-ai/knowledge/ avantol@cdistest_dev.csoc:~/cdis-manifest/avantol.planx-pla.net/gen3-discovery-ai/knowledge/chromadb +``` + +### Getting data from S3 in mem + +We specify a path for Chromadb to use for persisted data and when it sees +data there, it loads it in. \ No newline at end of file From 40660df9be4236da6d54b75bffb0c4e456f4a064 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Wed, 1 Nov 2023 16:08:53 -0500 Subject: [PATCH 35/72] feat(ai): support TSV loading from manifest config --- gen3/bin/kube-setup-gen3-discovery-ai.sh | 1 + .../gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/gen3/bin/kube-setup-gen3-discovery-ai.sh b/gen3/bin/kube-setup-gen3-discovery-ai.sh index 5769a5f48..d31db7c18 100644 --- a/gen3/bin/kube-setup-gen3-discovery-ai.sh +++ b/gen3/bin/kube-setup-gen3-discovery-ai.sh @@ -141,6 +141,7 @@ gen3_log_info "Setup complete, syncing configuration to bucket" bucketName="$( (gen3 secrets decode 'gen3-discovery-ai-g3auto' 'storage_config.json' || echo ERROR) | jq -r .bucket)" || exit 1 aws s3 sync "$(dirname $(g3k_manifest_path))/gen3-discovery-ai/knowledge/chromadb" "s3://$bucketName/chromadb" --delete +aws s3 sync "$(dirname $(g3k_manifest_path))/gen3-discovery-ai/knowledge/tsvs" "s3://$bucketName/tsvs" --delete gen3 roll gen3-discovery-ai g3kubectl apply -f "${GEN3_HOME}/kube/services/gen3-discovery-ai/gen3-discovery-ai-service.yaml" diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml 
b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index ca70e81db..31036a727 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -75,11 +75,11 @@ spec: echo BEFORE /gen3discoveryai/knowledge ls -Ra /gen3discoveryai/knowledge echo - echo RUN aws s3 sync "s3://${bucketName}/chromadb" "/gen3discoveryai/knowledge" - aws s3 sync "s3://${bucketName}/chromadb" "/gen3discoveryai/knowledge" - echo - echo AFTER aws s3 ls "$bucketName" - aws s3 ls "$bucketName" + echo syncing from s3, running load_into_knowledge_store.py + aws s3 sync "s3://${bucketName}/tsvs" "/gen3discoveryai/knowledge/tmp" + python3 /gen3discoveryai/bin/load_into_knowledge_store.py /gen3discoveryai/knowledge/tmp + mv /gen3discoveryai/knowledge/tmp/ /gen3discoveryai/knowledge/ + rm -r /gen3discoveryai/knowledge/tmp/ echo echo AFTER /gen3discoveryai/knowledge ls -Ra /gen3discoveryai/knowledge From 156fc5fa38f642b3086cbfe2e1c57590f4a320e1 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Wed, 1 Nov 2023 16:15:01 -0500 Subject: [PATCH 36/72] fix(ai): fix init so aws syncing is done with awshelper image and loading into vectorstore is with service image --- .../gen3-discovery-ai-deploy.yaml | 55 ++++++++++++++++--- 1 file changed, 48 insertions(+), 7 deletions(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index 31036a727..83a2c7648 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -32,9 +32,11 @@ spec: # # Solution: utilize emptyDir as a writable space. 
# - # Procedure: in init container, copy files from s3 to writable - # space in emptyDir, use files from writable in final container - - name: gen3-discovery-ai-init + # Procedure: in init containers, copy files from s3 to writable + # temporary space in emptyDir, use files from writable space + # to load into knowledge library, move final knowledge library + # files into top-level emptyDir and make available in final container + - name: gen3-discovery-ai-aws-init GEN3_AWSHELPER_IMAGE|-image: quay.io/cdis/awshelper:master-| imagePullPolicy: Always ports: @@ -69,14 +71,53 @@ spec: bucketName=$(grep -o "\"bucket\": *\"[^\"]*\"" /gen3discoveryai/storage_config.json | awk -F'"' '{print $4}') echo BUCKET: "$bucketName" echo - echo BEFORE aws s3 ls "$bucketName" - aws s3 ls "$bucketName" - echo echo BEFORE /gen3discoveryai/knowledge ls -Ra /gen3discoveryai/knowledge echo - echo syncing from s3, running load_into_knowledge_store.py + echo syncing from s3 aws s3 sync "s3://${bucketName}/tsvs" "/gen3discoveryai/knowledge/tmp" + mv /gen3discoveryai/knowledge/tmp/ /gen3discoveryai/knowledge/ + rm -r /gen3discoveryai/knowledge/tmp/ + echo + echo AFTER /gen3discoveryai/knowledge + ls -Ra /gen3discoveryai/knowledge + - name: gen3-discovery-ai-knowledge-init + GEN3_GEN3-DISCOVERY-AI_IMAGE + imagePullPolicy: Always + ports: + - containerPort: 8080 + env: + - name: GEN3_DEBUG + GEN3_DEBUG_FLAG|-value: "False"-| + - name: ANONYMIZED_TELEMETRY + value: "False" + volumeMounts: + - name: gen3-discovery-ai-g3auto-volume + readOnly: true + mountPath: /gen3discoveryai/.env + subPath: env + - name: gen3-discovery-ai-g3auto-volume + readOnly: true + mountPath: /gen3discoveryai/storage_config.json + subPath: storage_config.json + - name: gen3-discovery-ai-knowledge-library-volume + mountPath: /gen3discoveryai/knowledge + imagePullPolicy: Always + resources: + requests: + cpu: 1 + limits: + cpu: 2 + memory: 512Mi + command: ["/bin/bash"] + args: + - "-c" + - | + echo + echo BEFORE
/gen3discoveryai/knowledge + ls -Ra /gen3discoveryai/knowledge + echo + echo running load_into_knowledge_store.py python3 /gen3discoveryai/bin/load_into_knowledge_store.py /gen3discoveryai/knowledge/tmp mv /gen3discoveryai/knowledge/tmp/ /gen3discoveryai/knowledge/ rm -r /gen3discoveryai/knowledge/tmp/ From cdd8d53a5fafac055183e94cce508b1ae9898a8b Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Wed, 1 Nov 2023 16:53:14 -0500 Subject: [PATCH 37/72] fix(ai): fix loading --- gen3/bin/kube-setup-gen3-discovery-ai.sh | 1 + kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/gen3/bin/kube-setup-gen3-discovery-ai.sh b/gen3/bin/kube-setup-gen3-discovery-ai.sh index d31db7c18..47ddeaf62 100644 --- a/gen3/bin/kube-setup-gen3-discovery-ai.sh +++ b/gen3/bin/kube-setup-gen3-discovery-ai.sh @@ -140,6 +140,7 @@ fi gen3_log_info "Setup complete, syncing configuration to bucket" bucketName="$( (gen3 secrets decode 'gen3-discovery-ai-g3auto' 'storage_config.json' || echo ERROR) | jq -r .bucket)" || exit 1 +# NOTE: loading chromadb files directly like this causes some issues... 
aws s3 sync "$(dirname $(g3k_manifest_path))/gen3-discovery-ai/knowledge/chromadb" "s3://$bucketName/chromadb" --delete aws s3 sync "$(dirname $(g3k_manifest_path))/gen3-discovery-ai/knowledge/tsvs" "s3://$bucketName/tsvs" --delete diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index 83a2c7648..0ccfcd6f2 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -76,8 +76,6 @@ spec: echo echo syncing from s3 aws s3 sync "s3://${bucketName}/tsvs" "/gen3discoveryai/knowledge/tmp" - mv /gen3discoveryai/knowledge/tmp/ /gen3discoveryai/knowledge/ - rm -r /gen3discoveryai/knowledge/tmp/ echo echo AFTER /gen3discoveryai/knowledge ls -Ra /gen3discoveryai/knowledge From 6084a5326ab2f5ce7f653bd8373e3bc5e09ea000 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Wed, 1 Nov 2023 16:55:22 -0500 Subject: [PATCH 38/72] fix(ai): fix loading --- kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index 0ccfcd6f2..af680953f 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -116,8 +116,7 @@ spec: ls -Ra /gen3discoveryai/knowledge echo echo running load_into_knowledge_store.py - python3 /gen3discoveryai/bin/load_into_knowledge_store.py /gen3discoveryai/knowledge/tmp - mv /gen3discoveryai/knowledge/tmp/ /gen3discoveryai/knowledge/ + poetry run python /gen3discoveryai/bin/load_into_knowledge_store.py /gen3discoveryai/knowledge/tmp rm -r /gen3discoveryai/knowledge/tmp/ echo echo AFTER /gen3discoveryai/knowledge From 68dd4a7cedfa3153177b8ec14967ae230c18eb63 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Fri, 3 Nov 2023 
09:58:11 -0500 Subject: [PATCH 39/72] fix(ai): sync all files --- gen3/bin/kube-setup-gen3-discovery-ai.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/gen3/bin/kube-setup-gen3-discovery-ai.sh b/gen3/bin/kube-setup-gen3-discovery-ai.sh index 47ddeaf62..abf3f9337 100644 --- a/gen3/bin/kube-setup-gen3-discovery-ai.sh +++ b/gen3/bin/kube-setup-gen3-discovery-ai.sh @@ -140,9 +140,7 @@ fi gen3_log_info "Setup complete, syncing configuration to bucket" bucketName="$( (gen3 secrets decode 'gen3-discovery-ai-g3auto' 'storage_config.json' || echo ERROR) | jq -r .bucket)" || exit 1 -# NOTE: loading chromadb files directly like this causes some issues... -aws s3 sync "$(dirname $(g3k_manifest_path))/gen3-discovery-ai/knowledge/chromadb" "s3://$bucketName/chromadb" --delete -aws s3 sync "$(dirname $(g3k_manifest_path))/gen3-discovery-ai/knowledge/tsvs" "s3://$bucketName/tsvs" --delete +aws s3 sync "$(dirname $(g3k_manifest_path))/gen3-discovery-ai/knowledge" "s3://$bucketName" --delete gen3 roll gen3-discovery-ai g3kubectl apply -f "${GEN3_HOME}/kube/services/gen3-discovery-ai/gen3-discovery-ai-service.yaml" From 1afee002cbced5ac1c6d295bf5c5d43ff5d2fabc Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Fri, 3 Nov 2023 12:40:07 -0500 Subject: [PATCH 40/72] feat(ai): add google secret loading and mounting --- kube/services/gen3-discovery-ai/README.md | 44 +++++++------------ .../gen3-discovery-ai-deploy.yaml | 7 +++ 2 files changed, 24 insertions(+), 27 deletions(-) diff --git a/kube/services/gen3-discovery-ai/README.md b/kube/services/gen3-discovery-ai/README.md index fb25d8201..7895f30a9 100644 --- a/kube/services/gen3-discovery-ai/README.md +++ b/kube/services/gen3-discovery-ai/README.md @@ -1,4 +1,11 @@ -# Gen3 Discovery AI +# Gen3 Discovery AI Configuration + +Expects configuration in a `gen3-discovery-ai` folder relative to +where the `manifest.json` is. 
+ +Expects secrets setup in `g3auto/gen3-discovery-ai` folder + - `credentials.json`: Google service account key if using a topic with Google Vertex AI + - `env`: .env file contents for service configuration (see service repo for a default one) ## Populating Disc for In-Memory Vectordb Chromadb @@ -6,40 +13,23 @@ In order to setup pre-configured topics, we need to load a bunch of data into Chromadb (which is an inmem vectordb with an option to persist to disk). To load topics consistently, we setup an S3 bucket to house the persisted -vectordb. +data for the vectordb. ### Getting data into S3 -Run the service elsewhere, load the data, and persist it to disk. Then move those -files from disk into the VM. The expectation is that for Chromadb loading, the -files are placed in a `gen3-discovery-ai/knowledge/chromadb` folder relative to -where the `manifest.json` is. For example: -`~/cdis-manifest/avantol.planx-pla.net/gen3-discovery-ai/gen3-discovery-ai/knowledge/chromadb` - -You can rsync from local if you've generated it locally. - -#### IMPORTANT: Use the same service image to generate the data locally as is used in the environment +We could support more than TSVs in the future, but for now that's the only automated support. -> IMPORTANT NOTE: There are some oddities with using the persist to disk across different OS's with different security packages. - -You should run the store knowledge commands that eventually create the persisted -disk from within the SAME IMAGE that gets deployed. - -One way to do this is as follows: +Move TSVs of data into the configuration in cdis-manifest. The expectation is that for Chromadb loading, the +files are placed in a `gen3-discovery-ai/knowledge/tsvs` folder relative to +where the `manifest.json` is. 
For example: +`~/cdis-manifest/avantol.planx-pla.net/gen3-discovery-ai/gen3-discovery-ai/knowledge/tsvs` -* Use docker to build the image locally and run it with a volume mount -* exec into the running container -* run commands necessary to load the knowledge -* check the location of the volume mount on your host system for the persisted data -* rsync that data to the data commons (or check into cdis-manifest) +You can rsync from local if you have files locally. See the Gen3 Discovery AI service repo README for more info. -``` -rsync -re ssh --progress ~/repos/gen3-discovery-ai/knowledge/ avantol@cdistest_dev.csoc:~/cdis-manifest/avantol.planx-pla.net/gen3-discovery-ai/knowledge/chromadb -``` - ### Getting data from S3 in mem We specify a path for Chromadb to use for persisted data and when it sees -data there, it loads it in. \ No newline at end of file +data there, it loads it in. So the deployment automation aws syncs the bucket +and then calls a script to load the files into the in-mem vectorstore from there. 
\ No newline at end of file diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index af680953f..8cea83047 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -46,11 +46,18 @@ spec: GEN3_DEBUG_FLAG|-value: "False"-| - name: ANONYMIZED_TELEMETRY value: "False" + - name: GOOGLE_APPLICATION_CREDENTIALS + value: /gen3discoveryai/credentials.json volumeMounts: - name: gen3-discovery-ai-g3auto-volume readOnly: true mountPath: /gen3discoveryai/.env subPath: env + - name: gen3-discovery-ai-g3auto-volume + readOnly: true + mountPath: /gen3discoveryai/credentials.json + subPath: credentials.json + optional: true - name: gen3-discovery-ai-g3auto-volume readOnly: true mountPath: /gen3discoveryai/storage_config.json From ca49a597db2a6f005b5144c8ee51d04118c089af Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Fri, 3 Nov 2023 13:05:13 -0500 Subject: [PATCH 41/72] fix(ai): mount to container, not inits --- .../gen3-discovery-ai-deploy.yaml | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index 8cea83047..2a314f6b1 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -44,20 +44,7 @@ spec: env: - name: GEN3_DEBUG GEN3_DEBUG_FLAG|-value: "False"-| - - name: ANONYMIZED_TELEMETRY - value: "False" - - name: GOOGLE_APPLICATION_CREDENTIALS - value: /gen3discoveryai/credentials.json volumeMounts: - - name: gen3-discovery-ai-g3auto-volume - readOnly: true - mountPath: /gen3discoveryai/.env - subPath: env - - name: gen3-discovery-ai-g3auto-volume - readOnly: true - mountPath: /gen3discoveryai/credentials.json - subPath: credentials.json - optional: true - 
name: gen3-discovery-ai-g3auto-volume readOnly: true mountPath: /gen3discoveryai/storage_config.json @@ -96,11 +83,17 @@ spec: GEN3_DEBUG_FLAG|-value: "False"-| - name: ANONYMIZED_TELEMETRY value: "False" + - name: GOOGLE_APPLICATION_CREDENTIALS + value: /gen3discoveryai/credentials.json volumeMounts: - name: gen3-discovery-ai-g3auto-volume readOnly: true mountPath: /gen3discoveryai/.env subPath: env + - name: gen3-discovery-ai-g3auto-volume + readOnly: true + mountPath: /gen3discoveryai/credentials.json + subPath: credentials.json - name: gen3-discovery-ai-g3auto-volume readOnly: true mountPath: /gen3discoveryai/storage_config.json @@ -139,11 +132,17 @@ spec: GEN3_DEBUG_FLAG|-value: "False"-| - name: ANONYMIZED_TELEMETRY value: "False" + - name: GOOGLE_APPLICATION_CREDENTIALS + value: /gen3discoveryai/credentials.json volumeMounts: - name: gen3-discovery-ai-g3auto-volume readOnly: true mountPath: /gen3discoveryai/.env subPath: env + - name: gen3-discovery-ai-g3auto-volume + readOnly: true + mountPath: /gen3discoveryai/credentials.json + subPath: credentials.json - name: gen3-discovery-ai-knowledge-library-volume mountPath: /gen3discoveryai/knowledge imagePullPolicy: Always From 15551554730c8c598e36b2cdcc779ec3f551e52a Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Fri, 3 Nov 2023 13:22:52 -0500 Subject: [PATCH 42/72] fix(mount): don't create another dir --- kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index 2a314f6b1..654cf3dc8 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -141,7 +141,7 @@ spec: subPath: env - name: gen3-discovery-ai-g3auto-volume readOnly: true - mountPath: /gen3discoveryai/credentials.json + mountPath: /gen3discoveryai/ subPath: 
credentials.json - name: gen3-discovery-ai-knowledge-library-volume mountPath: /gen3discoveryai/knowledge From 657278fed177e07ac10a0fe5b26e097805fc96ba Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Fri, 3 Nov 2023 13:23:19 -0500 Subject: [PATCH 43/72] fix(mount): don't create another dir --- kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index 654cf3dc8..e62bfd921 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -92,7 +92,7 @@ spec: subPath: env - name: gen3-discovery-ai-g3auto-volume readOnly: true - mountPath: /gen3discoveryai/credentials.json + mountPath: /gen3discoveryai/ subPath: credentials.json - name: gen3-discovery-ai-g3auto-volume readOnly: true From ebfe47727afc1d531ede637aaa1533c8d94aa0bb Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Fri, 3 Nov 2023 14:00:24 -0500 Subject: [PATCH 44/72] fix(mounts): fix paths --- .../gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index e62bfd921..680062ebd 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -47,7 +47,7 @@ spec: volumeMounts: - name: gen3-discovery-ai-g3auto-volume readOnly: true - mountPath: /gen3discoveryai/storage_config.json + mountPath: /gen3discoveryai/ subPath: storage_config.json - name: gen3-discovery-ai-knowledge-library-volume mountPath: /gen3discoveryai/knowledge @@ -88,7 +88,7 @@ spec: volumeMounts: - name: gen3-discovery-ai-g3auto-volume readOnly: true - mountPath: /gen3discoveryai/.env + 
mountPath: /gen3discoveryai/ subPath: env - name: gen3-discovery-ai-g3auto-volume readOnly: true @@ -96,7 +96,7 @@ spec: subPath: credentials.json - name: gen3-discovery-ai-g3auto-volume readOnly: true - mountPath: /gen3discoveryai/storage_config.json + mountPath: /gen3discoveryai/ subPath: storage_config.json - name: gen3-discovery-ai-knowledge-library-volume mountPath: /gen3discoveryai/knowledge @@ -137,7 +137,7 @@ spec: volumeMounts: - name: gen3-discovery-ai-g3auto-volume readOnly: true - mountPath: /gen3discoveryai/.env + mountPath: /gen3discoveryai/ subPath: env - name: gen3-discovery-ai-g3auto-volume readOnly: true From 32bc95bc69d26335b6a642ce3119986d770fc346 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Thu, 9 Nov 2023 10:26:32 -0600 Subject: [PATCH 45/72] fix(mounts): mount all secrets --- .../gen3-discovery-ai-deploy.yaml | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index 680062ebd..fb0c4619c 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -48,7 +48,6 @@ spec: - name: gen3-discovery-ai-g3auto-volume readOnly: true mountPath: /gen3discoveryai/ - subPath: storage_config.json - name: gen3-discovery-ai-knowledge-library-volume mountPath: /gen3discoveryai/knowledge imagePullPolicy: Always @@ -89,15 +88,6 @@ spec: - name: gen3-discovery-ai-g3auto-volume readOnly: true mountPath: /gen3discoveryai/ - subPath: env - - name: gen3-discovery-ai-g3auto-volume - readOnly: true - mountPath: /gen3discoveryai/ - subPath: credentials.json - - name: gen3-discovery-ai-g3auto-volume - readOnly: true - mountPath: /gen3discoveryai/ - subPath: storage_config.json - name: gen3-discovery-ai-knowledge-library-volume mountPath: /gen3discoveryai/knowledge imagePullPolicy: Always @@ -138,11 +128,6 @@ spec: - name: 
gen3-discovery-ai-g3auto-volume readOnly: true mountPath: /gen3discoveryai/ - subPath: env - - name: gen3-discovery-ai-g3auto-volume - readOnly: true - mountPath: /gen3discoveryai/ - subPath: credentials.json - name: gen3-discovery-ai-knowledge-library-volume mountPath: /gen3discoveryai/knowledge imagePullPolicy: Always From acd3537c2de6fe0d289029684d3ee44b1ceb0b73 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Thu, 9 Nov 2023 10:33:35 -0600 Subject: [PATCH 46/72] fix(secrets): allow .env file to be a secret --- gen3/bin/secrets.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gen3/bin/secrets.sh b/gen3/bin/secrets.sh index 2c5523dc0..b514186a6 100644 --- a/gen3/bin/secrets.sh +++ b/gen3/bin/secrets.sh @@ -168,7 +168,7 @@ gen3_secrets_sync() { # in subshell now - forget about local flags="" for secretValueFile in *; do - if [[ -f "$secretValueFile" && "$secretValueFile" =~ ^[a-zA-Z0-9][^\ ]*[a-zA-Z0-9]$ && ! "$secretValueFile" =~ \.swp$ ]]; then + if [[ -f "$secretValueFile" && (("$secretValueFile" == ".env") || ("$secretValueFile" =~ ^[a-zA-Z0-9][^\ ]*[a-zA-Z0-9]$ && ! "$secretValueFile" =~ \.swp$)) ]]; then flags="$flags --from-file=$secretValueFile" else gen3_log_info "gen3_secrets_sync" "ignoring funny secrets file g3auto/$serviceName/$secretValueFile" From 6f9e6c9531f260f494dc21fe7c36db2bc5c34d33 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Thu, 9 Nov 2023 10:43:26 -0600 Subject: [PATCH 47/72] fix(secrets): revert failed attempt to support .env --- gen3/bin/secrets.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gen3/bin/secrets.sh b/gen3/bin/secrets.sh index b514186a6..2c5523dc0 100644 --- a/gen3/bin/secrets.sh +++ b/gen3/bin/secrets.sh @@ -168,7 +168,7 @@ gen3_secrets_sync() { # in subshell now - forget about local flags="" for secretValueFile in *; do - if [[ -f "$secretValueFile" && (("$secretValueFile" == ".env") || ("$secretValueFile" =~ ^[a-zA-Z0-9][^\ ]*[a-zA-Z0-9]$ && ! 
"$secretValueFile" =~ \.swp$)) ]]; then + if [[ -f "$secretValueFile" && "$secretValueFile" =~ ^[a-zA-Z0-9][^\ ]*[a-zA-Z0-9]$ && ! "$secretValueFile" =~ \.swp$ ]]; then flags="$flags --from-file=$secretValueFile" else gen3_log_info "gen3_secrets_sync" "ignoring funny secrets file g3auto/$serviceName/$secretValueFile" From 55efaee9d2ef471cf3b4430fd1a3a677a04f8213 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Thu, 9 Nov 2023 12:09:21 -0600 Subject: [PATCH 48/72] chore(ai): cd to dir with pyproject.toml --- kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index fb0c4619c..44aa1cc6d 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -105,6 +105,7 @@ spec: echo BEFORE /gen3discoveryai/knowledge ls -Ra /gen3discoveryai/knowledge echo + cd /gen3discoveryai echo running load_into_knowledge_store.py poetry run python /gen3discoveryai/bin/load_into_knowledge_store.py /gen3discoveryai/knowledge/tmp rm -r /gen3discoveryai/knowledge/tmp/ From 42eb6a3b6283f34994fd2cb62fd87d4f0336cef8 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Thu, 9 Nov 2023 12:30:04 -0600 Subject: [PATCH 49/72] chore(ai): try to fix issue with pyproject.toml --- kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index 44aa1cc6d..c63140c8a 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -105,9 +105,8 @@ spec: echo BEFORE /gen3discoveryai/knowledge ls -Ra /gen3discoveryai/knowledge echo - cd /gen3discoveryai echo running 
load_into_knowledge_store.py - poetry run python /gen3discoveryai/bin/load_into_knowledge_store.py /gen3discoveryai/knowledge/tmp + python /gen3discoveryai/bin/load_into_knowledge_store.py /gen3discoveryai/knowledge/tmp rm -r /gen3discoveryai/knowledge/tmp/ echo echo AFTER /gen3discoveryai/knowledge From b7231897e973697b769850ab82e0e65b71758dc3 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Thu, 9 Nov 2023 12:52:13 -0600 Subject: [PATCH 50/72] fix(ai): actually we need to poetry run --- kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index c63140c8a..fb0c4619c 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -106,7 +106,7 @@ spec: ls -Ra /gen3discoveryai/knowledge echo echo running load_into_knowledge_store.py - python /gen3discoveryai/bin/load_into_knowledge_store.py /gen3discoveryai/knowledge/tmp + poetry run python /gen3discoveryai/bin/load_into_knowledge_store.py /gen3discoveryai/knowledge/tmp rm -r /gen3discoveryai/knowledge/tmp/ echo echo AFTER /gen3discoveryai/knowledge From b331a94bbb1d808202786a4cedf26643b4fe4db3 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Thu, 9 Nov 2023 12:58:33 -0600 Subject: [PATCH 51/72] chore(ai): debug lines --- .../gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index fb0c4619c..7e921c9f6 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -111,6 +111,7 @@ spec: echo echo AFTER /gen3discoveryai/knowledge ls -Ra /gen3discoveryai/knowledge + 
while true; sleep 100; done containers: - name: gen3-discovery-ai GEN3_GEN3-DISCOVERY-AI_IMAGE @@ -136,4 +137,8 @@ spec: cpu: 1 limits: cpu: 2 - memory: 512Mi \ No newline at end of file + memory: 512Mi + args: + - "-c" + - | + while true; sleep 100; done \ No newline at end of file From 6e280bf1e4f8abbb6ca43c3936b6ad8f6ff74aa0 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Thu, 9 Nov 2023 13:15:56 -0600 Subject: [PATCH 52/72] chore(ai): debug lines --- kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index 7e921c9f6..cf64c9477 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -111,7 +111,7 @@ spec: echo echo AFTER /gen3discoveryai/knowledge ls -Ra /gen3discoveryai/knowledge - while true; sleep 100; done + while true containers: - name: gen3-discovery-ai GEN3_GEN3-DISCOVERY-AI_IMAGE @@ -141,4 +141,4 @@ spec: args: - "-c" - | - while true; sleep 100; done \ No newline at end of file + while true \ No newline at end of file From 7e3c42c3ee14fe7867287b70f8e0834cf3853f88 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Thu, 9 Nov 2023 13:22:37 -0600 Subject: [PATCH 53/72] chore(ai): debug lines --- kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index cf64c9477..8cd2bc60b 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -111,7 +111,7 @@ spec: echo echo AFTER /gen3discoveryai/knowledge ls -Ra /gen3discoveryai/knowledge - while true + while :; do echo 'Press to exit.'; 
sleep 1; done containers: - name: gen3-discovery-ai GEN3_GEN3-DISCOVERY-AI_IMAGE @@ -141,4 +141,4 @@ spec: args: - "-c" - | - while true \ No newline at end of file + while :; do echo 'Press to exit.'; sleep 1; done From 91824045c4c8814bbee132e60f584a65bd34124f Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Thu, 9 Nov 2023 13:24:33 -0600 Subject: [PATCH 54/72] chore(ai): debug lines --- kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index 8cd2bc60b..b2b77dd26 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -105,6 +105,8 @@ spec: echo BEFORE /gen3discoveryai/knowledge ls -Ra /gen3discoveryai/knowledge echo + pwd + ls -Ra . echo running load_into_knowledge_store.py poetry run python /gen3discoveryai/bin/load_into_knowledge_store.py /gen3discoveryai/knowledge/tmp rm -r /gen3discoveryai/knowledge/tmp/ From 7dc836da6a6160bf271cc802d4cb55d15a030ced Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Thu, 9 Nov 2023 13:26:53 -0600 Subject: [PATCH 55/72] chore(ai): debug lines --- .../services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index b2b77dd26..195490a83 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -105,8 +105,13 @@ spec: echo BEFORE /gen3discoveryai/knowledge ls -Ra /gen3discoveryai/knowledge echo + echo -------------------------------------------- pwd + echo -------------------------------------------- + ls + echo -------------------------------------------- ls -Ra . 
+ echo -------------------------------------------- echo running load_into_knowledge_store.py poetry run python /gen3discoveryai/bin/load_into_knowledge_store.py /gen3discoveryai/knowledge/tmp rm -r /gen3discoveryai/knowledge/tmp/ From 5fa9d76d755213436ed39e79b12d35ce42b779fb Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Thu, 9 Nov 2023 13:36:00 -0600 Subject: [PATCH 56/72] fix(mount): don't overwrite whole dir --- kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index 195490a83..9eb9e2781 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -48,6 +48,7 @@ spec: - name: gen3-discovery-ai-g3auto-volume readOnly: true mountPath: /gen3discoveryai/ + subPath: env,credentials.json,storage_config.json - name: gen3-discovery-ai-knowledge-library-volume mountPath: /gen3discoveryai/knowledge imagePullPolicy: Always @@ -88,6 +89,7 @@ spec: - name: gen3-discovery-ai-g3auto-volume readOnly: true mountPath: /gen3discoveryai/ + subPath: env,credentials.json,storage_config.json - name: gen3-discovery-ai-knowledge-library-volume mountPath: /gen3discoveryai/knowledge imagePullPolicy: Always @@ -136,6 +138,7 @@ spec: - name: gen3-discovery-ai-g3auto-volume readOnly: true mountPath: /gen3discoveryai/ + subPath: env,credentials.json,storage_config.json - name: gen3-discovery-ai-knowledge-library-volume mountPath: /gen3discoveryai/knowledge imagePullPolicy: Always From d3cf859f9b5f70c44c4a107d5162598683aff77e Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Thu, 9 Nov 2023 13:47:56 -0600 Subject: [PATCH 57/72] fix(ai): mounts --- .../gen3-discovery-ai-deploy.yaml | 36 +++++++++++++++---- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git 
a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index 9eb9e2781..6d0a5d1dd 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -47,8 +47,16 @@ spec: volumeMounts: - name: gen3-discovery-ai-g3auto-volume readOnly: true - mountPath: /gen3discoveryai/ - subPath: env,credentials.json,storage_config.json + mountPath: /gen3discoveryai/.env + subPath: env + - name: gen3-discovery-ai-g3auto-volume + readOnly: true + mountPath: /gen3discoveryai/credentials.json + subPath: credentials.json + - name: gen3-discovery-ai-g3auto-volume + readOnly: true + mountPath: /gen3discoveryai/storage_config.json + subPath: storage_config.json - name: gen3-discovery-ai-knowledge-library-volume mountPath: /gen3discoveryai/knowledge imagePullPolicy: Always @@ -88,8 +96,16 @@ spec: volumeMounts: - name: gen3-discovery-ai-g3auto-volume readOnly: true - mountPath: /gen3discoveryai/ - subPath: env,credentials.json,storage_config.json + mountPath: /gen3discoveryai/.env + subPath: env + - name: gen3-discovery-ai-g3auto-volume + readOnly: true + mountPath: /gen3discoveryai/credentials.json + subPath: credentials.json + - name: gen3-discovery-ai-g3auto-volume + readOnly: true + mountPath: /gen3discoveryai/storage_config.json + subPath: storage_config.json - name: gen3-discovery-ai-knowledge-library-volume mountPath: /gen3discoveryai/knowledge imagePullPolicy: Always @@ -137,8 +153,16 @@ spec: volumeMounts: - name: gen3-discovery-ai-g3auto-volume readOnly: true - mountPath: /gen3discoveryai/ - subPath: env,credentials.json,storage_config.json + mountPath: /gen3discoveryai/.env + subPath: env + - name: gen3-discovery-ai-g3auto-volume + readOnly: true + mountPath: /gen3discoveryai/credentials.json + subPath: credentials.json + - name: gen3-discovery-ai-g3auto-volume + readOnly: true + mountPath: /gen3discoveryai/storage_config.json + 
subPath: storage_config.json - name: gen3-discovery-ai-knowledge-library-volume mountPath: /gen3discoveryai/knowledge imagePullPolicy: Always From b4fcc9b5a9946cd92b70734f7b0905019a65aee8 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Thu, 9 Nov 2023 14:16:14 -0600 Subject: [PATCH 58/72] chore(ai): remove debug lines --- .../gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index 6d0a5d1dd..3bb71bba9 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -122,14 +122,6 @@ spec: echo echo BEFORE /gen3discoveryai/knowledge ls -Ra /gen3discoveryai/knowledge - echo - echo -------------------------------------------- - pwd - echo -------------------------------------------- - ls - echo -------------------------------------------- - ls -Ra . 
- echo -------------------------------------------- echo running load_into_knowledge_store.py poetry run python /gen3discoveryai/bin/load_into_knowledge_store.py /gen3discoveryai/knowledge/tmp rm -r /gen3discoveryai/knowledge/tmp/ @@ -172,7 +164,3 @@ spec: limits: cpu: 2 memory: 512Mi - args: - - "-c" - - | - while :; do echo 'Press to exit.'; sleep 1; done From 231c7fb22fd84b4d3fcabefa7a79112145154aa6 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Thu, 9 Nov 2023 14:29:48 -0600 Subject: [PATCH 59/72] fix(ai): remove debug --- kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index 3bb71bba9..ad0259457 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -128,7 +128,6 @@ spec: echo echo AFTER /gen3discoveryai/knowledge ls -Ra /gen3discoveryai/knowledge - while :; do echo 'Press to exit.'; sleep 1; done containers: - name: gen3-discovery-ai GEN3_GEN3-DISCOVERY-AI_IMAGE From 598e7480c871b68d65e768bca0fd0150b354a738 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Fri, 10 Nov 2023 13:03:29 -0600 Subject: [PATCH 60/72] chore(debug): debug line --- .../services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index ad0259457..536194b5b 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -163,3 +163,8 @@ spec: limits: cpu: 2 memory: 512Mi + command: ["/bin/bash"] + args: + - "-c" + - | + while :; do echo 'Press to exit.'; sleep 1; done From 56f1b71274ae43d7e32a99555dee0614e2ed0f71 Mon Sep 17 00:00:00 2001 From: Alex VanTol 
Date: Mon, 13 Nov 2023 10:55:46 -0600 Subject: [PATCH 61/72] chore(debug): remove debug line --- .../gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index 536194b5b..2b65f8b46 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -162,9 +162,4 @@ spec: cpu: 1 limits: cpu: 2 - memory: 512Mi - command: ["/bin/bash"] - args: - - "-c" - - | - while :; do echo 'Press to exit.'; sleep 1; done + memory: 512Mi \ No newline at end of file From f42023fd620c1a892754b6a5d73a1a94d5b199da Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Mon, 13 Nov 2023 16:33:29 -0600 Subject: [PATCH 62/72] feat(ai): add to roll all, fix port in service yaml --- gen3/bin/kube-roll-all.sh | 6 ++++++ .../gen3-discovery-ai/gen3-discovery-ai-service.yaml | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/gen3/bin/kube-roll-all.sh b/gen3/bin/kube-roll-all.sh index 552c27708..6f1a62b33 100644 --- a/gen3/bin/kube-roll-all.sh +++ b/gen3/bin/kube-roll-all.sh @@ -243,6 +243,12 @@ else gen3_log_info "not deploying dicom-viewer - no manifest entry for '.versions[\"dicom-viewer\"]'" fi +if g3k_manifest_lookup '.versions["gen3-discovery-ai"]' 2> /dev/null; then + gen3 kube-setup-gen3-discovery-ai & +else + gen3_log_info "not deploying gen3-discovery-ai - no manifest entry for '.versions[\"gen3-discovery-ai\"]'" +fi + gen3 kube-setup-revproxy if [[ "$GEN3_ROLL_FAST" != "true" ]]; then diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-service.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-service.yaml index eae88bf9b..1634d7c4f 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-service.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-service.yaml @@ -18,7 +18,7 @@ 
spec: ports: - protocol: TCP port: 80 - targetPort: 80 + targetPort: 8089 name: http nodePort: null - protocol: TCP From 127bf97f51d783b5bd4116a665c384e3bc3acaa4 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Tue, 14 Nov 2023 13:08:20 -0600 Subject: [PATCH 63/72] fix(ai): fix nginx conf file name --- ...n3-discoveryai-service.conf => gen3-discovery-ai-service.conf} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename kube/services/revproxy/gen3.nginx.conf/{gen3-discoveryai-service.conf => gen3-discovery-ai-service.conf} (100%) diff --git a/kube/services/revproxy/gen3.nginx.conf/gen3-discoveryai-service.conf b/kube/services/revproxy/gen3.nginx.conf/gen3-discovery-ai-service.conf similarity index 100% rename from kube/services/revproxy/gen3.nginx.conf/gen3-discoveryai-service.conf rename to kube/services/revproxy/gen3.nginx.conf/gen3-discovery-ai-service.conf From a2d633c30dc3ba61abd9a3d51ed60198f4479d03 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Mon, 20 Nov 2023 13:30:06 -0600 Subject: [PATCH 64/72] fix(nginx): fix routing for AI service to add trailing slash after "ai" --- .../revproxy/gen3.nginx.conf/gen3-discovery-ai-service.conf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kube/services/revproxy/gen3.nginx.conf/gen3-discovery-ai-service.conf b/kube/services/revproxy/gen3.nginx.conf/gen3-discovery-ai-service.conf index cc7b361e2..42e9a3758 100644 --- a/kube/services/revproxy/gen3.nginx.conf/gen3-discovery-ai-service.conf +++ b/kube/services/revproxy/gen3.nginx.conf/gen3-discovery-ai-service.conf @@ -5,8 +5,8 @@ set $proxy_service "gen3-discovery-ai-service"; set $upstream http://gen3-discovery-ai-service$des_domain; - rewrite ^/ai(.*) /$1 break; + rewrite ^/ai/(.*) /$1 break; proxy_pass $upstream; - proxy_redirect http://$host/ https://$host/ai; + proxy_redirect http://$host/ https://$host/ai/; client_max_body_size 0; } From 8d0522f52873a9bb8ea66d092e67078d5fde7cec Mon Sep 17 00:00:00 2001 From: Alexander VanTol Date: Mon, 
20 Nov 2023 14:30:59 -0600 Subject: [PATCH 65/72] Update web_whitelist --- files/squid_whitelist/web_whitelist | 1 - 1 file changed, 1 deletion(-) diff --git a/files/squid_whitelist/web_whitelist b/files/squid_whitelist/web_whitelist index 154d3c459..0cbc0a6bc 100644 --- a/files/squid_whitelist/web_whitelist +++ b/files/squid_whitelist/web_whitelist @@ -15,7 +15,6 @@ ctds-planx.atlassian.net data.cityofchicago.org dataguids.org api.login.yahoo.com -api.openai.com api.snapcraft.io apt.kubernetes.io argoproj.github.io From fe60b486b0ab8869721ff56358e230dca635a84f Mon Sep 17 00:00:00 2001 From: Alexander VanTol Date: Mon, 20 Nov 2023 14:32:01 -0600 Subject: [PATCH 66/72] Update kube-setup-gen3-discovery-ai.sh --- gen3/bin/kube-setup-gen3-discovery-ai.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gen3/bin/kube-setup-gen3-discovery-ai.sh b/gen3/bin/kube-setup-gen3-discovery-ai.sh index abf3f9337..cfab16703 100644 --- a/gen3/bin/kube-setup-gen3-discovery-ai.sh +++ b/gen3/bin/kube-setup-gen3-discovery-ai.sh @@ -78,7 +78,7 @@ setup_storage() { # gen3-discovery-ai-g3auto secret still does not exist # we need to setup an S3 bucket and IAM creds # let's avoid creating multiple buckets for different - # deployments to the same k8s cluseter (dev, etc) + # deployments to the same k8s cluster (dev, etc) # local bucketName local accountNumber From 39b6f793f3040d6fea548b678efe7cd860d8a4d3 Mon Sep 17 00:00:00 2001 From: Alexander VanTol Date: Mon, 20 Nov 2023 14:35:07 -0600 Subject: [PATCH 67/72] Update README.md --- kube/services/gen3-discovery-ai/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kube/services/gen3-discovery-ai/README.md b/kube/services/gen3-discovery-ai/README.md index 7895f30a9..ef279dc7a 100644 --- a/kube/services/gen3-discovery-ai/README.md +++ b/kube/services/gen3-discovery-ai/README.md @@ -7,10 +7,10 @@ Expects secrets setup in `g3auto/gen3-discovery-ai` folder - `credentials.json`: Google service 
account key if using a topic with Google Vertex AI - `env`: .env file contents for service configuration (see service repo for a default one) -## Populating Disc for In-Memory Vectordb Chromadb +## Populating Disk for In-Memory Vectordb Chromadb In order to setup pre-configured topics, we need to load a bunch of data -into Chromadb (which is an inmem vectordb with an option to persist to disk). +into Chromadb (which is an in-mem vectordb with an option to persist to disk). To load topics consistently, we setup an S3 bucket to house the persisted data for the vectordb. @@ -31,5 +31,5 @@ See the Gen3 Discovery AI service repo README for more info. ### Getting data from S3 in mem We specify a path for Chromadb to use for persisted data and when it sees -data there, it loads it in. So the deployment automation aws syncs the bucket -and then calls a script to load the files into the in-mem vectorstore from there. \ No newline at end of file +data there, it loads it in. So the deployment automation: 1. aws syncs the bucket +and then 2. calls a script to load the files into the in-mem vectorstore from there. From 0f455dca9e9a94366e6e67a7ef5e18b3e1cc2900 Mon Sep 17 00:00:00 2001 From: Alexander VanTol Date: Mon, 20 Nov 2023 14:36:51 -0600 Subject: [PATCH 68/72] Update gen3-discovery-ai-deploy.yaml --- .../services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index 2b65f8b46..145819578 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -28,7 +28,8 @@ spec: - name: gen3-discovery-ai-knowledge-library-volume emptyDir: {} initContainers: - # chromadb's persisted disk expects the ability to write. + # chromadb's persisted disk support requires the ability to write. 
We don't technically need this ability + # since we're populating the entirety of the database from configured files (no live updates). # # Solution: utilize emptyDir as a writable space. # @@ -162,4 +163,4 @@ spec: cpu: 1 limits: cpu: 2 - memory: 512Mi \ No newline at end of file + memory: 512Mi From 022c30eba38da281820fe74217342d5daaaa5087 Mon Sep 17 00:00:00 2001 From: Alexander VanTol Date: Mon, 20 Nov 2023 14:41:16 -0600 Subject: [PATCH 69/72] Update gen3-discovery-ai-deploy.yaml --- kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index 145819578..a72cea1cf 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -163,4 +163,5 @@ spec: cpu: 1 limits: cpu: 2 + # NOTE: If the configured data for the knowledge library (vector database) is large, you may need to bump this memory: 512Mi From 0b3b2098d3da232932cbdb34c37f8aeb9849a687 Mon Sep 17 00:00:00 2001 From: Alexander VanTol Date: Tue, 28 Nov 2023 11:10:02 -0600 Subject: [PATCH 70/72] Update gen3-discovery-ai-service.yaml --- .../gen3-discovery-ai/gen3-discovery-ai-service.yaml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-service.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-service.yaml index 1634d7c4f..b4734c3b8 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-service.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-service.yaml @@ -2,15 +2,6 @@ kind: Service apiVersion: v1 metadata: name: gen3-discovery-ai-service - annotations: - getambassador.io/config: | - --- - apiVersion: ambassador/v1 - ambassador_id: "gen3" - kind: Mapping - name: gen3-discovery-ai_mapping - prefix: /index/ - service: http://gen3-discovery-ai-service:80 spec: 
selector: app: gen3-discovery-ai From 8a54edd67b47805a4e1892ddae304e7364bd4ce8 Mon Sep 17 00:00:00 2001 From: Alexander VanTol Date: Tue, 28 Nov 2023 15:00:21 -0600 Subject: [PATCH 71/72] Update kube-setup-gen3-discovery-ai.sh --- gen3/bin/kube-setup-gen3-discovery-ai.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gen3/bin/kube-setup-gen3-discovery-ai.sh b/gen3/bin/kube-setup-gen3-discovery-ai.sh index cfab16703..44a472a74 100644 --- a/gen3/bin/kube-setup-gen3-discovery-ai.sh +++ b/gen3/bin/kube-setup-gen3-discovery-ai.sh @@ -1,6 +1,6 @@ #!/bin/bash # -# Deploy the gen3-discovery-ai service. +# Deploy the gen3-discovery-ai service # source "${GEN3_HOME}/gen3/lib/utils.sh" From 583653912046b253dfdba8fda488c666ab8f3999 Mon Sep 17 00:00:00 2001 From: Alex VanTol Date: Fri, 26 Jan 2024 13:12:56 -0600 Subject: [PATCH 72/72] feat(discovery): update to data load commands and strategy to support markdown --- kube/services/gen3-discovery-ai/README.md | 35 +++++++++++-------- .../gen3-discovery-ai-deploy.yaml | 18 ++++++++-- 2 files changed, 37 insertions(+), 16 deletions(-) diff --git a/kube/services/gen3-discovery-ai/README.md b/kube/services/gen3-discovery-ai/README.md index ef279dc7a..4c20678e0 100644 --- a/kube/services/gen3-discovery-ai/README.md +++ b/kube/services/gen3-discovery-ai/README.md @@ -1,8 +1,28 @@ # Gen3 Discovery AI Configuration -Expects configuration in a `gen3-discovery-ai` folder relative to +Expects data in a `gen3-discovery-ai` folder relative to where the `manifest.json` is. +Basic setup: + +`{{dir where manifest.json is}}/gen3-discovery-ai/knowledge/` + +- `tsvs` folder + - tsvs with topic_name at beginning of file +- `markdown` folder + - {{topic_name_1}} + - markdown file(s) + - {{topic_name_2}} + - markdown file(s) + +The `kube-setup-gen3-discovery-ai` script syncs the above `/knowledge` folder to +an S3 bucket. The service configuration then pulls from the S3 bucket and runs load commands +to get the data into chromadb. 
+ +> Note: See the `gen3-discovery-ai` service repo docs and README for more details on data load capabilities. + +Check the `gen3-discovery-ai-deploy.yaml` for what commands are being run in the automation. + Expects secrets setup in `g3auto/gen3-discovery-ai` folder - `credentials.json`: Google service account key if using a topic with Google Vertex AI - `env`: .env file contents for service configuration (see service repo for a default one) @@ -15,19 +35,6 @@ into Chromadb (which is an in-mem vectordb with an option to persist to disk). To load topics consistently, we setup an S3 bucket to house the persisted data for the vectordb. -### Getting data into S3 - -We could support more than TSVs in the future, but for now that's the only automated support. - -Move TSVs of data into the configuration in cdis-manifest. The expectation is that for Chromadb loading, the -files are placed in a `gen3-discovery-ai/knowledge/tsvs` folder relative to -where the `manifest.json` is. For example: -`~/cdis-manifest/avantol.planx-pla.net/gen3-discovery-ai/gen3-discovery-ai/knowledge/tsvs` - -You can rsync from local if you have files locally. - -See the Gen3 Discovery AI service repo README for more info. 
- ### Getting data from S3 in mem We specify a path for Chromadb to use for persisted data and when it sees diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml index a72cea1cf..dcfe03248 100644 --- a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml +++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml @@ -78,7 +78,7 @@ spec: ls -Ra /gen3discoveryai/knowledge echo echo syncing from s3 - aws s3 sync "s3://${bucketName}/tsvs" "/gen3discoveryai/knowledge/tmp" + aws s3 sync "s3://${bucketName}" "/gen3discoveryai/knowledge/tmp" echo echo AFTER /gen3discoveryai/knowledge ls -Ra /gen3discoveryai/knowledge @@ -124,7 +124,21 @@ spec: echo BEFORE /gen3discoveryai/knowledge ls -Ra /gen3discoveryai/knowledge echo running load_into_knowledge_store.py - poetry run python /gen3discoveryai/bin/load_into_knowledge_store.py /gen3discoveryai/knowledge/tmp + poetry run python /gen3discoveryai/bin/load_into_knowledge_store.py tsvs /gen3discoveryai/knowledge/tmp/tsvs + + if [ -d "/gen3discoveryai/knowledge/tmp/markdown" ]; then + for dir in "/gen3discoveryai/knowledge/tmp/markdown"/*; do + if [ -d "$dir" ]; then + dir_name=$(basename "$dir") + + echo "Processing directory: $dir_name. Full path: $dir" + poetry run python /gen3discoveryai/bin/load_into_knowledge_store.py markdown --topic $dir_name $dir + fi + done + else + echo "Not syncing markdown, directory not found: /gen3discoveryai/knowledge/tmp/markdown" + fi + rm -r /gen3discoveryai/knowledge/tmp/ echo echo AFTER /gen3discoveryai/knowledge