diff --git a/packages/celotool/src/cmds/deploy/destroy/prometheus.ts b/packages/celotool/src/cmds/deploy/destroy/prometheus.ts index f82417b584c..4af0a3d8154 100644 --- a/packages/celotool/src/cmds/deploy/destroy/prometheus.ts +++ b/packages/celotool/src/cmds/deploy/destroy/prometheus.ts @@ -2,11 +2,7 @@ import { DestroyArgv } from 'src/cmds/deploy/destroy' import { switchToClusterFromEnvOrContext } from 'src/lib/cluster' import { addContextMiddleware, ContextArgv } from 'src/lib/context-utils' import { exitIfCelotoolHelmDryRun } from 'src/lib/helm_deploy' -import { - removeGKEWorkloadMetrics, - removeGrafanaHelmRelease, - removePrometheus, -} from 'src/lib/prometheus' +import { removeGrafanaHelmRelease, removePrometheus } from 'src/lib/prometheus' export const command = 'prometheus' @@ -23,6 +19,5 @@ export const handler = async (argv: PrometheusDestroyArgv) => { await switchToClusterFromEnvOrContext(argv, true) await removeGrafanaHelmRelease() - await removeGKEWorkloadMetrics() await removePrometheus() } diff --git a/packages/celotool/src/cmds/deploy/initial/prometheus.ts b/packages/celotool/src/cmds/deploy/initial/prometheus.ts index f44f7e03554..21ea49d85ad 100644 --- a/packages/celotool/src/cmds/deploy/initial/prometheus.ts +++ b/packages/celotool/src/cmds/deploy/initial/prometheus.ts @@ -1,11 +1,7 @@ import { InitialArgv } from 'src/cmds/deploy/initial' import { switchToClusterFromEnvOrContext } from 'src/lib/cluster' import { addContextMiddleware, ContextArgv } from 'src/lib/context-utils' -import { - installGKEWorkloadMetricsIfNotExists, - installGrafanaIfNotExists, - installPrometheusIfNotExists, -} from 'src/lib/prometheus' +import { installGrafanaIfNotExists, installPrometheusIfNotExists } from 'src/lib/prometheus' export const command = 'prometheus' @@ -13,19 +9,12 @@ export const describe = 'deploy prometheus to a kubernetes cluster using Helm' export type PrometheusInitialArgv = InitialArgv & ContextArgv & { - deployGKEWorkloadMetrics: boolean deployGrafana: boolean skipClusterSetup: boolean } export const builder = (argv: PrometheusInitialArgv) => { return addContextMiddleware(argv) - .option('deployGKEWorkloadMetrics', { - type: 'boolean', - description: - 'Include GKE Workload Metrics, see https://cloud.google.com/stackdriver/docs/solutions/gke/managing-metrics#workload-metrics', - default: false, - }) .option('deploy-grafana', { type: 'boolean', description: 'Include the deployment of grafana helm chart', @@ -42,9 +31,6 @@ export const handler = async (argv: PrometheusInitialArgv) => { const clusterConfig = await switchToClusterFromEnvOrContext(argv, argv.skipClusterSetup) await installPrometheusIfNotExists(argv.context, clusterConfig) - if (argv.deployGKEWorkloadMetrics) { - await installGKEWorkloadMetricsIfNotExists(clusterConfig) - } if (argv.deployGrafana) { await installGrafanaIfNotExists(argv.context, clusterConfig) } diff --git a/packages/celotool/src/cmds/deploy/upgrade/prometheus.ts b/packages/celotool/src/cmds/deploy/upgrade/prometheus.ts index e74afe813f5..48e7a9c3a36 100644 --- a/packages/celotool/src/cmds/deploy/upgrade/prometheus.ts +++ b/packages/celotool/src/cmds/deploy/upgrade/prometheus.ts @@ -1,7 +1,7 @@ import { UpgradeArgv } from 'src/cmds/deploy/upgrade' import { switchToClusterFromEnvOrContext } from 'src/lib/cluster' import { addContextMiddleware, ContextArgv } from 'src/lib/context-utils' -import { upgradeGKEWorkloadMetrics, upgradeGrafana, upgradePrometheus } from 'src/lib/prometheus' +import { upgradeGrafana, upgradePrometheus } from 'src/lib/prometheus' export const command = 'prometheus' @@ -9,23 +9,15 @@ export const describe = 'upgrade prometheus to a kubernetes cluster using Helm' export type PrometheusUpgradeArgv = UpgradeArgv & ContextArgv & { - deployGKEWorkloadMetrics: boolean deployGrafana: boolean } export const builder = (argv: PrometheusUpgradeArgv) => { - return addContextMiddleware(argv) - .option('deployGKEWorkloadMetrics', { - type: 'boolean', - description: - 'Include GKE Workload Metrics, see https://cloud.google.com/stackdriver/docs/solutions/gke/managing-metrics#workload-metrics', - default: false, - }) - .option('deploy-grafana', { - type: 'boolean', - description: 'Include the deployment of grafana helm chart', - default: false, - }) + return addContextMiddleware(argv).option('deploy-grafana', { + type: 'boolean', + description: 'Include the deployment of grafana helm chart', + default: false, + }) } export const handler = async (argv: PrometheusUpgradeArgv) => { @@ -33,9 +25,6 @@ export const handler = async (argv: PrometheusUpgradeArgv) => { await upgradePrometheus(argv.context, clusterConfig) - if (argv.deployGKEWorkloadMetrics) { - await upgradeGKEWorkloadMetrics(clusterConfig) - } if (argv.deployGrafana) { await upgradeGrafana(argv.context, clusterConfig) } diff --git a/packages/celotool/src/lib/prometheus.ts b/packages/celotool/src/lib/prometheus.ts index ae7175b247c..69d7c03778b 100644 --- a/packages/celotool/src/lib/prometheus.ts +++ b/packages/celotool/src/lib/prometheus.ts @@ -10,13 +10,11 @@ import { } from './env-utils' import { installGenericHelmChart, - isCelotoolHelmDryRun, removeGenericHelmChart, setHelmArray, upgradeGenericHelmChart, } from './helm_deploy' import { BaseClusterConfig, CloudProvider } from './k8s-cluster/base' -import { GCPClusterConfig } from './k8s-cluster/gcp' import { createServiceAccountIfNotExists, getServiceAccountEmail, @@ -37,9 +35,6 @@ const sidecarImageTag = '0.8.2' // Prometheus container registry with latest tags: https://hub.docker.com/r/prom/prometheus/tags const prometheusImageTag = 'v2.27.1' -const GKEWorkloadMetricsHelmChartPath = '../helm-charts/gke-workload-metrics' -const GKEWorkloadMetricsReleaseName = 'gke-workload-metrics' - const grafanaHelmChartPath = '../helm-charts/grafana' const grafanaReleaseName = 'grafana' @@ -400,105 +395,3 @@ async function grafanaHelmParameters(context?: string, clusterConfig?: BaseClust const params = [`-f ${valuesFile}`] return params } - -// See https://cloud.google.com/stackdriver/docs/solutions/gke/managing-metrics#enable-workload-metrics -async function enableGKESystemAndWorkloadMetrics( - clusterID: string, - zone: string, - gcloudProjectName: string -) { - const GKEWMEnabled = await outputIncludes( - `gcloud beta container clusters describe ${clusterID} --zone=${zone} --project=${gcloudProjectName} --format="value(monitoringConfig.componentConfig.enableComponents)"`, - 'WORKLOADS', - `GKE cluster ${clusterID} in zone ${zone} and project ${gcloudProjectName} has GKE workload metrics enabled, skipping gcloud beta container clusters update` - ) - - if (!GKEWMEnabled) { - if (isCelotoolHelmDryRun()) { - console.info( - `Skipping enabling GKE workload metrics for cluster ${clusterID} in zone ${zone} and project ${gcloudProjectName} due to --helmdryrun` - ) - } else { - await execCmdWithExitOnFailure( - `gcloud beta container clusters update ${clusterID} --zone=${zone} --project=${gcloudProjectName} --monitoring=SYSTEM,WORKLOAD` - ) - } - } -} - -async function GKEWorkloadMetricsHelmParameters(clusterConfig?: BaseClusterConfig) { - // Abandon if not using GCP, it's GKE specific. - if (clusterConfig && clusterConfig.cloudProvider !== CloudProvider.GCP) { - console.error('Cannot create gke-workload-metrics in a non GCP k8s cluster, skipping') - process.exit(1) - } - - const clusterName = clusterConfig - ? clusterConfig!.clusterName - : fetchEnv(envVar.KUBERNETES_CLUSTER_NAME) - - const params = [`--set cluster=${clusterName}`] - return params -} - -export async function installGKEWorkloadMetricsIfNotExists(clusterConfig?: BaseClusterConfig) { - const GKEWMExists = await outputIncludes( - `helm list -A`, - GKEWorkloadMetricsReleaseName, - `gke-workload-metrics exists, skipping install` - ) - if (!GKEWMExists) { - console.info('Installing gke-workload-metrics') - await installGKEWorkloadMetrics(clusterConfig) - } -} - -async function installGKEWorkloadMetrics(clusterConfig?: BaseClusterConfig) { - // Abandon if not using GCP, it's GKE specific. - if (clusterConfig && clusterConfig.cloudProvider !== CloudProvider.GCP) { - console.error('Cannot create gke-workload-metrics in a non GCP k8s cluster, skipping') - process.exit(1) - } - - let k8sClusterName, k8sClusterZone, gcpProjectName - if (clusterConfig) { - const configGCP = clusterConfig as GCPClusterConfig - k8sClusterName = configGCP!.clusterName - k8sClusterZone = configGCP!.zone - gcpProjectName = configGCP!.projectName - } else { - k8sClusterName = fetchEnv(envVar.KUBERNETES_CLUSTER_NAME) - k8sClusterZone = fetchEnv(envVar.KUBERNETES_CLUSTER_ZONE) - gcpProjectName = fetchEnv(envVar.TESTNET_PROJECT_NAME) - } - - await enableGKESystemAndWorkloadMetrics(k8sClusterName, k8sClusterZone, gcpProjectName) - - await createNamespaceIfNotExists(kubeNamespace) - return installGenericHelmChart( - kubeNamespace, - GKEWorkloadMetricsReleaseName, - GKEWorkloadMetricsHelmChartPath, - await GKEWorkloadMetricsHelmParameters(clusterConfig) - ) -} - -export async function upgradeGKEWorkloadMetrics(clusterConfig?: BaseClusterConfig) { - const params = await GKEWorkloadMetricsHelmParameters(clusterConfig) - - await createNamespaceIfNotExists(kubeNamespace) - return upgradeGenericHelmChart( - kubeNamespace, - GKEWorkloadMetricsReleaseName, - GKEWorkloadMetricsHelmChartPath, - params - ) -} - -export async function removeGKEWorkloadMetrics() { - const GKEWMExists = await outputIncludes(`helm list -A`, GKEWorkloadMetricsReleaseName) - if (GKEWMExists) { - console.info('Removing gke-workload-metrics') - await removeGenericHelmChart(GKEWorkloadMetricsReleaseName, kubeNamespace) - } -} diff --git a/packages/helm-charts/gke-workload-metrics/Chart.yaml b/packages/helm-charts/gke-workload-metrics/Chart.yaml deleted file mode 100644 index 8cffb17e361..00000000000 --- a/packages/helm-charts/gke-workload-metrics/Chart.yaml +++ /dev/null @@ -1,9 +0,0 @@ -apiVersion: v1 -appVersion: "1.0" -name: gke-workload-metrics -description: Chart to manage Prometheus GKE workload metrics, see https://cloud.google.com/stackdriver/docs/solutions/gke/managing-metrics -keywords: -- gcp -- metrics -- prometheus -version: 0.1.0 \ No newline at end of file diff --git a/packages/helm-charts/gke-workload-metrics/NOTES.txt b/packages/helm-charts/gke-workload-metrics/NOTES.txt deleted file mode 100644 index 64167d12ec0..00000000000 --- a/packages/helm-charts/gke-workload-metrics/NOTES.txt +++ /dev/null @@ -1,5 +0,0 @@ -You have deployed the following release: {{ .Release.Name }}. - -To get further information, you can run the commands: - $ helm status {{ .Release.Name }} - $ helm get all {{ .Release.Name }} \ No newline at end of file diff --git a/packages/helm-charts/gke-workload-metrics/README.md b/packages/helm-charts/gke-workload-metrics/README.md deleted file mode 100644 index f1d40cc4835..00000000000 --- a/packages/helm-charts/gke-workload-metrics/README.md +++ /dev/null @@ -1,11 +0,0 @@ -# GKE Workload Metrics - -Helm charts to manage `PodMonitors` to scrape GCP workload metrics. -See the [GCP documentation](https://cloud.google.com/stackdriver/docs/solutions/gke/managing-metrics#workload-metrics) for more details and requirements. - -This is an alternative to collecting and sending Prometheus metrics to Google Cloud Monitoring as explained [here](https://cloud.google.com/stackdriver/docs/solutions/gke/prometheus). - -## Examples - -- -- diff --git a/packages/helm-charts/gke-workload-metrics/templates/pod-monitor.yaml b/packages/helm-charts/gke-workload-metrics/templates/pod-monitor.yaml deleted file mode 100644 index 4ad3d53fefb..00000000000 --- a/packages/helm-charts/gke-workload-metrics/templates/pod-monitor.yaml +++ /dev/null @@ -1,68 +0,0 @@ -{{- range $target := $.Values.targets }} -{{- if hasKey $target.k8sClustersToNamespace $.Values.cluster }} ---- -apiVersion: monitoring.gke.io/v1alpha1 -kind: PodMonitor -metadata: - name: "{{ $.Release.Name }}-{{ $target.service }}" - -spec: - # Namespaces to search for pods. - namespaceSelector: - matchNames: - - {{ get $target.k8sClustersToNamespace $.Values.cluster | default $.Values.cluster }} - - selector: - matchLabels: - {{- range $key, $val := $target.labels }} - {{ $key }}: {{ $val }} - {{- end }} - - podMetricsEndpoints: - {{- range $endpoint := $target.endpoints }} - - port: {{ $endpoint.port }} - path: {{ $endpoint.path }} - scheme: http - - metricRelabelings: - {{- range $relabel := $.Values.relabels }} - - sourceLabels: {{ $relabel.sourceLabels }} - regex: {{ $relabel.regex }} - action: {{ $relabel.action }} - {{- end }} - {{ end }} -{{ end }} -{{ end }} - -{{- /* -{{- if $.Values.kubeStateMetrics.enabled | default false }} ---- -# TODO: kube state metrics need to be migrated to -# https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-state-metrics -# and the pods need to define a port name for the metrics endpoint. - -apiVersion: monitoring.gke.io/v1alpha1 -kind: PodMonitor -metadata: - name: "{{ $.Release.Name }}-kube-state" - -spec: - namespaceSelector: - matchNames: - - default - - selector: - matchLabels: - app: kube-state-metrics - - podMetricsEndpoints: - - port: metrics - path: /metrics - scheme: http - - metricRelabelings: - - sourceLabels: [__name__] - regex: (kube_pod)_.* - action: keep - {{ end }} -*/}} diff --git a/packages/helm-charts/gke-workload-metrics/values.yaml b/packages/helm-charts/gke-workload-metrics/values.yaml deleted file mode 100644 index 2c8f604ffd6..00000000000 --- a/packages/helm-charts/gke-workload-metrics/values.yaml +++ /dev/null @@ -1,95 +0,0 @@ -# Combination of pod labels and endpoints on which to scrape. - -# Which k8s cluster to monitor. -cluster: give-me-a-cluster-name - -# Namespace where the podmonitor will be installed. -namespace: prometheus - -# TODO: enable when kube state metrics pods have port names -# kubeStateMetrics: -# enabled: true - -# Each target specifies -# - service: the suffix used for the PodMonitor config -# - labels: how to find the right pods -# - k8sClustersToNamespace: where in k8s it is deployeds -# - endpoints: where to find the Prometheus metrics to scrape -# TODO: simplify the config by renaming all ports of interest to 'metric' -targets: - - service: attestation-service - labels: - app: attestation-service - k8sClustersToNamespace: - alfajores: alfajores - baklavastaging: baklava - endpoints: - - port: http - path: /metrics - - - service: blockscout - labels: - app: blockscout - k8sClustersToNamespace: - alfajores: alfajores - baklavastaging: baklava - mainnet: rc1 - rc1staging: rc1staging - endpoints: - - port: http - path: /metrics/web # Web & API - - port: health - path: /metrics/indexer # Indexer - - - service: celo-fullnode - labels: - app: celo-fullnode - k8sClustersToNamespace: - baklava-europe-west1: baklava - mainnet: rc1 - rc1-asia-east1: rc1 - rc1-europe-west1: rc1 - rc1-southamerica-east1: rc1 - rc1-us-east1: rc1 - rc1-us-west1: rc1 - rc1staging: rc1staging - endpoints: - - port: pprof - path: /debug/metrics/prometheus - - - service: eksportisto - labels: - app: eksportisto - k8sClustersToNamespace: - alfajores: alfajores - mainnet: rc1 - endpoints: - - port: prometheus - path: /metrics - - - service: testnet - labels: - app: testnet - k8sClustersToNamespace: - alfajores: alfajores - baklavastaging: baklava - endpoints: - - port: pprof - path: /debug/metrics/prometheus - - port: metrics - path: /debug/metrics/prometheus - - - service: walletconnect - labels: - app.kubernetes.io/name: walletconnect - k8sClustersToNamespace: - alfajores: walletconnect - mainnet: walletconnect - endpoints: - - port: http - path: /metrics - -relabels: - - sourceLabels: [__name__] - regex: (apiserver|etcd|erlang|kube|kubelet|nginx|phoenix|rest_client|state|storage)_.* - action: drop