diff --git a/packages/celotool/src/cmds/deploy/destroy/vm-testnet.ts b/packages/celotool/src/cmds/deploy/destroy/vm-testnet.ts index 32f0a7102a1..6ef3b1badc2 100644 --- a/packages/celotool/src/cmds/deploy/destroy/vm-testnet.ts +++ b/packages/celotool/src/cmds/deploy/destroy/vm-testnet.ts @@ -1,3 +1,5 @@ +import { switchToClusterFromEnv } from 'src/lib/cluster' +import { removeHelmRelease } from 'src/lib/prom-to-sd-utils' import { destroy } from 'src/lib/vm-testnet-utils' import { DestroyArgv } from '../../deploy/destroy' @@ -6,5 +8,8 @@ export const describe = 'destroy an existing VM-based testnet' export const builder = {} export const handler = async (argv: DestroyArgv) => { + await switchToClusterFromEnv() await destroy(argv.celoEnv) + // destroy prometheus to stackdriver statefulset + await removeHelmRelease(argv.celoEnv) } diff --git a/packages/celotool/src/cmds/deploy/initial/vm-testnet.ts b/packages/celotool/src/cmds/deploy/initial/vm-testnet.ts index 8c1eda4eb68..a514be9c4cd 100644 --- a/packages/celotool/src/cmds/deploy/initial/vm-testnet.ts +++ b/packages/celotool/src/cmds/deploy/initial/vm-testnet.ts @@ -1,4 +1,6 @@ -import { deploy } from '../../../lib/vm-testnet-utils' +import { createClusterIfNotExists, setupCluster, switchToClusterFromEnv } from 'src/lib/cluster' +import { installHelmChart } from 'src/lib/prom-to-sd-utils' +import { deploy } from 'src/lib/vm-testnet-utils' import { InitialArgv } from '../../deploy/initial' export const command = 'vm-testnet' @@ -6,5 +8,12 @@ export const describe = 'upgrade a testnet on a VM' export const builder = {} export const handler = async (argv: InitialArgv) => { + // set up Kubernetes cluster that will have prometheus to stackdriver statefulset + const createdCluster = await createClusterIfNotExists() + await switchToClusterFromEnv() + await setupCluster(argv.celoEnv, createdCluster) + // deploy VM testnet with Terraform await deploy(argv.celoEnv) + // deploy prom to sd statefulset + await 
installHelmChart(argv.celoEnv) } diff --git a/packages/celotool/src/cmds/deploy/upgrade/vm-testnet.ts b/packages/celotool/src/cmds/deploy/upgrade/vm-testnet.ts index 162455b427f..53c9fee6721 100644 --- a/packages/celotool/src/cmds/deploy/upgrade/vm-testnet.ts +++ b/packages/celotool/src/cmds/deploy/upgrade/vm-testnet.ts @@ -1,3 +1,5 @@ +import { switchToClusterFromEnv } from 'src/lib/cluster' +import { upgradeHelmChart } from 'src/lib/prom-to-sd-utils' import { deploy, taintTestnet, untaintTestnet } from 'src/lib/vm-testnet-utils' import yargs from 'yargs' import { UpgradeArgv } from '../../deploy/upgrade' @@ -18,10 +20,14 @@ export const builder = (argv: yargs.Argv) => { } export const handler = async (argv: VmTestnetArgv) => { + await switchToClusterFromEnv() + let onDeployFailed = () => Promise.resolve() if (argv.reset) { onDeployFailed = () => untaintTestnet(argv.celoEnv) await taintTestnet(argv.celoEnv) } await deploy(argv.celoEnv, onDeployFailed) + // upgrade prom to sd statefulset + await upgradeHelmChart(argv.celoEnv) } diff --git a/packages/celotool/src/lib/env-utils.ts b/packages/celotool/src/lib/env-utils.ts index ed4bbd0a2ba..2f74c270e6d 100644 --- a/packages/celotool/src/lib/env-utils.ts +++ b/packages/celotool/src/lib/env-utils.ts @@ -35,6 +35,8 @@ export enum envVar { GETH_ACCOUNT_SECRET = 'GETH_ACCOUNT_SECRET', GETH_BOOTNODE_DOCKER_IMAGE_REPOSITORY = 'GETH_BOOTNODE_DOCKER_IMAGE_REPOSITORY', GETH_BOOTNODE_DOCKER_IMAGE_TAG = 'GETH_BOOTNODE_DOCKER_IMAGE_TAG', + GETH_EXPORTER_DOCKER_IMAGE_REPOSITORY = 'GETH_EXPORTER_DOCKER_IMAGE_REPOSITORY', + GETH_EXPORTER_DOCKER_IMAGE_TAG = 'GETH_EXPORTER_DOCKER_IMAGE_TAG', GETH_NODES_BACKUP_CRONJOB_ENABLED = 'GETH_NODES_BACKUP_CRONJOB_ENABLED', GETH_NODE_DOCKER_IMAGE_REPOSITORY = 'GETH_NODE_DOCKER_IMAGE_REPOSITORY', GETH_NODE_DOCKER_IMAGE_TAG = 'GETH_NODE_DOCKER_IMAGE_TAG', @@ -55,6 +57,8 @@ export enum envVar { NEXMO_KEY = 'NEXMO_KEY', NEXMO_SECRET = 'NEXMO_SECRET', NOTIFICATION_SERVICE_FIREBASE_DB = 
'NOTIFICATION_SERVICE_FIREBASE_DB', + PROMTOSD_EXPORT_INTERVAL = 'PROMTOSD_EXPORT_INTERVAL', + PROMTOSD_SCRAPE_INTERVAL = 'PROMTOSD_SCRAPE_INTERVAL', SMS_RETRIEVER_HASH_CODE = 'SMS_RETRIEVER_HASH_CODE', STACKDRIVER_MONITORING_DASHBOARD = 'STACKDRIVER_MONITORING_DASHBOARD', STACKDRIVER_NOTIFICATION_APPLICATIONS_PREFIX = 'STACKDRIVER_NOTIFICATION_APPLICATIONS_PREFIX', diff --git a/packages/celotool/src/lib/prom-to-sd-utils.ts b/packages/celotool/src/lib/prom-to-sd-utils.ts new file mode 100644 index 00000000000..4e7905daae7 --- /dev/null +++ b/packages/celotool/src/lib/prom-to-sd-utils.ts @@ -0,0 +1,85 @@ +import sleep from 'sleep-promise' +import { envVar, fetchEnv } from 'src/lib/env-utils' +import { installGenericHelmChart, removeGenericHelmChart } from 'src/lib/helm_deploy' +import { getStatefulSetReplicas, scaleResource } from 'src/lib/kubernetes' +import { execCmdWithExitOnFailure } from 'src/lib/utils' +import { getInternalTxNodeIPs, getInternalValidatorIPs } from 'src/lib/vm-testnet-utils' + +const helmChartPath = '../helm-charts/prometheus-to-sd' + +// This deploys a helm chart to Kubernetes that exports prometheus metrics from +// VM testnets to Stackdriver + +export async function installHelmChart(celoEnv: string) { + return installGenericHelmChart( + celoEnv, + releaseName(celoEnv), + helmChartPath, + await helmParameters(celoEnv) + ) +} + +export async function removeHelmRelease(celoEnv: string) { + await removeGenericHelmChart(releaseName(celoEnv)) +} + +export async function upgradeHelmChart(celoEnv: string) { + console.info(`Upgrading helm release ${releaseName(celoEnv)}`) + + const statefulSetName = `${celoEnv}-prom-to-sd` + const replicaCount = await getStatefulSetReplicas(celoEnv, statefulSetName) + + console.info('Scaling StatefulSet down to 0...') + await scaleResource(celoEnv, 'statefulset', statefulSetName, 0) + await sleep(5000) + + const helmParams = await helmParameters(celoEnv) + + const upgradeCmdArgs = `${releaseName( + celoEnv + )} 
${helmChartPath} --namespace ${celoEnv} ${helmParams.join(' ')}` + + if (process.env.CELOTOOL_VERBOSE === 'true') { + await execCmdWithExitOnFailure(`helm upgrade --debug --dry-run ${upgradeCmdArgs}`) + } + await execCmdWithExitOnFailure(`helm upgrade ${upgradeCmdArgs}`) + console.info(`Helm release ${releaseName(celoEnv)} upgrade successful`) + + console.info(`Scaling StatefulSet back up to ${replicaCount}...`) + await scaleResource(celoEnv, 'statefulset', statefulSetName, replicaCount) +} + +async function helmParameters(celoEnv: string) { + // The metrics endpoints are only exposed internally + const validatorIpAddresses = await getInternalValidatorIPs(celoEnv) + const validatorCount = parseInt(fetchEnv(envVar.VALIDATORS), 10) + const validatorPodIds = [] + for (let i = 0; i < validatorCount; i++) { + validatorPodIds.push(`${celoEnv}-validator-${i}`) + } + + const txNodeIpAddresses = await getInternalTxNodeIPs(celoEnv) + const txNodeCount = parseInt(fetchEnv(envVar.TX_NODES), 10) + const txNodePodIds = [] + for (let i = 0; i < txNodeCount; i++) { + txNodePodIds.push(`${celoEnv}-tx-node-${i}`) + } + + const allIps = validatorIpAddresses.concat(txNodeIpAddresses) + const sources = allIps.map((ip: string) => `http://${ip}:9200/metrics`) + + const allPodIds = validatorPodIds.concat(txNodePodIds) + + return [ + `--set metricsSources.geth="${sources.join('\\,')}"`, + `--set promtosd.scrape_interval=${fetchEnv(envVar.PROMTOSD_SCRAPE_INTERVAL)}`, + `--set promtosd.export_interval=${fetchEnv(envVar.PROMTOSD_EXPORT_INTERVAL)}`, + `--set promtosd.podIds="${allPodIds.join('\\,')}"`, + `--set promtosd.namespaceId=${celoEnv}`, + `--set replicaCount=${validatorCount + txNodeCount}`, + ] +} + +function releaseName(celoEnv: string) { + return `${celoEnv}-prom-to-sd` +} diff --git a/packages/celotool/src/lib/vm-testnet-utils.ts b/packages/celotool/src/lib/vm-testnet-utils.ts index 52dfe8258dd..9bcbde914a8 100644 --- a/packages/celotool/src/lib/vm-testnet-utils.ts +++ 
b/packages/celotool/src/lib/vm-testnet-utils.ts @@ -41,6 +41,8 @@ const testnetEnvVars: TerraformVars = { geth_verbosity: envVar.GETH_VERBOSITY, geth_bootnode_docker_image_repository: envVar.GETH_BOOTNODE_DOCKER_IMAGE_REPOSITORY, geth_bootnode_docker_image_tag: envVar.GETH_BOOTNODE_DOCKER_IMAGE_TAG, + geth_exporter_docker_image_repository: envVar.GETH_EXPORTER_DOCKER_IMAGE_REPOSITORY, + geth_exporter_docker_image_tag: envVar.GETH_EXPORTER_DOCKER_IMAGE_TAG, geth_node_docker_image_repository: envVar.GETH_NODE_DOCKER_IMAGE_REPOSITORY, geth_node_docker_image_tag: envVar.GETH_NODE_DOCKER_IMAGE_TAG, in_memory_discovery_table: envVar.IN_MEMORY_DISCOVERY_TABLE, @@ -297,6 +299,16 @@ export async function getTxNodeLoadBalancerIP(celoEnv: string) { return outputs.tx_node_lb_ip_address.value } +export async function getInternalValidatorIPs(celoEnv: string) { + const outputs = await getTestnetOutputs(celoEnv) + return outputs.validator_internal_ip_addresses.value +} + +export async function getInternalTxNodeIPs(celoEnv: string) { + const outputs = await getTestnetOutputs(celoEnv) + return outputs.tx_node_internal_ip_addresses.value +} + function getTerraformBackendConfigVars(celoEnv: string, terraformModule: string) { return { prefix: `${celoEnv}/${terraformModule}`, diff --git a/packages/helm-charts/prometheus-to-sd/templates/deployment.yaml b/packages/helm-charts/prometheus-to-sd/templates/deployment.yaml index 2961af21607..2b73186ee63 100644 --- a/packages/helm-charts/prometheus-to-sd/templates/deployment.yaml +++ b/packages/helm-charts/prometheus-to-sd/templates/deployment.yaml @@ -1,5 +1,5 @@ apiVersion: apps/v1beta1 -kind: Deployment +kind: StatefulSet metadata: name: {{ template "prometheus-to-sd.fullname" . }} labels: @@ -9,6 +9,7 @@ metadata: heritage: {{ .Release.Service }} spec: replicas: {{ .Values.replicaCount }} + serviceName: {{ template "prometheus-to-sd.fullname" . 
}} template: metadata: labels: @@ -23,16 +24,48 @@ spec: - name: profiler containerPort: {{ .Values.port }} command: - - /monitor - - --stackdriver-prefix=custom.googleapis.com - {{- range $key, $value := .Values.metricsSources }} - - --source={{ $key }}:{{ $value }} - {{- end }} - - --scrape-interval={{ .Values.promtosd.scrape_interval }} - - --export-interval={{ .Values.promtosd.export_interval }} + - /bin/sh + - "-c" + - |- + INDEX=${POD_NAME##*-} + + NAMESPACE_ID="{{ .Values.promtosd.namespaceId }}" + NAMESPACE_ID_FLAG="" + [ "$NAMESPACE_ID" ] && NAMESPACE_ID_FLAG="--namespace-id=$NAMESPACE_ID" + + POD_ID=`echo -n {{ .Values.promtosd.podIds }} | cut -d ',' -f $((INDEX + 1))` + POD_ID_FLAG="" + [ "$POD_ID" ] && POD_ID_FLAG="--pod-id=$POD_ID" + + /monitor \ + --stackdriver-prefix=custom.googleapis.com \ + {{- range $key, $value := .Values.metricsSources }} + --source={{ $key }}:$(echo -n "{{ $value }}" | cut -d ',' -f $((INDEX + 1))) \ + {{- end }} + --scrape-interval={{ .Values.promtosd.scrape_interval }} \ + --export-interval={{ .Values.promtosd.export_interval }} \ + $POD_ID_FLAG \ + $NAMESPACE_ID_FLAG resources: {{ toYaml .Values.resources | indent 12 }} + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name {{- if .Values.nodeSelector }} nodeSelector: {{ toYaml .Values.nodeSelector | indent 8 }} - {{- end }} \ No newline at end of file + {{- end }} +--- + +apiVersion: v1 +kind: Service +metadata: + name: {{ template "prometheus-to-sd.fullname" . }} + labels: + component: {{ template "prometheus-to-sd.fullname" . }} +spec: + clusterIP: None + selector: + app: {{ template "prometheus-to-sd.name" . 
}} diff --git a/packages/helm-charts/prometheus-to-sd/values.yaml b/packages/helm-charts/prometheus-to-sd/values.yaml index 09218c41abe..0059ab9dfa1 100644 --- a/packages/helm-charts/prometheus-to-sd/values.yaml +++ b/packages/helm-charts/prometheus-to-sd/values.yaml @@ -3,7 +3,13 @@ image: repository: gcr.io/google-containers/prometheus-to-sd tag: v0.3.2 pullPolicy: IfNotPresent -resources: {} +resources: + requests: + memory: 50M + cpu: 5m port: 6060 metricsSources: {} -nodeSelector: {} \ No newline at end of file +nodeSelector: {} +promtosd: + podIds: "" + namespaceId: "" diff --git a/packages/terraform-modules/testnet/main.tf b/packages/terraform-modules/testnet/main.tf index 6b926adf176..0ebd9bf49de 100644 --- a/packages/terraform-modules/testnet/main.tf +++ b/packages/terraform-modules/testnet/main.tf @@ -20,6 +20,11 @@ data "terraform_remote_state" "state" { } } +locals { + firewall_target_tags_bootnode = ["${var.celo_env}-bootnode"] + firewall_target_tags_node = ["${var.celo_env}-node"] +} + data "google_compute_network" "network" { name = var.network_name } @@ -28,6 +33,8 @@ resource "google_compute_firewall" "ssh_firewall" { name = "${var.celo_env}-ssh-firewall" network = data.google_compute_network.network.name + target_tags = concat(local.firewall_target_tags_bootnode, local.firewall_target_tags_node) + allow { protocol = "tcp" ports = ["22"] @@ -38,6 +45,8 @@ resource "google_compute_firewall" "geth_firewall" { name = "${var.celo_env}-geth-firewall" network = data.google_compute_network.network.name + target_tags = local.firewall_target_tags_node + allow { protocol = "tcp" ports = ["30303"] @@ -49,10 +58,27 @@ resource "google_compute_firewall" "geth_firewall" { } } +resource "google_compute_firewall" "geth_metrics_firewall" { + name = "${var.celo_env}-geth-metrics-firewall" + network = data.google_compute_network.network.name + + target_tags = local.firewall_target_tags_node + + # allow all IPs internal to the VPC + source_ranges = ["10.0.0.0/8"] + + 
allow { + protocol = "tcp" + ports = ["9200"] + } +} + resource "google_compute_firewall" "rpc_firewall" { name = "${var.celo_env}-rpc-firewall" network = data.google_compute_network.network.name + target_tags = local.firewall_target_tags_node + allow { protocol = "tcp" ports = ["8545", "8546"] @@ -63,6 +89,8 @@ resource "google_compute_firewall" "bootnode_firewall" { name = "${var.celo_env}-bootnode-firewall" network = data.google_compute_network.network.name + target_tags = local.firewall_target_tags_bootnode + allow { protocol = "udp" ports = ["30301"] @@ -93,6 +121,8 @@ module "tx_node" { gcloud_secrets_bucket = var.gcloud_secrets_bucket gcloud_vm_service_account_email = var.gcloud_vm_service_account_email genesis_content_base64 = var.genesis_content_base64 + geth_exporter_docker_image_repository = var.geth_exporter_docker_image_repository + geth_exporter_docker_image_tag = var.geth_exporter_docker_image_tag geth_node_docker_image_repository = var.geth_node_docker_image_repository geth_node_docker_image_tag = var.geth_node_docker_image_tag geth_verbosity = var.geth_verbosity @@ -123,6 +153,8 @@ module "validator" { gcloud_secrets_bucket = var.gcloud_secrets_bucket gcloud_vm_service_account_email = var.gcloud_vm_service_account_email genesis_content_base64 = var.genesis_content_base64 + geth_exporter_docker_image_repository = var.geth_exporter_docker_image_repository + geth_exporter_docker_image_tag = var.geth_exporter_docker_image_tag geth_node_docker_image_repository = var.geth_node_docker_image_repository geth_node_docker_image_tag = var.geth_node_docker_image_tag geth_verbosity = var.geth_verbosity diff --git a/packages/terraform-modules/testnet/modules/bootnode/main.tf b/packages/terraform-modules/testnet/modules/bootnode/main.tf index 50858cb72ec..26546019340 100644 --- a/packages/terraform-modules/testnet/modules/bootnode/main.tf +++ b/packages/terraform-modules/testnet/modules/bootnode/main.tf @@ -11,6 +11,8 @@ resource "google_compute_instance" 
"bootnode" { name = local.name_prefix machine_type = "n1-standard-1" + tags = [local.name_prefix] + allow_stopping_for_update = true boot_disk { diff --git a/packages/terraform-modules/testnet/modules/tx-node/main.tf b/packages/terraform-modules/testnet/modules/tx-node/main.tf index 3d3d6b422bf..73260bc9e9c 100644 --- a/packages/terraform-modules/testnet/modules/tx-node/main.tf +++ b/packages/terraform-modules/testnet/modules/tx-node/main.tf @@ -13,12 +13,22 @@ resource "google_compute_address" "tx_node" { } } +resource "google_compute_address" "tx_node_internal" { + name = "${local.name_prefix}-internal-address-${count.index}-${random_id.tx_node[count.index].hex}" + address_type = "INTERNAL" + purpose = "GCE_ENDPOINT" + + count = var.tx_node_count +} + resource "google_compute_instance" "tx_node" { name = "${local.name_prefix}-${count.index}-${random_id.tx_node[count.index].hex}" machine_type = "n1-standard-1" count = var.tx_node_count + tags = ["${var.celo_env}-node"] + allow_stopping_for_update = true boot_disk { @@ -33,6 +43,7 @@ resource "google_compute_instance" "tx_node" { network_interface { network = var.network_name + network_ip = google_compute_address.tx_node_internal[count.index].address access_config { nat_ip = google_compute_address.tx_node[count.index].address } @@ -46,6 +57,8 @@ resource "google_compute_instance" "tx_node" { gcloud_secrets_base_path : var.gcloud_secrets_base_path, gcloud_secrets_bucket : var.gcloud_secrets_bucket, genesis_content_base64 : var.genesis_content_base64, + geth_exporter_docker_image_repository : var.geth_exporter_docker_image_repository, + geth_exporter_docker_image_tag : var.geth_exporter_docker_image_tag, geth_node_docker_image_repository : var.geth_node_docker_image_repository, geth_node_docker_image_tag : var.geth_node_docker_image_tag, geth_verbosity : var.geth_verbosity, diff --git a/packages/terraform-modules/testnet/modules/tx-node/outputs.tf b/packages/terraform-modules/testnet/modules/tx-node/outputs.tf index 
7996b2d56ed..f8749b4ee87 100644 --- a/packages/terraform-modules/testnet/modules/tx-node/outputs.tf +++ b/packages/terraform-modules/testnet/modules/tx-node/outputs.tf @@ -1,3 +1,7 @@ +output internal_ip_addresses { + value = google_compute_address.tx_node_internal.*.address +} + output ip_addresses { value = google_compute_address.tx_node.*.address } diff --git a/packages/terraform-modules/testnet/modules/tx-node/startup.sh b/packages/terraform-modules/testnet/modules/tx-node/startup.sh index 054ab8a8be8..67bafa60a4a 100644 --- a/packages/terraform-modules/testnet/modules/tx-node/startup.sh +++ b/packages/terraform-modules/testnet/modules/tx-node/startup.sh @@ -25,6 +25,8 @@ systemctl restart docker # ---- Set Up and Run Geth ---- +DATA_DIR=/root/.celo + GETH_NODE_DOCKER_IMAGE=${geth_node_docker_image_repository}:${geth_node_docker_image_tag} # download & apply secrets pulled from Cloud Storage as environment vars @@ -48,19 +50,19 @@ IN_MEMORY_DISCOVERY_TABLE_FLAG="" echo "Starting geth..." 
# We need to override the entrypoint in the geth image (which is originally `geth`) -docker run -p 8545:8545/tcp -p 8546:8546/tcp --name geth --net=host --entrypoint /bin/sh -d $GETH_NODE_DOCKER_IMAGE -c "\ +docker run -v $DATA_DIR:$DATA_DIR -p 8545:8545/tcp -p 8546:8546/tcp --name geth --net=host --entrypoint /bin/sh -d $GETH_NODE_DOCKER_IMAGE -c "\ set -euo pipefail && \ - mkdir -p /root/.celo/account /var/geth && \ + mkdir -p $DATA_DIR/account /var/geth && \ echo -n '${genesis_content_base64}' | base64 -d > /var/geth/genesis.json && \ - echo -n '${rid}' > /root/.celo/replica_id && \ - echo -n '${ip_address}' > /root/.celo/ipAddress && \ - echo -n '$PRIVATE_KEY' > /root/.celo/pkey && \ - echo -n '$ACCOUNT_ADDRESS' > /root/.celo/address && \ - echo -n '$BOOTNODE_ENODE_ADDRESS' > /root/.celo/bootnodeEnodeAddress && \ - echo -n '$BOOTNODE_ENODE' > /root/.celo/bootnodeEnode && \ - echo -n '$GETH_ACCOUNT_SECRET' > /root/.celo/account/accountSecret && \ + echo -n '${rid}' > $DATA_DIR/replica_id && \ + echo -n '${ip_address}' > $DATA_DIR/ipAddress && \ + echo -n '$PRIVATE_KEY' > $DATA_DIR/pkey && \ + echo -n '$ACCOUNT_ADDRESS' > $DATA_DIR/address && \ + echo -n '$BOOTNODE_ENODE_ADDRESS' > $DATA_DIR/bootnodeEnodeAddress && \ + echo -n '$BOOTNODE_ENODE' > $DATA_DIR/bootnodeEnode && \ + echo -n '$GETH_ACCOUNT_SECRET' > $DATA_DIR/account/accountSecret && \ geth init /var/geth/genesis.json && \ - geth account import --password /root/.celo/account/accountSecret /root/.celo/pkey && \ + geth account import --password $DATA_DIR/account/accountSecret $DATA_DIR/pkey && \ geth \ --bootnodes=enode://$BOOTNODE_ENODE \ --lightserv 90 \ @@ -75,7 +77,7 @@ docker run -p 8545:8545/tcp -p 8546:8546/tcp --name geth --net=host --entrypoint --wsaddr 0.0.0.0 \ --wsorigins=* \ --wsapi=eth,net,web3,debug \ - --nodekey=/root/.celo/pkey \ + --nodekey=$DATA_DIR/pkey \ --etherbase=$ACCOUNT_ADDRESS \ --networkid=${network_id} \ --syncmode=full \ @@ -87,3 +89,15 @@ docker run -p 8545:8545/tcp -p 
8546:8546/tcp --name geth --net=host --entrypoint --nat=extip:${ip_address} \ --metrics \ $IN_MEMORY_DISCOVERY_TABLE_FLAG" + +# ---- Set Up and Run Geth Exporter ---- + +GETH_EXPORTER_DOCKER_IMAGE=${geth_exporter_docker_image_repository}:${geth_exporter_docker_image_tag} + +echo "Pulling geth exporter..." +docker pull $GETH_EXPORTER_DOCKER_IMAGE + +docker run -v $DATA_DIR:$DATA_DIR --name geth-exporter --net=host -d $GETH_EXPORTER_DOCKER_IMAGE \ + /usr/local/bin/geth_exporter \ + -ipc $DATA_DIR/geth.ipc \ + -filter "(.*overall|percentiles_95)" diff --git a/packages/terraform-modules/testnet/modules/tx-node/variables.tf b/packages/terraform-modules/testnet/modules/tx-node/variables.tf index c079e92aae9..d0daf3ae4ff 100644 --- a/packages/terraform-modules/testnet/modules/tx-node/variables.tf +++ b/packages/terraform-modules/testnet/modules/tx-node/variables.tf @@ -38,6 +38,16 @@ variable genesis_content_base64 { description = "Content of the genesis file encoded in base64" } +variable geth_exporter_docker_image_repository { + type = string + description = "Repository of the geth exporter docker image" +} + +variable geth_exporter_docker_image_tag { + type = string + description = "Tag of the geth exporter docker image" +} + variable geth_node_docker_image_repository { type = string description = "Repository of the geth docker image" diff --git a/packages/terraform-modules/testnet/modules/validator/main.tf b/packages/terraform-modules/testnet/modules/validator/main.tf index ac8fa42e90e..b8c594325c0 100644 --- a/packages/terraform-modules/testnet/modules/validator/main.tf +++ b/packages/terraform-modules/testnet/modules/validator/main.tf @@ -10,12 +10,22 @@ resource "google_compute_address" "validator" { count = var.validator_count } +resource "google_compute_address" "validator_internal" { + name = "${local.name_prefix}-internal-address-${count.index}" + address_type = "INTERNAL" + purpose = "GCE_ENDPOINT" + + count = var.validator_count +} + resource 
"google_compute_instance" "validator" { name = "${local.name_prefix}-${count.index}" machine_type = "n1-standard-1" count = var.validator_count + tags = ["${var.celo_env}-node"] + allow_stopping_for_update = true boot_disk { @@ -31,6 +41,7 @@ resource "google_compute_instance" "validator" { network_interface { network = var.network_name + network_ip = google_compute_address.validator_internal[count.index].address access_config { nat_ip = google_compute_address.validator[count.index].address } @@ -45,6 +56,8 @@ resource "google_compute_instance" "validator" { gcloud_secrets_base_path : var.gcloud_secrets_base_path, gcloud_secrets_bucket : var.gcloud_secrets_bucket, genesis_content_base64 : var.genesis_content_base64, + geth_exporter_docker_image_repository : var.geth_exporter_docker_image_repository, + geth_exporter_docker_image_tag : var.geth_exporter_docker_image_tag, geth_node_docker_image_repository : var.geth_node_docker_image_repository, geth_node_docker_image_tag : var.geth_node_docker_image_tag, geth_verbosity : var.geth_verbosity, diff --git a/packages/terraform-modules/testnet/modules/validator/outputs.tf b/packages/terraform-modules/testnet/modules/validator/outputs.tf new file mode 100644 index 00000000000..36040dbb6ab --- /dev/null +++ b/packages/terraform-modules/testnet/modules/validator/outputs.tf @@ -0,0 +1,3 @@ +output internal_ip_addresses { + value = google_compute_address.validator_internal.*.address +} diff --git a/packages/terraform-modules/testnet/modules/validator/startup.sh b/packages/terraform-modules/testnet/modules/validator/startup.sh index a2da75fa512..6bda9f14ea0 100644 --- a/packages/terraform-modules/testnet/modules/validator/startup.sh +++ b/packages/terraform-modules/testnet/modules/validator/startup.sh @@ -121,3 +121,15 @@ docker run -v $DATA_DIR:$DATA_DIR --name geth --net=host --entrypoint /bin/sh -d --metrics \ $IN_MEMORY_DISCOVERY_TABLE_FLAG \ )" + +# ---- Set Up and Run Geth Exporter ---- + 
+GETH_EXPORTER_DOCKER_IMAGE=${geth_exporter_docker_image_repository}:${geth_exporter_docker_image_tag} + +echo "Pulling geth exporter..." +docker pull $GETH_EXPORTER_DOCKER_IMAGE + +docker run -v $DATA_DIR:$DATA_DIR --name geth-exporter --net=host -d $GETH_EXPORTER_DOCKER_IMAGE \ + /usr/local/bin/geth_exporter \ + -ipc $DATA_DIR/geth.ipc \ + -filter "(.*overall|percentiles_95)" diff --git a/packages/terraform-modules/testnet/modules/validator/variables.tf b/packages/terraform-modules/testnet/modules/validator/variables.tf index ca03113d7a5..580fe7de5ab 100644 --- a/packages/terraform-modules/testnet/modules/validator/variables.tf +++ b/packages/terraform-modules/testnet/modules/validator/variables.tf @@ -38,6 +38,16 @@ variable genesis_content_base64 { description = "Content of the genesis file encoded in base64" } +variable geth_exporter_docker_image_repository { + type = string + description = "Repository of the geth exporter docker image" +} + +variable geth_exporter_docker_image_tag { + type = string + description = "Tag of the geth exporter docker image" +} + variable geth_node_docker_image_repository { type = string description = "Repository of the geth docker image" diff --git a/packages/terraform-modules/testnet/outputs.tf b/packages/terraform-modules/testnet/outputs.tf index cc34fb68319..bba30a8c0dd 100644 --- a/packages/terraform-modules/testnet/outputs.tf +++ b/packages/terraform-modules/testnet/outputs.tf @@ -1,3 +1,7 @@ +output tx_node_internal_ip_addresses { + value = module.tx_node.internal_ip_addresses +} + output tx_node_ip_addresses { value = module.tx_node.ip_addresses } @@ -5,3 +9,7 @@ output tx_node_ip_addresses { output tx_node_lb_ip_address { value = module.tx_node_lb.ip_address } + +output validator_internal_ip_addresses { + value = module.validator.internal_ip_addresses +} diff --git a/packages/terraform-modules/testnet/variables.tf b/packages/terraform-modules/testnet/variables.tf index 18c9a0ba6be..44f29fdec78 100644 --- 
a/packages/terraform-modules/testnet/variables.tf +++ b/packages/terraform-modules/testnet/variables.tf @@ -53,6 +53,16 @@ variable geth_bootnode_docker_image_tag { description = "Tag of the bootnode docker image" } +variable geth_exporter_docker_image_repository { + type = string + description = "Repository of the geth exporter docker image" +} + +variable geth_exporter_docker_image_tag { + type = string + description = "Tag of the geth exporter docker image" +} + variable geth_node_docker_image_repository { type = string description = "Repository of the geth docker image"