diff --git a/spartan/aztec-network/templates/boot-node.yaml b/spartan/aztec-network/templates/boot-node.yaml index 5bf506ff585..5f29df22010 100644 --- a/spartan/aztec-network/templates/boot-node.yaml +++ b/spartan/aztec-network/templates/boot-node.yaml @@ -235,7 +235,7 @@ spec: apiVersion: v1 kind: Service metadata: - name: {{ include "aztec-network.fullname" . }}-boot-node-lb-tcp + name: boot-node-lb-tcp labels: {{- include "aztec-network.labels" . | nindent 4 }} spec: @@ -252,7 +252,7 @@ spec: apiVersion: v1 kind: Service metadata: - name: {{ include "aztec-network.fullname" . }}-boot-node-lb-udp + name: boot-node-lb-udp annotations: service.beta.kubernetes.io/aws-load-balancer-type: "nlb" service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: "ip" diff --git a/spartan/aztec-network/templates/prover-node.yaml b/spartan/aztec-network/templates/prover-node.yaml index bd2f305710b..d06322b4b79 100644 --- a/spartan/aztec-network/templates/prover-node.yaml +++ b/spartan/aztec-network/templates/prover-node.yaml @@ -182,11 +182,11 @@ spec: name: p2p-udp protocol: UDP --- -{{if .Values.network.public }} +{{if .Values.proverNode.public }} apiVersion: v1 kind: Service metadata: - name: {{ include "aztec-network.fullname" . }}-prover-node-lb-tcp + name: prover-node-lb-tcp labels: {{- include "aztec-network.labels" . | nindent 4 }} spec: @@ -203,7 +203,7 @@ spec: apiVersion: v1 kind: Service metadata: - name: {{ include "aztec-network.fullname" . }}-prover-node-lb-udp + name: prover-node-lb-udp annotations: service.beta.kubernetes.io/aws-load-balancer-type: "nlb" service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: "ip" diff --git a/spartan/aztec-network/templates/pxe.yaml b/spartan/aztec-network/templates/pxe.yaml index dbe03396d60..dbfa87e8f3f 100644 --- a/spartan/aztec-network/templates/pxe.yaml +++ b/spartan/aztec-network/templates/pxe.yaml @@ -102,7 +102,7 @@ spec: apiVersion: v1 kind: Service metadata: - name: {{ include "aztec-network.fullname" . }}-pxe-lb + name: pxe-lb labels: {{- include "aztec-network.labels" . | nindent 4 }} spec: diff --git a/spartan/aztec-network/templates/reth.yaml b/spartan/aztec-network/templates/reth.yaml index 5c2dea00d93..7312bab7ad5 100644 --- a/spartan/aztec-network/templates/reth.yaml +++ b/spartan/aztec-network/templates/reth.yaml @@ -87,7 +87,7 @@ spec: apiVersion: v1 kind: Service metadata: - name: {{ include "aztec-network.fullname" . }}-ethereum-lb + name: ethereum-lb labels: {{- include "aztec-network.labels" . | nindent 4 }} spec: @@ -114,6 +114,7 @@ data: genesis.json: | {{ .Files.Get "files/config/genesis.json" | nindent 4 }} --- +{{- if gt (.Values.ethereum.replicas | int) 0 }} apiVersion: v1 kind: PersistentVolumeClaim metadata: @@ -125,4 +126,5 @@ spec: resources: requests: storage: {{ .Values.ethereum.storage }} +{{- end }} --- \ No newline at end of file diff --git a/spartan/aztec-network/templates/validator.yaml b/spartan/aztec-network/templates/validator.yaml index ac6223b5dff..4ec630257c3 100644 --- a/spartan/aztec-network/templates/validator.yaml +++ b/spartan/aztec-network/templates/validator.yaml @@ -219,7 +219,7 @@ spec: apiVersion: v1 kind: Service metadata: - name: {{ include "aztec-network.fullname" $ }}-validator-{{ $i }}-lb-tcp + name: validator-{{ $i }}-lb-tcp annotations: service.beta.kubernetes.io/aws-load-balancer-type: "nlb" service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: "ip" @@ -244,7 +244,7 @@ spec: apiVersion: v1 kind: Service metadata: - name: {{ include "aztec-network.fullname" $ }}-validator-{{ $i }}-lb-udp + name: validator-{{ $i }}-lb-udp annotations: service.beta.kubernetes.io/aws-load-balancer-type: "nlb" service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: "ip" diff --git a/spartan/aztec-network/values.yaml b/spartan/aztec-network/values.yaml index 4c1ff560df4..3d0172d787c 100644 --- a/spartan/aztec-network/values.yaml +++ b/spartan/aztec-network/values.yaml @@ -101,6 +101,7 @@ validator: storage: "8Gi" proverNode: + public: false externalTcpHost: "" externalUdpHost: "" replicas: 1 diff --git a/spartan/aztec-network/values/multicloud-demo.yaml b/spartan/aztec-network/values/multicloud-demo.yaml new file mode 100644 index 00000000000..6ba49557253 --- /dev/null +++ b/spartan/aztec-network/values/multicloud-demo.yaml @@ -0,0 +1,31 @@ +# deployment syntax: helm upgrade --install srp-test . -n srp-test --create-namespace --values "./values/multicloud-demo.yaml" + +telemetry: + enabled: false + otelCollectorEndpoint: http://metrics-opentelemetry-collector.metrics:4318 + +validator: + replicas: 1 + validatorKeys: + - 0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80 + validatorAddresses: + - 0xf39Fd6e51aad88F6F4ce6aB8827279cffFb92266 + validator: + disabled: false + +bootNode: + validator: + disabled: true + +proverNode: + realProofs: false + +proverAgent: + replicas: 1 + realProofs: false + bb: + hardwareConcurrency: 16 + +aztec: + slotDuration: 36 + epochDuration: 32 diff --git a/spartan/terraform/eks-cluster/firewall.tf b/spartan/terraform/eks-cluster/firewall.tf new file mode 100644 index 00000000000..5ed3cb19c8e --- /dev/null +++ b/spartan/terraform/eks-cluster/firewall.tf @@ -0,0 +1,112 @@ +# security_groups.tf + +resource "aws_security_group" "node_traffic" { + name_prefix = "eks-node-traffic" + description = "Security group for EKS node UDP and TCP traffic" + vpc_id = module.vpc.vpc_id # Fixed VPC reference to use the vpc module output + + # Ingress UDP rules + ingress { + from_port = 40400 + to_port = 40499 + protocol = "udp" + cidr_blocks = ["0.0.0.0/0"] + description = "Allow incoming UDP traffic for original port range" + } + + ingress { + from_port = 8080 + to_port = 8080 + protocol = "udp" + cidr_blocks = ["0.0.0.0/0"] + description = "Allow incoming UDP traffic on port 8080" + } + + ingress { + from_port = 8545 + to_port = 8545 + protocol = "udp" + cidr_blocks = ["0.0.0.0/0"] + description = "Allow incoming UDP traffic on port 8545" + } + + # Ingress TCP rules + ingress { + from_port = 40400 + to_port = 40499 + protocol = "tcp" + cidr_blocks = ["0.0.0.0/0"] + description = "Allow incoming TCP traffic for original port range" + } + + ingress { + from_port = 8080 + to_port = 8080 + protocol = "tcp" + cidr_blocks = ["0.0.0.0/0"] + description = "Allow incoming TCP traffic on port 8080" + } + + ingress { + from_port = 8545 + to_port = 8545 + protocol = "tcp" + cidr_blocks = ["0.0.0.0/0"] + description = "Allow incoming TCP traffic on port 8545" + } + + # Egress UDP rules + egress { + from_port = 40400 + to_port = 40499 + protocol = "udp" + cidr_blocks = ["0.0.0.0/0"] + description = "Allow outgoing UDP traffic for original port range" + } + + egress { + from_port = 8080 + to_port = 8080 + protocol = "udp" + cidr_blocks = ["0.0.0.0/0"] + description = "Allow outgoing UDP traffic on port 8080" + } + + egress { + from_port = 8545 + to_port = 8545 + protocol = "udp" + cidr_blocks = ["0.0.0.0/0"] + description = "Allow outgoing UDP traffic on port 8545" + } + + # Egress TCP rules + egress { + from_port = 40400 + to_port = 40499 + protocol = "tcp" + cidr_blocks = ["0.0.0.0/0"] + description = "Allow outgoing TCP traffic for original port range" + } + + egress { + from_port = 8080 + to_port = 8080 + protocol = "tcp" + cidr_blocks = ["0.0.0.0/0"] + description = "Allow outgoing TCP traffic on port 8080" + } + + egress { + from_port = 8545 + to_port = 8545 + protocol = "tcp" + cidr_blocks = ["0.0.0.0/0"] + description = "Allow outgoing TCP traffic on port 8545" + } + + tags = { + Name = "${var.cluster_name}-node-traffic" + Project = var.cluster_name + } +} diff --git a/spartan/terraform/eks-cluster/main.tf b/spartan/terraform/eks-cluster/main.tf index 5dc2fe23511..ee384f91353 100644 --- a/spartan/terraform/eks-cluster/main.tf +++ b/spartan/terraform/eks-cluster/main.tf @@ -26,54 +26,6 @@ data "aws_availability_zones" "available" { } } -# Create security group for node traffic -resource "aws_security_group" "node_traffic" { - name_prefix = "eks-node-traffic" - description = "Security group for EKS node UDP and TCP traffic" - vpc_id = module.vpc.vpc_id - - # Ingress UDP rule - ingress { - from_port = 40400 - to_port = 40499 - protocol = "udp" - cidr_blocks = ["0.0.0.0/0"] - description = "Allow incoming UDP traffic" - } - - # Ingress TCP rule - ingress { - from_port = 40400 - to_port = 40499 - protocol = "tcp" - cidr_blocks = ["0.0.0.0/0"] - description = "Allow incoming TCP traffic" - } - - # Egress UDP rule - egress { - from_port = 40400 - to_port = 40499 - protocol = "udp" - cidr_blocks = ["0.0.0.0/0"] - description = "Allow outgoing UDP traffic" - } - - # Egress TCP rule - egress { - from_port = 40400 - to_port = 40499 - protocol = "tcp" - cidr_blocks = ["0.0.0.0/0"] - description = "Allow outgoing TCP traffic" - } - - tags = { - Name = "${var.cluster_name}-node-traffic" - Project = var.cluster_name - } -} - module "vpc" { source = "terraform-aws-modules/vpc/aws" version = "5.8.1" diff --git a/spartan/terraform/gke-cluster/firewall.tf b/spartan/terraform/gke-cluster/firewall.tf new file mode 100644 index 00000000000..0c5741c8506 --- /dev/null +++ b/spartan/terraform/gke-cluster/firewall.tf @@ -0,0 +1,51 @@ +# Create ingress firewall rules for UDP +resource "google_compute_firewall" "udp_ingress" { + name = "allow-udp-ingress-custom" + network = "default" + allow { + protocol = "udp" + ports = ["40400-40499", "8080", "8545"] + } + direction = "INGRESS" + source_ranges = ["0.0.0.0/0"] + target_tags = ["gke-node"] +} + +# Create egress firewall rules for UDP +resource "google_compute_firewall" "udp_egress" { + name = "allow-udp-egress-custom" + network = "default" + allow { + protocol = "udp" + ports = ["40400-40499", "8080", "8545"] + } + direction = "EGRESS" + destination_ranges = ["0.0.0.0/0"] + target_tags = ["gke-node"] +} + +# Create ingress firewall rules for TCP +resource "google_compute_firewall" "tcp_ingress" { + name = "allow-tcp-ingress-custom" + network = "default" + allow { + protocol = "tcp" + ports = ["40400-40499", "8080", "8545"] + } + direction = "INGRESS" + source_ranges = ["0.0.0.0/0"] + target_tags = ["gke-node"] +} + +# Create egress firewall rules for TCP +resource "google_compute_firewall" "tcp_egress" { + name = "allow-tcp-egress-custom" + network = "default" + allow { + protocol = "tcp" + ports = ["40400-40499", "8080", "8545"] + } + direction = "EGRESS" + destination_ranges = ["0.0.0.0/0"] + target_tags = ["gke-node"] +} diff --git a/spartan/terraform/gke-cluster/main.tf b/spartan/terraform/gke-cluster/main.tf index 46c1a51dc6c..fce5b5f02e4 100644 --- a/spartan/terraform/gke-cluster/main.tf +++ b/spartan/terraform/gke-cluster/main.tf @@ -33,48 +33,17 @@ resource "google_project_iam_member" "gke_sa_roles" { "roles/monitoring.viewer", "roles/artifactregistry.reader" ]) - project = var.project role = each.key member = "serviceAccount:${google_service_account.gke_sa.email}" } -# Create ingress firewall rule for UDP -resource "google_compute_firewall" "udp_ingress" { - name = "allow-udp-ingress-40400-40499" - network = "default" - - allow { - protocol = "udp" - ports = ["40400-40499"] - } - - direction = "INGRESS" - source_ranges = ["0.0.0.0/0"] - target_tags = ["gke-node"] -} - -# Create egress firewall rule for UDP -resource "google_compute_firewall" "udp_egress" { - name = "allow-udp-egress-40400-40499" - network = "default" - - allow { - protocol = "udp" - ports = ["40400-40499"] - } - - direction = "EGRESS" - destination_ranges = ["0.0.0.0/0"] - target_tags = ["gke-node"] -} - # Create a GKE cluster resource "google_container_cluster" "primary" { - name = "spartan-gke" - location = var.zone - initial_node_count = 1 + name = "spartan-gke" + location = var.zone + initial_node_count = 1 # Remove default node pool after cluster creation remove_default_node_pool = true @@ -107,7 +76,7 @@ resource "google_container_node_pool" "primary_nodes" { # Node configuration node_config { - machine_type = "t2d-standard-16" + machine_type = "t2d-standard-32" service_account = google_service_account.gke_sa.email oauth_scopes = [ @@ -117,7 +86,6 @@ resource "google_container_node_pool" "primary_nodes" { labels = { env = "production" } - tags = ["gke-node"] } @@ -142,7 +110,7 @@ resource "google_container_node_pool" "spot_nodes" { # Node configuration node_config { - machine_type = "t2d-standard-16" + machine_type = "t2d-standard-32" spot = true service_account = google_service_account.gke_sa.email @@ -154,7 +122,6 @@ resource "google_container_node_pool" "spot_nodes" { env = "production" pool = "spot" } - tags = ["gke-node", "spot"] # Spot instance termination handler diff --git a/spartan/terraform/multicloud-deploy/data.tf b/spartan/terraform/multicloud-deploy/data.tf new file mode 100644 index 00000000000..cf9a945c0bd --- /dev/null +++ b/spartan/terraform/multicloud-deploy/data.tf @@ -0,0 +1,54 @@ +# Get the LoadBalancer DNS names using a data source +data "kubernetes_service" "lb_ethereum_tcp" { + provider = kubernetes.eks-cluster + metadata { + name = "ethereum-lb" + namespace = var.testnet_name + } + depends_on = [helm_release.aztec-eks-cluster] +} + +data "kubernetes_service" "lb_boot_node_tcp" { + provider = kubernetes.eks-cluster + metadata { + name = "boot-node-lb-tcp" + namespace = var.testnet_name + } + depends_on = [helm_release.aztec-eks-cluster] +} + +data "kubernetes_service" "lb_boot_node_udp" { + provider = kubernetes.eks-cluster + metadata { + name = "boot-node-lb-udp" + namespace = var.testnet_name + } + depends_on = [helm_release.aztec-eks-cluster] +} + +data "kubernetes_service" "lb_validator_tcp" { + provider = kubernetes.eks-cluster + metadata { + name = "validator-0-lb-tcp" + namespace = var.testnet_name + } + depends_on = [helm_release.aztec-eks-cluster] +} + +data "kubernetes_service" "lb_validator_udp" { + provider = kubernetes.eks-cluster + metadata { + name = "validator-0-lb-udp" + namespace = var.testnet_name + } + depends_on = [helm_release.aztec-eks-cluster] +} + +data "kubernetes_service" "lb_pxe_tcp" { + provider = kubernetes.eks-cluster + metadata { + name = "pxe-lb" + namespace = var.testnet_name + } + depends_on = [helm_release.aztec-eks-cluster] +} diff --git a/spartan/terraform/multicloud-deploy/main.tf b/spartan/terraform/multicloud-deploy/main.tf index add82c42b26..4cf66659de0 100644 --- a/spartan/terraform/multicloud-deploy/main.tf +++ b/spartan/terraform/multicloud-deploy/main.tf @@ -54,10 +54,28 @@ resource "helm_release" "aztec-eks-cluster" { chart = "aztec-network" namespace = var.testnet_name create_namespace = true - values = [file("../../aztec-network/values/${var.eks-values-file}")] + + # base values file + values = [file("../../aztec-network/values/${var.values-file}")] + + # removing prover nodes + set { + name = "network.public" + value = true + } + + set { + name = "proverNode.replicas" + value = "0" + } + + set { + name = "proverAgent.replicas" + value = "0" + } # Setting timeout and wait conditions - timeout = 1800 # 30 minutes in seconds + timeout = 600 # 10 minutes in seconds wait = true wait_for_jobs = true } @@ -70,10 +88,83 @@ resource "helm_release" "aztec-gke-cluster" { chart = "aztec-network" namespace = var.testnet_name create_namespace = true - values = [file("../../aztec-network/values/${var.gke-values-file}")] + + # base values file + values = [file("../../aztec-network/values/${var.values-file}")] + + # disabling all nodes except provers + set { + name = "network.setupL2Contracts" + value = false + } + + set { + name = "proverNode.public" + value = true + } + + set { + name = "bootNode.replicas" + value = "0" + } + + set { + name = "validator.replicas" + value = "0" + } + + set { + name = "pxe.replicas" + value = "0" + } + + set { + name = "bot.replicas" + value = "0" + } + + set { + name = "ethereum.replicas" + value = "0" + } + + # pointing Google Cloud provers to nodes in AWS + set { + name = "ethereum.externalHost" + value = data.kubernetes_service.lb_ethereum_tcp.status.0.load_balancer.0.ingress.0.hostname + } + + set { + name = "bootNode.externalTcpHost" + value = data.kubernetes_service.lb_boot_node_tcp.status.0.load_balancer.0.ingress.0.hostname + } + + set { + name = "bootNode.externalUdpHost" + value = data.kubernetes_service.lb_boot_node_udp.status.0.load_balancer.0.ingress.0.hostname + } + + set { + name = "validator.externalTcpHost" + value = data.kubernetes_service.lb_validator_tcp.status.0.load_balancer.0.ingress.0.hostname + } + + set { + name = "validator.externalUdpHost" + value = data.kubernetes_service.lb_validator_udp.status.0.load_balancer.0.ingress.0.hostname + } + + set { + name = "pxe.externalHost" + value = data.kubernetes_service.lb_pxe_tcp.status.0.load_balancer.0.ingress.0.hostname + } # Setting timeout and wait conditions - timeout = 1800 # 30 minutes in seconds + timeout = 600 # 10 minutes in seconds wait = true wait_for_jobs = true + + depends_on = [ + helm_release.aztec-eks-cluster + ] } diff --git a/spartan/terraform/multicloud-deploy/outputs.tf b/spartan/terraform/multicloud-deploy/outputs.tf index 90b7a51a6d0..fb922104e1f 100644 --- a/spartan/terraform/multicloud-deploy/outputs.tf +++ b/spartan/terraform/multicloud-deploy/outputs.tf @@ -6,20 +6,56 @@ output "eks_cluster_deployment" { chart = helm_release.aztec-eks-cluster.chart version = helm_release.aztec-eks-cluster.version status = helm_release.aztec-eks-cluster.status - values_file = var.eks-values-file + values_file = var.values-file cluster = var.eks_cluster_context } } -output "gke_cluster_deployment" { - description = "Details of the GKE cluster Helm deployment" - value = { - name = helm_release.aztec-gke-cluster.name - namespace = helm_release.aztec-gke-cluster.namespace - chart = helm_release.aztec-gke-cluster.chart - version = helm_release.aztec-gke-cluster.version - status = helm_release.aztec-gke-cluster.status - values_file = var.gke-values-file - cluster = var.gke_cluster_context - } +# output "gke_cluster_deployment" { +# description = "Details of the GKE cluster Helm deployment" +# value = { +# name = helm_release.aztec-gke-cluster.name +# namespace = helm_release.aztec-gke-cluster.namespace +# chart = helm_release.aztec-gke-cluster.chart +# version = helm_release.aztec-gke-cluster.version +# status = helm_release.aztec-gke-cluster.status +# values_file = var.values-file +# cluster = var.gke_cluster_context +# } +# } + +output "external_ethereum_tcp" { + description = "DNS hostname of the EKS Ethereum LoadBalancer" + value = data.kubernetes_service.lb_ethereum_tcp.status.0.load_balancer.0.ingress.0.hostname + depends_on = [helm_release.aztec-eks-cluster] +} + +output "external_boot_node_tcp" { + description = "DNS hostname of the EKS boot node LoadBalancer" + value = data.kubernetes_service.lb_boot_node_tcp.status.0.load_balancer.0.ingress.0.hostname + depends_on = [helm_release.aztec-eks-cluster] +} + +output "external_boot_node_udp" { + description = "DNS hostname of the EKS boot node LoadBalancer" + value = data.kubernetes_service.lb_boot_node_udp.status.0.load_balancer.0.ingress.0.hostname + depends_on = [helm_release.aztec-eks-cluster] +} + +output "external_validator_tcp" { + description = "DNS hostname of the EKS validator LoadBalancer" + value = data.kubernetes_service.lb_validator_tcp.status.0.load_balancer.0.ingress.0.hostname + depends_on = [helm_release.aztec-eks-cluster] +} + +output "external_validator_udp" { + description = "DNS hostname of the EKS validator LoadBalancer" + value = data.kubernetes_service.lb_validator_udp.status.0.load_balancer.0.ingress.0.hostname + depends_on = [helm_release.aztec-eks-cluster] +} + +output "external_pxe_tcp" { + description = "DNS hostname of the EKS PXE LoadBalancer" + value = data.kubernetes_service.lb_pxe_tcp.status.0.load_balancer.0.ingress.0.hostname + depends_on = [helm_release.aztec-eks-cluster] } diff --git a/spartan/terraform/multicloud-deploy/variables.tf b/spartan/terraform/multicloud-deploy/variables.tf index b58de3d7d5a..91486bd432a 100644 --- a/spartan/terraform/multicloud-deploy/variables.tf +++ b/spartan/terraform/multicloud-deploy/variables.tf @@ -7,23 +7,17 @@ variable "eks_cluster_context" { variable "gke_cluster_context" { description = "GKE cluster context" type = string - default = "gke_testnet-440309_us-east1_spartan-provers" + default = "gke_testnet-440309_us-east4-a_spartan-gke" } variable "testnet_name" { description = "Name of helm deployment and k8s namespace" type = string - default = "terratest" + default = "multicloud" } -variable "eks-values-file" { - description = "Name of the values file to use for eks cluster" +variable "values-file" { + description = "Name of the values file to use for deployment" type = string - default = "1-validators.yaml" -} - -variable "gke-values-file" { - description = "Name of the values file to use for gke cluster" - type = string - default = "1-validators.yaml" + default = "multicloud-demo.yaml" }