Skip to content

Commit

Permalink
feat: BYO VPC (#168)
Browse files Browse the repository at this point in the history
* Adding conditional creation of Elasticache subnet group for BYOVPC + redis deployment case

* feat: BYO VPC and Elasticache changes

* feat: add ALB internal option

* feat: allow custom override of subnets

---------

Co-authored-by: Timothy Shoaf <tshoaf@roblox.com>
Co-authored-by: Justin Brooks <jsbroks@gmail.com>
Co-authored-by: semantic-release-bot <semantic-release-bot@martynus.net>
Co-authored-by: George Scott <george.scott@wandb.com>
Co-authored-by: Veronica JungYeon Kim <veronica@wandb.com>
  • Loading branch information
6 people authored Feb 20, 2024
1 parent c6390f2 commit 35b1efc
Show file tree
Hide file tree
Showing 9 changed files with 434 additions and 11 deletions.
67 changes: 67 additions & 0 deletions examples/byo-vpc/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Deploy W&B required infrastructure to an existing VPC and existing EKS

## About

This is a minimal example of what is needed to deploy an instance of
Weights & Biases, using an external DNS provider, into an already existing VPC.

## Module Prerequisites

As with the main version of this module, this example assumes the following
resources already exist:

- Valid subdomain that uses Amazon Route 53 as the DNS service ([Learn more
here](https://docs.aws.amazon.com/Route53/latest/DeveloperGuide/CreatingNewSubdomain.html))
1. Create a Route53 zone for `<subdomain>.<domain_name>`. When you want to use
Amazon Route 53 as the DNS service for a new subdomain without migrating
the parent domain, you start by creating a hosted zone for the subdomain.
2. Create a Name Server (NS) record in your external DNS provider that points to
this Route53 zone. Update the DNS service for the parent domain by adding
NS records for the subdomain. This is known as delegating responsibility
for the subdomain to Route 53. For example, if the parent domain
example.com is hosted with another DNS service and you created the
subdomain test.example.com in Route 53, you must update the DNS service for
example.com with new NS records for test.example.com.
- An existing VPC with public and/or private subnets.
- Valid W&B Local license (you can get one [here](https://deploy.wandb.ai))

## Sample Terraform variables file

Create a `terraform.tfvars` file in this directory before running this example

```ini
namespace = ""
domain_name = ""
zone_id = "Z0322..."
subdomain = ""
wandb_license = ""
wandb_version = "0.49.0"

#allowed_inbound_cidr = [
# "0.0.0.0/0",
# "192.168.0.0/16"
#]

#disable ipv6
#allowed_inbound_ipv6_cidr = ["::/1"]

enable_dummy_dns = false
enable_operator_alb = false

eks_cluster_version = "1.25"

vpc_id = "vpc-0a..."
vpc_cidr = "10.x.x.x/x"

network_private_subnets = [ "subnet-03...", "subnet-08..." ]
network_private_subnet_cidrs = ["10.x.x.x/x", "10.x.x.x/x"]

network_public_subnets = []
network_public_subnet_cidrs = []

network_database_subnets = [ "subnet-06...", "subnet-02..." ]
network_database_subnet_cidrs = ["10.x.x.x/x", "10.x.x.x/x"]

network_elasticache_subnets = [ "subnet-05...", "subnet-0e..." ]
# network_elasticache_subnet_cidrs = ["10.x.x.x/x", "10.x.x.x/x"]
```
131 changes: 131 additions & 0 deletions examples/byo-vpc/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# AWS provider for the bring-your-own-VPC example.
# The default tags below are applied through this provider configuration.
provider "aws" {
  region = "us-west-2"

  default_tags {
    tags = {
      GithubRepo = "terraform-aws-wandb"
      GithubOrg  = "wandb"
      # Tag key was previously misspelled as "Enviroment".
      Environment = "Example"
      # Identifies this example; previously carried the name of the
      # public-dns-external example it was copied from.
      Example = "ByoVpc"
    }
  }
}

# Deploys the W&B infrastructure from the root module of this repository into
# an existing VPC whose ID, CIDR and subnets are supplied through variables.
module "wandb_infra" {
  source = "../../"

  namespace     = var.namespace
  public_access = true
  external_dns  = true

  enable_dummy_dns    = var.enable_dummy_dns
  enable_operator_alb = var.enable_operator_alb

  deletion_protection = true

  # Bring-your-own-VPC: no VPC is created; the network below is used instead.
  create_vpc = false

  size = "medium"

  network_id   = var.vpc_id
  network_cidr = var.vpc_cidr

  # Subnet IDs and their CIDR ranges inside the existing VPC.
  network_private_subnets       = var.network_private_subnets
  network_public_subnets        = var.network_public_subnets
  network_database_subnets      = var.network_database_subnets
  network_private_subnet_cidrs  = var.network_private_subnet_cidrs
  network_public_subnet_cidrs   = var.network_public_subnet_cidrs
  network_database_subnet_cidrs = var.network_database_subnet_cidrs
  network_elasticache_subnets   = var.network_elasticache_subnets

  database_instance_class      = var.database_instance_class
  database_engine_version      = var.database_engine_version
  database_snapshot_identifier = var.database_snapshot_identifier
  database_sort_buffer_size    = var.database_sort_buffer_size

  allowed_inbound_cidr = var.allowed_inbound_cidr
  # Wired to the variable instead of a hard-coded ["::/0"] so that a value set
  # in terraform.tfvars takes effect. The variable's default is ["::/0"], so
  # behavior is unchanged when it is not overridden.
  allowed_inbound_ipv6_cidr = var.allowed_inbound_ipv6_cidr

  eks_cluster_version = var.eks_cluster_version
  # NOTE(review): the Kubernetes API endpoint is opened to all IPv4 addresses
  # here; tighten kubernetes_public_access_cidrs outside of an example setup.
  kubernetes_public_access       = true
  kubernetes_public_access_cidrs = ["0.0.0.0/0"]

  domain_name = var.domain_name
  zone_id     = var.zone_id
  subdomain   = var.subdomain

  license = var.wandb_license

  bucket_name        = var.bucket_name
  bucket_kms_key_arn = var.bucket_kms_key_arn
  use_internal_queue = true
}

# Looks up the EKS cluster reported by the infrastructure module; its endpoint,
# CA certificate and name feed the kubernetes and helm providers.
data "aws_eks_cluster" "app_cluster" {
  name = module.wandb_infra.cluster_id
}

# Authentication data for the same EKS cluster; the providers read its token.
data "aws_eks_cluster_auth" "app_cluster" {
  name = module.wandb_infra.cluster_id
}

# Kubernetes provider pointed at the EKS cluster created by wandb_infra.
# Both a static token and an exec-based credential command are configured.
provider "kubernetes" {
  host                   = data.aws_eks_cluster.app_cluster.endpoint
  cluster_ca_certificate = base64decode(data.aws_eks_cluster.app_cluster.certificate_authority[0].data)
  token                  = data.aws_eks_cluster_auth.app_cluster.token

  # Runs `aws eks get-token --cluster-name <name>` to obtain credentials.
  exec {
    command     = "aws"
    args        = ["eks", "get-token", "--cluster-name", data.aws_eks_cluster.app_cluster.name]
    api_version = "client.authentication.k8s.io/v1beta1"
  }
}

# Helm provider using the same cluster connection settings as the kubernetes
# provider: endpoint, CA certificate, token and exec credential command.
provider "helm" {
  kubernetes {
    host                   = data.aws_eks_cluster.app_cluster.endpoint
    cluster_ca_certificate = base64decode(data.aws_eks_cluster.app_cluster.certificate_authority[0].data)
    token                  = data.aws_eks_cluster_auth.app_cluster.token

    # Runs `aws eks get-token --cluster-name <name>` to obtain credentials.
    exec {
      command     = "aws"
      args        = ["eks", "get-token", "--cluster-name", data.aws_eks_cluster.app_cluster.name]
      api_version = "client.authentication.k8s.io/v1beta1"
    }
  }
}

# Deploys the W&B application onto the cluster, wiring it to the bucket,
# database and Redis endpoints exported by the infrastructure module.
module "wandb_app" {
  source  = "wandb/wandb/kubernetes"
  version = "1.12.0"

  # Without this explicit dependency Terraform would begin deploying while the
  # work group is still spinning up.
  depends_on = [module.wandb_infra]

  license = var.wandb_license

  wandb_image   = var.wandb_image
  wandb_version = var.wandb_version

  host         = module.wandb_infra.url
  service_port = module.wandb_infra.internal_app_port

  # Object storage.
  bucket             = "s3://${module.wandb_infra.bucket_name}"
  bucket_aws_region  = module.wandb_infra.bucket_region
  bucket_queue       = "internal://"
  bucket_kms_key_arn = module.wandb_infra.kms_key_arn

  # Data stores.
  database_connection_string = "mysql://${module.wandb_infra.database_connection_string}"
  redis_connection_string    = "redis://${module.wandb_infra.elasticache_connection_string}?tls=true&ttlInSeconds=604800"

  # The secret-store setting is listed first, so a key of the same name in
  # var.other_wandb_env overrides it.
  other_wandb_env = merge({
    "GORILLA_CUSTOMER_SECRET_STORE_SOURCE" = "aws-secretmanager://${var.namespace}?namespace=${var.namespace}"
  }, var.other_wandb_env)
}

# Re-exports the bucket name reported by the infrastructure module.
output "bucket_name" {
  value = module.wandb_infra.bucket_name
}

# Re-exports the bucket queue name reported by the infrastructure module.
output "bucket_queue_name" {
  value = module.wandb_infra.bucket_queue_name
}
161 changes: 161 additions & 0 deletions examples/byo-vpc/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
# Required. Also interpolated into the secret-store URL passed to wandb_app.
variable "namespace" {
  description = "Name prefix used for resources"
  type        = string
}

# Required. Forwarded unchanged to the infrastructure module.
variable "domain_name" {
  description = "Domain name used to access instance."
  type        = string
}

# Required. Forwarded unchanged to the infrastructure module.
variable "zone_id" {
  description = "Id of Route53 zone"
  type        = string
}

# Optional; null when not set.
variable "subdomain" {
  description = "Subdomain for accessing the Weights & Biases UI."
  type        = string
  default     = null
}

# Required. Passed to both the infrastructure module and the application module.
variable "wandb_license" {
  description = "Weights & Biases (W&B Local) license."
  type        = string
}

# Engine version string forwarded to the infrastructure module.
variable "database_engine_version" {
  description = "Version for MySQL Aurora"
  type        = string
  default     = "8.0.mysql_aurora.3.02.2"
}

# Database instance size; forwarded to the infrastructure module.
variable "database_instance_class" {
  type        = string
  default     = "db.r5.large"
  description = "Instance type to use by database master instance."
}

# Optional snapshot to restore from; null when not set.
variable "database_snapshot_identifier" {
  type        = string
  default     = null
  description = "Specifies whether or not to create this cluster from a snapshot. You can use either the name or ARN when specifying a DB cluster snapshot, or the ARN when specifying a DB snapshot"
}

# Database tuning value; forwarded to the infrastructure module.
variable "database_sort_buffer_size" {
  type        = number
  default     = 262144
  description = "Specifies the sort_buffer_size value to set for the database"
}

# Application version passed to the wandb_app module.
variable "wandb_version" {
  type        = string
  default     = "latest"
  description = "The version of Weights & Biases local to deploy."
}

# NOTE(review): this variable is declared but not referenced by main.tf in
# this example — confirm whether it should be passed to the infrastructure module.
variable "kubernetes_instance_types" {
  type        = list(string)
  default     = ["m5.large"]
  description = "EC2 Instance type for primary node group."
}

# Kubernetes version for the EKS cluster; an explicit null is rejected.
variable "eks_cluster_version" {
  type        = string
  default     = "1.25"
  nullable    = false
  description = "EKS cluster kubernetes version"
}

# Image repository passed to the wandb_app module.
variable "wandb_image" {
  description = "Docker repository to pull the wandb image from."
  type        = string
  default     = "wandb/local"
}

# NOTE(review): the empty default presumably tells the root module to manage
# its own bucket — confirm against the root module's variables.
variable "bucket_name" {
  description = "Name of the S3 bucket used for W&B file storage."
  type        = string
  default     = ""
}

# Optional KMS key for bucket encryption; empty string by default.
variable "bucket_kms_key_arn" {
  description = "The Amazon Resource Name of the KMS key with which S3 storage bucket objects will be encrypted."
  type        = string
  default     = ""
}


# Defaults to all IPv4 addresses; an explicit null is rejected.
variable "allowed_inbound_cidr" {
  description = "IPv4 CIDR blocks allowed inbound access to the W&B deployment."
  type        = list(string)
  default     = ["0.0.0.0/0"]
  nullable    = false
}


# Defaults to all IPv6 addresses; an explicit null is rejected.
variable "allowed_inbound_ipv6_cidr" {
  description = "IPv6 CIDR blocks allowed inbound access to the W&B deployment."
  type        = list(string)
  default     = ["::/0"]
  nullable    = false
}

# Merged after the built-in secret-store setting in the wandb_app module call.
variable "other_wandb_env" {
  description = "Extra environment variables for W&B"
  type        = map(string)
  default     = {}
}

# Forwarded unchanged to the infrastructure module; off by default.
variable "enable_operator_alb" {
  description = "Boolean indicating whether to use operator ALB (true) or not (false)."
  type        = bool
  default     = false
}

# Forwarded unchanged to the infrastructure module; off by default.
variable "enable_dummy_dns" {
  description = "Boolean indicating whether or not to enable dummy DNS for the old alb"
  type        = bool
  default     = false
}

# Required. ID of the existing VPC; passed to the module as network_id.
variable "vpc_id" {
  description = "VPC network ID"
  type        = string
}

# Required. CIDR of the existing VPC; passed to the module as network_cidr.
variable "vpc_cidr" {
  description = "VPC network CIDR"
  type        = string
}

# Required. IDs of the private subnets in the existing VPC.
variable "network_private_subnets" {
  description = "Subnet IDs"
  type        = list(string)
}

# Required. IDs of the public subnets in the existing VPC (may be an empty list).
variable "network_public_subnets" {
  description = "Subnet IDs"
  type        = list(string)
}

# Required. IDs of the database subnets in the existing VPC.
variable "network_database_subnets" {
  description = "Subnet IDs"
  type        = list(string)
}

# Required. CIDR ranges of the private subnets.
variable "network_private_subnet_cidrs" {
  description = "Subnet CIDRs"
  type        = list(string)
}

# Required. CIDR ranges of the public subnets (may be an empty list).
variable "network_public_subnet_cidrs" {
  description = "Subnet CIDRs"
  type        = list(string)
}

# Required. CIDR ranges of the database subnets.
variable "network_database_subnet_cidrs" {
  description = "Subnet CIDRs"
  type        = list(string)
}

# Required. IDs (not CIDRs) of the subnets used for ElastiCache; the README's
# tfvars sample supplies values of the form "subnet-05...".
variable "network_elasticache_subnets" {
  description = "Subnet IDs"
  type        = list(string)
}
10 changes: 10 additions & 0 deletions examples/public-dns-external/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,23 @@ provider "kubernetes" {
host = data.aws_eks_cluster.app_cluster.endpoint
cluster_ca_certificate = base64decode(data.aws_eks_cluster.app_cluster.certificate_authority[0].data)
token = data.aws_eks_cluster_auth.app_cluster.token
exec {
api_version = "client.authentication.k8s.io/v1beta1"
args = ["eks", "get-token", "--cluster-name", data.aws_eks_cluster.app_cluster.name]
command = "aws"
}
}

# Helm provider for the public-dns-external example: connects with the cluster
# endpoint, CA certificate, token and an exec credential command.
provider "helm" {
  kubernetes {
    host                   = data.aws_eks_cluster.app_cluster.endpoint
    cluster_ca_certificate = base64decode(data.aws_eks_cluster.app_cluster.certificate_authority[0].data)
    token                  = data.aws_eks_cluster_auth.app_cluster.token

    # Runs `aws eks get-token --cluster-name <name>` to obtain credentials.
    exec {
      command     = "aws"
      args        = ["eks", "get-token", "--cluster-name", data.aws_eks_cluster.app_cluster.name]
      api_version = "client.authentication.k8s.io/v1beta1"
    }
  }
}

Expand Down
Loading

0 comments on commit 35b1efc

Please sign in to comment.