Skip to content

Commit

Permalink
feat: create aks client ml UDP mode
Browse files Browse the repository at this point in the history
  • Loading branch information
Denise Perez committed Nov 26, 2024
1 parent e1af521 commit 769f04a
Show file tree
Hide file tree
Showing 18 changed files with 783 additions and 11 deletions.
19 changes: 19 additions & 0 deletions aks.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Optional AKS client cluster; instantiated only when var.aks_clients is true.
module "aks_clients" {
  source = "./modules/aks"
  count  = var.aks_clients ? 1 : 0

  # Naming and placement
  prefix          = var.prefix
  cluster_name    = var.cluster_name
  subscription_id = var.subscription_id
  rg_name         = var.rg_name

  # Networking
  vnet_rg_name = local.vnet_rg_name
  vnet_name    = local.vnet_name
  subnet_name  = local.subnet_name

  # Node sizing
  instance_type                = var.aks_client_instance_type
  node_count                   = var.aks_instances_number
  frontend_container_cores_num = var.aks_client_frontend_cores

  # Use the caller-supplied public key when given, otherwise the key from
  # tls_private_key.ssh_key.
  ssh_public_key = var.ssh_public_key != null ? var.ssh_public_key : tls_private_key.ssh_key[0].public_key_openssh
  key_vault_name = azurerm_key_vault.key_vault.name

  # Backend scale set name: <prefix>-<cluster-name>-vmss
  backend_vmss_name = local.vmss_name

  create_ml = var.create_ml

  depends_on = [module.network, azurerm_linux_function_app.function_app]
}
31 changes: 20 additions & 11 deletions examples/public_network/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,25 @@ provider "azurerm" {
}
}


# Example call of the root module (source "../..") for a public-network deployment.
# NOTE(review): this listing comes from a diff view and contains every argument
# twice. The first run (source ... assign_public_ip, with rg_name "weka-rg" and
# cluster_name "poc") appears to be the removed side and the second run the added
# side; only one set can exist in the real file — confirm against the repository.
module "weka_deployment" {
source = "../.."
prefix = "weka"
rg_name = "weka-rg"
get_weka_io_token = var.get_weka_io_token
subscription_id = var.subscription_id
cluster_name = "poc"
tiering_enable_obs_integration = true
cluster_size = 6
# 0.0.0.0/0 matches every IPv4 address.
allow_ssh_cidrs = ["0.0.0.0/0"]
allow_weka_api_cidrs = ["0.0.0.0/0"]
assign_public_ip = true
# Second run of arguments (presumably the post-change values).
source = "../.."
prefix = "weka"
rg_name = "aks"
get_weka_io_token = var.get_weka_io_token
subscription_id = var.subscription_id
cluster_name = "ml"
tiering_enable_obs_integration = true
cluster_size = 6
# 0.0.0.0/0 matches every IPv4 address.
allow_ssh_cidrs = ["0.0.0.0/0"]
allow_weka_api_cidrs = ["0.0.0.0/0"]
assign_public_ip = true
# The subnet prefix and both delegation CIDRs below fall inside address_space 10.224.0.0/12.
address_space = "10.224.0.0/12"
subnet_prefix = "10.224.0.0/16"
function_app_subnet_delegation_cidr = "10.225.1.0/24"
logic_app_subnet_delegation_cidr = "10.225.2.0/24"
# Arguments forwarded to the AKS clients module call in aks.tf.
aks_clients = true
aks_instances_number = 3
aks_client_frontend_cores = 1
create_ml = true
}
118 changes: 118 additions & 0 deletions modules/aks/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
locals {
  # Destination of the rendered node-pool update script (written by local_file.script).
  script_path = "/tmp/update_aks_node_pool_${var.prefix}_${var.cluster_name}.sh"

  # One NIC per frontend core, plus one extra.
  nics = 1 + var.frontend_container_cores_num
}

# Existing resource group; its location is reused by the resources in this module.
data "azurerm_resource_group" "rg" {
  name = var.rg_name
}

# Existing subnet the AKS node pools are attached to.
data "azurerm_subnet" "subnet" {
  name                 = var.subnet_name
  virtual_network_name = var.vnet_name

  # The subnet lives in the VNet's resource group, which is passed separately
  # from the deployment resource group and may differ from var.rg_name.
  resource_group_name = var.vnet_rg_name
}


# AKS cluster with a system-assigned identity and a three-node default pool
# placed in the pre-existing subnet.
resource "azurerm_kubernetes_cluster" "k8s" {
  name                = "${var.prefix}-aks-cluster"
  dns_prefix          = "${var.prefix}-aks-dns"
  resource_group_name = var.rg_name
  location            = data.azurerm_resource_group.rg.location
  kubernetes_version  = "1.28.10"

  identity {
    type = "SystemAssigned"
  }

  # Azure CNI in overlay mode behind a standard load balancer.
  network_profile {
    network_plugin      = "azure"
    network_plugin_mode = "overlay"
    network_policy      = "azure"
    load_balancer_sku   = "standard"
  }

  linux_profile {
    admin_username = var.vm_username

    ssh_key {
      key_data = var.ssh_public_key
    }
  }

  default_node_pool {
    name                         = "agentpool"
    node_count                   = 3
    vm_size                      = var.instance_type
    os_sku                       = var.os_sku
    vnet_subnet_id               = data.azurerm_subnet.subnet.id
    only_critical_addons_enabled = true
  }

  # Terraform ignores every later change to this cluster's arguments.
  lifecycle {
    ignore_changes = all
  }

  depends_on = [data.azurerm_subnet.subnet]
}

# "clients" node pool, labelled node=weka-client. It is created with zero nodes;
# the rendered run.sh script scales the backing scale set afterwards.
resource "azurerm_kubernetes_cluster_node_pool" "pool" {
  name                  = "clients"
  kubernetes_cluster_id = azurerm_kubernetes_cluster.k8s.id
  orchestrator_version  = azurerm_kubernetes_cluster.k8s.kubernetes_version

  node_count     = 0
  vm_size        = var.instance_type
  os_sku         = var.os_sku
  vnet_subnet_id = data.azurerm_subnet.subnet.id

  node_labels = {
    "node" = "weka-client"
  }

  # Terraform ignores every later change to this pool's arguments.
  lifecycle {
    ignore_changes = all
  }

  depends_on = [azurerm_kubernetes_cluster.k8s]
}


# Writes the cluster's kubeconfig to a local file.
# The kubeconfig carries cluster credentials, so:
#   - the value is passed through still marked sensitive (no nonsensitive()),
#     which keeps it redacted in plan/apply output;
#   - the file is created owner-read/write only instead of the provider's
#     default "0777" mode.
# The reference to azurerm_kubernetes_cluster.k8s already orders this resource
# after the cluster, so no explicit depends_on is needed.
resource "local_file" "config_yaml" {
  content         = azurerm_kubernetes_cluster.k8s.kube_config_raw
  filename        = "/tmp/${var.prefix}-kube-config.yaml"
  file_permission = "0600"
}

# resource "null_resource" "weka_fs" {
# triggers = {
# always_run = timestamp()
# }
# provisioner "local-exec" {
# command = "${path.module}/run.sh ${var.rg_name} ${azurerm_kubernetes_cluster.k8s.name} ${var.key_vault_name} ${var.backend_vmss_name} ${var.subscription_id} ${local.nics} ${var.node_count} ${var.frontend_container_cores_num} ${path.module} \"${var.prefix}-workspace-ml2\""
# }
# depends_on = [azurerm_kubernetes_cluster_node_pool.pool]
# }

# Exposes the values that are rendered into the node-pool update script, plus
# the path the rendered script is written to.
output "nodes_update_script" {
  value = {
    # Where the rendered script and the module's files live
    script_path = local.script_path
    yamls_path  = path.module

    # Azure identifiers
    subscription_id   = var.subscription_id
    rg_name           = var.rg_name
    aks_cluster_name  = azurerm_kubernetes_cluster.k8s.name
    backend_vmss_name = var.backend_vmss_name
    key_vault_name    = var.key_vault_name

    # Client sizing
    node_count                   = var.node_count
    nics                         = local.nics
    frontend_container_cores_num = var.frontend_container_cores_num
  }
}

# Renders run.sh with this module's values and writes it to local.script_path.
resource "local_file" "script" {
  filename = local.script_path

  # The script is meant to be executed by an operator; keep it executable but
  # not world-writable (the provider default mode is "0777").
  file_permission = "0755"

  content = templatefile("${path.module}/run.sh", {
    rg_name                      = var.rg_name
    aks_cluster_name             = azurerm_kubernetes_cluster.k8s.name
    key_vault_name               = var.key_vault_name
    backend_vmss_name            = var.backend_vmss_name
    subscription_id              = var.subscription_id
    nics                         = local.nics
    node_count                   = var.node_count
    frontend_container_cores_num = var.frontend_container_cores_num
    yamls_path                   = path.module
  })
}
42 changes: 42 additions & 0 deletions modules/aks/ml.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Storage account backing the Machine Learning workspace (only when var.create_ml).
resource "azurerm_storage_account" "sa" {
  count = var.create_ml ? 1 : 0

  # Storage account names may contain only lowercase letters and digits and are
  # limited to 24 characters, while var.prefix is also used for names that
  # allow hyphens. Normalise the derived name: lowercase it, strip every other
  # character, and cap the length. Names that were already valid are unchanged.
  name = substr(replace(lower("${var.prefix}mlsa"), "/[^a-z0-9]/", ""), 0, 24)

  location                 = data.azurerm_resource_group.rg.location
  resource_group_name      = var.rg_name
  account_tier             = "Standard"
  account_replication_type = "GRS"

  # Terraform ignores every later change to this account's arguments.
  lifecycle {
    ignore_changes = all
  }
}

# Application Insights instance attached to the Machine Learning workspace
# (only when var.create_ml).
resource "azurerm_application_insights" "insights" {
  count = var.create_ml ? 1 : 0

  name                = "${var.prefix}-workspace-insights"
  resource_group_name = var.rg_name
  location            = data.azurerm_resource_group.rg.location
  application_type    = "web"
}

# Existing key vault, looked up by name in var.rg_name; its id is handed to the
# Machine Learning workspace.
data "azurerm_key_vault" "vault" {
  resource_group_name = var.rg_name
  name                = var.key_vault_name
}

# Machine Learning workspace (only when var.create_ml), wired to the
# Application Insights instance, key vault and storage account of this module.
resource "azurerm_machine_learning_workspace" "ml" {
  count = var.create_ml ? 1 : 0

  name                = "${var.prefix}-workspace-ml"
  resource_group_name = var.rg_name
  location            = data.azurerm_resource_group.rg.location

  # Dependent services
  storage_account_id      = azurerm_storage_account.sa[0].id
  key_vault_id            = data.azurerm_key_vault.vault.id
  application_insights_id = azurerm_application_insights.insights[0].id

  public_network_access_enabled = true

  identity {
    type = "SystemAssigned"
  }

  # Terraform ignores every later change to this workspace's arguments.
  lifecycle {
    ignore_changes = all
  }
}
50 changes: 50 additions & 0 deletions modules/aks/outputs.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# output "aks_cluster_username" {
# value = azurerm_kubernetes_cluster.k8s.kube_config[0].username
# sensitive = true
# }
#
# output "aks_host" {
# value = azurerm_kubernetes_cluster.k8s.kube_config[0].host
# sensitive = true
# }
#
# output "kube_config" {
# value = nonsensitive(azurerm_kubernetes_cluster.k8s.kube_config_raw)
# sensitive = false
# }

# output "cluster_id" {
# value = azurerm_kubernetes_cluster.k8s.id
# }
#
# output "config_file" {
# value = local_file.config_yaml.filename
# }
#
# output "ml_uri" {
# value = var.create_ml ? azurerm_machine_learning_workspace.ml[0].discovery_url : null
# }
#
# output "ml_workspace_id" {
# value = var.create_ml ? azurerm_machine_learning_workspace.ml[0].workspace_id : null
# }
#
# output "aks_client_certificate" {
# value = azurerm_kubernetes_cluster.k8s.kube_config.0.client_certificate
# }
#
# output "aks_client_key" {
# value = azurerm_kubernetes_cluster.k8s.kube_config.0.client_key
# }
#
# output "aks_cluster_ca_certificate" {
# value = azurerm_kubernetes_cluster.k8s.kube_config.0.cluster_ca_certificate
# }
#
# output "aks_rg_name" {
# value = azurerm_kubernetes_cluster.k8s.resource_group_name
# }
#
# output "aks_weka_node_pool_name" {
# value = azurerm_kubernetes_cluster_node_pool.pool.name
# }
46 changes: 46 additions & 0 deletions modules/aks/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/bin/bash
# Rendered by Terraform's templatefile(): the brace-delimited placeholders in the
# assignments below are substituted at render time. Everything else is plain
# bash; bash is required for the "&>" redirections and for pipefail.
#
# Steps:
#   1. find the AKS node resource group and the "clients" scale set in it
#   2. fetch kube credentials for the cluster
#   3. patch the daemonset manifest with NIC count, one backend IP and the
#      frontend core count, then apply the configmap and daemonset
#   4. scale the "clients" scale set to the requested node count
set -ex
# Fail when any stage of a pipeline fails, so a failing "az ... | jq" cannot
# silently produce an empty value.
set -o pipefail

rg_name="${rg_name}"
aks_cluster_name="${aks_cluster_name}"
vault_name="${key_vault_name}"
backend_vmss_name="${backend_vmss_name}"
subscription_id="${subscription_id}"
nics="${nics}"
node_count="${node_count}"
frontend_container_cores_num="${frontend_container_cores_num}"
yamls_path="${yamls_path}"

# install yq if not installed (best effort: a missing tool fails at first use)
if ! command -v yq &> /dev/null
then
    echo "yq could not be found"
    apt install yq -y || brew install yq || yum install yq -y || true
fi

# install jq if not installed (best effort: a missing tool fails at first use)
if ! command -v jq &> /dev/null
then
    echo "jq could not be found"
    apt install jq -y || brew install jq || yum install jq -y || true
fi

# Every az call targets the same subscription explicitly instead of relying on
# whatever default subscription the CLI session happens to have.
aks_rg_name=$(az aks show -n "$aks_cluster_name" -g "$rg_name" --subscription "$subscription_id" | jq -r ".nodeResourceGroup")

# "|| true": grep exits non-zero on no match; handle that case with a clear
# message below instead of an unexplained exit.
aks_vmss_name=$(az vmss list -g "$aks_rg_name" --subscription "$subscription_id" | jq -r ".[].name" | grep clients || true)
if [ -z "$aks_vmss_name" ]
then
    echo "no 'clients' scale set found in resource group $aks_rg_name" >&2
    exit 1
fi

# Set aks credentials
az aks get-credentials --resource-group "$rg_name" --name "$aks_cluster_name" --subscription "$subscription_id" --overwrite-existing

# Config kube yaml
backend_ips=$(az vmss nic list -g "$rg_name" --vmss-name "$backend_vmss_name" --subscription "$subscription_id" --query "[].ipConfigurations[]" | jq -r '.[] | select(.name=="ipconfig0") | .privateIPAddress')
backend_ip=$(echo "$backend_ips" | head -n 1)
if [ -z "$backend_ip" ]
then
    echo "no backend private IP found for scale set $backend_vmss_name" >&2
    exit 1
fi

daemonset_yaml="$yamls_path/yamls/daemonset.yaml"
yq eval --inplace ".spec.template.spec.containers[].env[] |= select(.name == \"NICS\").value = \"$nics\"" "$daemonset_yaml"
yq eval --inplace ".spec.template.spec.containers[].env[] |= select(.name == \"BACKEND_IP\").value = \"$backend_ip\"" "$daemonset_yaml"
yq eval --inplace ".spec.template.spec.containers[].env[] |= select(.name == \"FRONTEND_CONTAINER_CORES_NUM\").value = \"$frontend_container_cores_num\"" "$daemonset_yaml"

kubectl apply -f "$yamls_path/yamls/configmap.yaml"
kubectl apply -f "$daemonset_yaml"

#scale up nodepool
az vmss scale --new-capacity "$node_count" --resource-group "$aks_rg_name" --name "$aks_vmss_name" --subscription "$subscription_id"

81 changes: 81 additions & 0 deletions modules/aks/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
variable "frontend_container_cores_num" {
  type        = number
  default     = 1
  description = "Number of frontend container cores on each client node. The NIC count is derived from it as this value + 1."
}

variable "rg_name" {
  type        = string
  description = "A predefined resource group in the Azure subscription."
}

variable "vm_username" {
  type        = string
  description = "The user name for logging in to the virtual machines."
  default     = "weka"
}

variable "vnet_name" {
  type        = string
  description = "The virtual network name."
}

variable "subnet_name" {
  type        = string
  description = "The subnet name."
}

variable "ssh_public_key" {
  type        = string
  description = "SSH public key to pass to the VMs."
}

variable "vnet_rg_name" {
  type        = string
  description = "Resource group name of the virtual network."
}

variable "node_count" {
  type        = number
  description = "Number of nodes the clients node pool is scaled to by the generated update script (the pool itself is created with 0 nodes)."
  default     = 3
}

variable "instance_type" {
  type        = string
  description = "VM size used for the AKS node pools."
  default     = "Standard_L8s_v3"
}

variable "subscription_id" {
  type        = string
  description = "Subscription id for deployment"
}

variable "key_vault_name" {
  type        = string
  description = "Name of key vault"
}

variable "prefix" {
  type        = string
  description = "Prefix for all resources"
}

variable "create_ml" {
  type        = bool
  description = "Whether to create the Machine Learning workspace together with its storage account and Application Insights instance."
  default     = false
}

variable "backend_vmss_name" {
  type        = string
  description = "Name of vmss backend"
}

variable "os_sku" {
  type        = string
  description = "OS SKU used for the AKS node pools."
  default     = "Ubuntu"
}

variable "cluster_name" {
  type        = string
  description = "Cluster name; used in the file name of the generated node-pool update script."
}
Loading

0 comments on commit 769f04a

Please sign in to comment.