diff --git a/examples/adb-exfiltration-protection/README.md b/examples/adb-exfiltration-protection/README.md index 908ba217..c4ad741a 100644 --- a/examples/adb-exfiltration-protection/README.md +++ b/examples/adb-exfiltration-protection/README.md @@ -22,13 +22,15 @@ Resources to be created: ## How to use -1. Update `terraform.tfvars` file and provide values to each defined variable +1. Update `terraform.tfvars` file and provide values to each defined variable. 2. (Optional) Configure your [remote backend](https://developer.hashicorp.com/terraform/language/settings/backends/azurerm) 3. Run `terraform init` to initialize terraform and get provider ready. 4. Run `terraform apply` to create the resources. ## How to fill in variable values +Some variables have no default value and will require one, e.g. `subscription_id` + Most of the values are to be found at: https://docs.microsoft.com/en-us/azure/databricks/administration-guide/cloud-configurations/azure/udr In `variables.tfvars`, set these variables: @@ -47,16 +49,17 @@ firewallfqdn = ["dbartifactsprodseap.blob.core.windows.net","dbartifactsprodeap. | Name | Version | | ---------------------------------------------------------------------------- | ------- | -| [azurerm](#requirement\_azurerm) | =2.83.0 | -| [databricks](#requirement\_databricks) | 0.3.10 | +| [azurerm](#requirement\_azurerm) | >=4.0.0 | +| [databricks](#requirement\_databricks) | >=1.52.0| ## Providers | Name | Version | | ---------------------------------------------------------------- | ------- | -| [azurerm](#provider\_azurerm) | 2.83.0 | -| [external](#provider\_external) | 2.2.0 | -| [random](#provider\_random) | 3.1.0 | +| [azurerm](#provider\_azurerm) | 4.9.0 | +| [external](#provider\_external) | 1.58.0 | +| [random](#provider\_random) | 3.6.3 | +| [dns](#provider\_dns) | 3.4.2 | ## Modules @@ -95,11 +98,11 @@ No modules. 
| Name | Description | Type | Default | Required | | -------------------------------------------------------------------------------------------------------------- | ----------- | ----------- | ----------------- | :------: | +| [subscription\_id](#input\_subscription\_id) | n/a | `string` | n/a | yes | | [dbfs\_prefix](#input\_dbfs\_prefix) | n/a | `string` | `"dbfs"` | no | | [firewallfqdn](#input\_firewallfqdn) | n/a | `list(any)` | n/a | yes | | [hubcidr](#input\_hubcidr) | n/a | `string` | `"10.178.0.0/20"` | no | | [metastoreip](#input\_metastoreip) | n/a | `string` | n/a | yes | -| [no\_public\_ip](#input\_no\_public\_ip) | n/a | `bool` | `true` | no | | [private\_subnet\_endpoints](#input\_private\_subnet\_endpoints) | n/a | `list` | `[]` | no | | [rglocation](#input\_rglocation) | n/a | `string` | `"southeastasia"` | no | | [sccip](#input\_sccip) | n/a | `string` | n/a | yes | @@ -111,11 +114,7 @@ No modules. | Name | Description | | -------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------- | -| [arm\_client\_id](#output\_arm\_client\_id) | n/a | -| [arm\_subscription\_id](#output\_arm\_subscription\_id) | n/a | -| [arm\_tenant\_id](#output\_arm\_tenant\_id) | n/a | -| [azure\_region](#output\_azure\_region) | n/a | -| [databricks\_azure\_workspace\_resource\_id](#output\_databricks\_azure\_workspace\_resource\_id) | n/a | -| [resource\_group](#output\_resource\_group) | n/a | +| [azure\_resource\_group\_id](#output\_azure\_resource\_group\_id) | n/a | +| [workspace\_id](#output\_workspace\_id) | n/a | | [workspace\_url](#output\_workspace\_url) | n/a | diff --git a/examples/adb-exfiltration-protection/main.tf b/examples/adb-exfiltration-protection/main.tf index f51fd05a..dbc58cc5 100644 --- a/examples/adb-exfiltration-protection/main.tf +++ b/examples/adb-exfiltration-protection/main.tf @@ -1,20 +1,7 @@ -/** - * Azure Databricks workspace in 
custom VNet with traffic routed via firewall in the Hub VNet - * - * Module creates: - * * Resource group with random prefix - * * Tags, including `Owner`, which is taken from `az account show --query user` - * * VNet with public and private subnet for Databricks - * * VNet with subnet for deployment of Azure Firewall - * * Azure Firewall with access enabled to Databricks-related resources - * * Databricks workspace - */ - module "adb-exfiltration-protection" { source = "../../modules/adb-exfiltration-protection" hubcidr = var.hubcidr spokecidr = var.spokecidr - no_public_ip = var.no_public_ip rglocation = var.rglocation metastore = var.metastore scc_relay = var.scc_relay diff --git a/examples/adb-exfiltration-protection/outputs.tf b/examples/adb-exfiltration-protection/outputs.tf new file mode 100644 index 00000000..5560ed19 --- /dev/null +++ b/examples/adb-exfiltration-protection/outputs.tf @@ -0,0 +1,14 @@ +output "azure_resource_group_id" { + description = "ID of the created Azure resource group" + value = module.adb-exfiltration-protection.azure_resource_group_id +} + +output "workspace_id" { + description = "The Databricks workspace ID" + value = module.adb-exfiltration-protection.workspace_id +} + +output "workspace_url" { + description = "The Databricks workspace URL" + value = module.adb-exfiltration-protection.workspace_url +} \ No newline at end of file diff --git a/modules/adb-exfiltration-protection/versions.tf b/examples/adb-exfiltration-protection/providers.tf similarity index 67% rename from modules/adb-exfiltration-protection/versions.tf rename to examples/adb-exfiltration-protection/providers.tf index 3d94bf15..138fd336 100644 --- a/modules/adb-exfiltration-protection/versions.tf +++ b/examples/adb-exfiltration-protection/providers.tf @@ -1,13 +1,12 @@ -# versions.tf terraform { required_providers { - databricks = { - source = "databricks/databricks" - version = ">=1.20.0" - } azurerm = { source = "hashicorp/azurerm" - version = ">=2.83.0" + 
version = ">=4.0.0" + } + databricks = { + source = "databricks/databricks" + version = ">=1.52.0" } random = { source = "hashicorp/random" @@ -17,3 +16,8 @@ terraform { } } } + +provider "azurerm" { + subscription_id = var.subscription_id + features {} +} diff --git a/examples/adb-exfiltration-protection/terraform.tfvars b/examples/adb-exfiltration-protection/terraform.tfvars index a5ae6e1d..f8297e4b 100644 --- a/examples/adb-exfiltration-protection/terraform.tfvars +++ b/examples/adb-exfiltration-protection/terraform.tfvars @@ -1,11 +1,14 @@ -hubcidr = "10.178.0.0/20" -spokecidr = "10.179.0.0/20" -no_public_ip = true -rglocation = "westeurope" +subscription_id = "" +dbfs_prefix = "dbfs" +workspace_prefix = "adb" +hubcidr = "10.178.0.0/20" +spokecidr = "10.179.0.0/20" +rglocation = "westeurope" + # We can pull this information automatically, i.e. from # https://github.com/microsoft/AzureTRE/blob/main/templates/workspace_services/databricks/terraform/databricks-udr.json # that is maintained by Microsoft team (although it may not be updated immediately). 
-metastore = [ +metastore = [ "consolidated-westeurope-prod-metastore.mysql.database.azure.com", "consolidated-westeurope-prod-metastore-addl-1.mysql.database.azure.com", "consolidated-westeurope-prod-metastore-addl-2.mysql.database.azure.com", @@ -15,24 +18,23 @@ metastore = [ "consolidated-westeuropec2-prod-metastore-2.mysql.database.azure.com", "consolidated-westeuropec2-prod-metastore-3.mysql.database.azure.com", ] + // get from https://learn.microsoft.com/en-us/azure/databricks/resources/supported-regions#--metastore-artifact-blob-storage-system-tables-blob-storage-log-blob-storage-and-event-hub-endpoint-ip-addresses -scc_relay = [ +scc_relay = [ "tunnel.westeurope.azuredatabricks.net", "tunnel.westeuropec2.azuredatabricks.net" ] -webapp_ips = [ +webapp_ips = [ "52.232.19.246/32", "40.74.30.80/32", "20.103.219.240/28", "4.150.168.160/28", ] -eventhubs = [ +eventhubs = [ "prod-westeurope-observabilityeventhubs.servicebus.windows.net", "prod-westeuc2-observabilityeventhubs.servicebus.windows.net", ] -dbfs_prefix = "dbfs" -workspace_prefix = "adb" -firewallfqdn = [ // dbfs rule will be added - depends on dbfs storage name +firewallfqdn = [ // dbfs rule will be added - depends on dbfs storage name "dbartifactsprodwesteu.blob.core.windows.net", //databricks artifacts "arprodwesteua1.blob.core.windows.net", "arprodwesteua2.blob.core.windows.net", diff --git a/examples/adb-exfiltration-protection/variables.tf b/examples/adb-exfiltration-protection/variables.tf index a32da8d2..659360a3 100644 --- a/examples/adb-exfiltration-protection/variables.tf +++ b/examples/adb-exfiltration-protection/variables.tf @@ -1,3 +1,8 @@ +variable "subscription_id" { + type = string + description = "Azure Subscription ID to deploy the workspace into" +} + variable "hubcidr" { description = "IP range for creaiton of the Spoke VNet" type = string @@ -10,12 +15,6 @@ variable "spokecidr" { default = "10.179.0.0/20" } -variable "no_public_ip" { - description = "If workspace should be created 
with No-Public-IP" - type = bool - default = true -} - variable "rglocation" { description = "Location of resource group" type = string diff --git a/modules/adb-exfiltration-protection/README.md b/modules/adb-exfiltration-protection/README.md index abf471d7..442e178f 100644 --- a/modules/adb-exfiltration-protection/README.md +++ b/modules/adb-exfiltration-protection/README.md @@ -1,17 +1,13 @@ # Provisioning Azure Databricks workspace with a Hub & Spoke firewall for data exfiltration protection -This template provides an example deployment of: Hub-Spoke networking with egress firewall to control all outbound traffic from Databricks subnets. Details are described in: https://databricks.com/blog/2020/03/27/data-exfiltration-protection-with-azure-databricks.html +This module creates an Azure Databricks workspace with a Hub & Spoke firewall for data exfiltration protection. -With this setup, you can setup firewall rules to block / allow egress traffic from your Databricks clusters. You can also use firewall to block all access to storage accounts, and use private endpoint connection to bypass this firewall, such that you allow access only to specific storage accounts. +## Module content - -To find IP and FQDN for your deployment, go to: https://docs.microsoft.com/en-us/azure/databricks/administration-guide/cloud-configurations/azure/udr - -## Overall Architecture +This module can be used to deploy the following: ![alt text](https://raw.githubusercontent.com/databricks/terraform-databricks-examples/main/modules/adb-exfiltration-protection/images/adb-exfiltration-classic.png?raw=true) -Resources to be created: * Resource group with random prefix * Tags, including `Owner`, which is taken from `az account show --query user` * Hub-Spoke topology, with hub firewall in hub vnet's subnet. @@ -32,22 +28,6 @@ Resources to be created: 6. Run `terraform init` to initialize terraform and get provider ready. 7. Run `terraform apply` to create the resources. 
- -## How to fill in variable values - -Most of the values are to be found at: https://learn.microsoft.com/en-us/azure/databricks/resources/supported-regions and https://docs.microsoft.com/en-us/azure/databricks/administration-guide/cloud-configurations/azure/udr - -In `variables.tfvars`, set these variables (bigger regions have multiple instances of each service): - -```hcl -metastore = ["consolidated-westeurope-prod-metastore.mysql.database.azure.com"] -scc_relay = ["tunnel.westeurope.azuredatabricks.net"] -webapp_ips = ["52.230.27.216/32"] # given at UDR page -eventhubs = ["prod-westeurope-observabilityeventhubs.servicebus.windows.net"] -# find these for your region, follow Databricks blog tutorial. -firewallfqdn = ["dbartifactsprodseap.blob.core.windows.net","dbartifactsprodeap.blob.core.windows.net","dblogprodseasia.blob.core.windows.net","cdnjs.com"] -``` - ## Requirements @@ -121,11 +101,13 @@ No modules. | Name | Description | | -------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------- | -| [arm\_client\_id](#output\_arm\_client\_id) | n/a | -| [arm\_subscription\_id](#output\_arm\_subscription\_id) | n/a | -| [arm\_tenant\_id](#output\_arm\_tenant\_id) | n/a | -| [azure\_region](#output\_azure\_region) | n/a | -| [databricks\_azure\_workspace\_resource\_id](#output\_databricks\_azure\_workspace\_resource\_id) | n/a | -| [resource\_group](#output\_resource\_group) | n/a | +| [arm\_client\_id](#output\_arm\_client\_id) | Deprecated | +| [arm\_subscription\_id](#output\_arm\_subscription\_id) | Deprecated | +| [arm\_tenant\_id](#output\_arm\_tenant\_id) | Deprecated | +| [azure\_region](#output\_azure\_region) | Deprecated | +| [databricks\_azure\_workspace\_resource\_id](#output\_databricks\_azure\_workspace\_resource\_id) | Deprecated | +| [resource\_group](#output\_resource\_group) | Deprecated | | [workspace\_url](#output\_workspace\_url) | 
n/a | +| [azure\_resource\_group\_id](#output\_azure\_resource\_group\_id) | ID of the created Azure resource group | +| [workspace\_id](#output\_workspace\_id) | The Databricks workspace ID | diff --git a/modules/adb-exfiltration-protection/main.tf b/modules/adb-exfiltration-protection/main.tf index b59d28fb..6a66df9a 100644 --- a/modules/adb-exfiltration-protection/main.tf +++ b/modules/adb-exfiltration-protection/main.tf @@ -1,16 +1,3 @@ -/** - * Azure Databricks workspace in custom VNet - * - * Module creates: - * * Resource group with random prefix - * * Tags, including `Owner`, which is taken from `az account show --query user` - * * VNet with public and private subnet - * * Databricks workspace - */ -provider "azurerm" { - features {} -} - resource "random_string" "naming" { special = false upper = false @@ -44,23 +31,3 @@ resource "azurerm_resource_group" "this" { location = local.location tags = local.tags } - -output "arm_client_id" { - value = data.azurerm_client_config.current.client_id -} - -output "arm_subscription_id" { - value = data.azurerm_client_config.current.subscription_id -} - -output "arm_tenant_id" { - value = data.azurerm_client_config.current.tenant_id -} - -output "azure_region" { - value = local.location -} - -output "resource_group" { - value = azurerm_resource_group.this.name -} diff --git a/modules/adb-exfiltration-protection/outputs.tf b/modules/adb-exfiltration-protection/outputs.tf new file mode 100644 index 00000000..37cee094 --- /dev/null +++ b/modules/adb-exfiltration-protection/outputs.tf @@ -0,0 +1,44 @@ +output "databricks_azure_workspace_resource_id" { + description = "**Deprecated** The ID of the Databricks Workspace in the Azure management plane" + value = azurerm_databricks_workspace.this.id +} + +output "arm_client_id" { + description = "**Deprecated**" + value = data.azurerm_client_config.current.client_id +} + +output "arm_subscription_id" { + description = "**Deprecated**" + value = data.azurerm_client_config.current.subscription_id +} + +output "arm_tenant_id" { + 
description = "**Deprecated**" + value = data.azurerm_client_config.current.tenant_id +} + +output "azure_region" { + description = "**Deprecated**" + value = local.location +} + +output "resource_group" { + description = "**Deprecated**" + value = azurerm_resource_group.this.name +} + +output "workspace_url" { + description = "The Databricks workspace URL" + value = "https://${azurerm_databricks_workspace.this.workspace_url}/" +} + +output "azure_resource_group_id" { + description = "ID of the created Azure resource group" + value = azurerm_resource_group.this.id +} + +output "workspace_id" { + description = "The Databricks workspace ID" + value = azurerm_databricks_workspace.this.workspace_id +} \ No newline at end of file diff --git a/examples/adb-exfiltration-protection/versions.tf b/modules/adb-exfiltration-protection/providers.tf similarity index 79% rename from examples/adb-exfiltration-protection/versions.tf rename to modules/adb-exfiltration-protection/providers.tf index 3d94bf15..3c762468 100644 --- a/examples/adb-exfiltration-protection/versions.tf +++ b/modules/adb-exfiltration-protection/providers.tf @@ -1,13 +1,12 @@ -# versions.tf terraform { required_providers { databricks = { source = "databricks/databricks" - version = ">=1.20.0" + version = ">=1.52.0" } azurerm = { source = "hashicorp/azurerm" - version = ">=2.83.0" + version = ">=4.0.0" } random = { source = "hashicorp/random" diff --git a/modules/adb-exfiltration-protection/variables.tf b/modules/adb-exfiltration-protection/variables.tf index 1dbfd887..eb81b769 100644 --- a/modules/adb-exfiltration-protection/variables.tf +++ b/modules/adb-exfiltration-protection/variables.tf @@ -10,12 +10,6 @@ variable "spokecidr" { default = "10.179.0.0/20" } -variable "no_public_ip" { - description = "If workspace should be created with No-Public-IP" - type = bool - default = true -} - variable "rglocation" { description = "Location of resource group" type = string diff --git 
a/modules/adb-exfiltration-protection/workspace.tf b/modules/adb-exfiltration-protection/workspace.tf index fb58f3fe..ae958d45 100644 --- a/modules/adb-exfiltration-protection/workspace.tf +++ b/modules/adb-exfiltration-protection/workspace.tf @@ -4,9 +4,7 @@ resource "azurerm_databricks_workspace" "this" { location = azurerm_resource_group.this.location sku = "premium" tags = local.tags - //infrastructure_encryption_enabled = true custom_parameters { - no_public_ip = var.no_public_ip virtual_network_id = azurerm_virtual_network.this.id private_subnet_name = azurerm_subnet.private.name public_subnet_name = azurerm_subnet.public.name @@ -20,15 +18,3 @@ resource "azurerm_databricks_workspace" "this" { azurerm_subnet_network_security_group_association.private ] } - - -output "databricks_azure_workspace_resource_id" { - // The ID of the Databricks Workspace in the Azure management plane. - value = azurerm_databricks_workspace.this.id -} - -output "workspace_url" { - // The workspace URL which is of the format 'adb-{workspaceId}.{random}.azuredatabricks.net' - // this is not named as DATABRICKS_HOST, because it affect authentication - value = "https://${azurerm_databricks_workspace.this.workspace_url}/" -}