# deployment.yml
custom:
# Cluster configs for each environment
default-cluster-spec: &default-cluster-spec
spark_version: '11.0.x-cpu-ml-scala2.12'
node_type_id: 'i3.xlarge' # NOTE: this is an AWS-specific instance type. Change accordingly if running on Azure or GCP.
driver_node_type_id: 'i3.xlarge' # NOTE: this is an AWS-specific instance type. Change accordingly if running on Azure or GCP.
num_workers: 1
    # To reduce job start-up time, it is advisable to acquire the driver and workers from a
    # cluster pool by setting both driver_instance_pool_id and instance_pool_id to a pool_id.
    # If driver_instance_pool_id and instance_pool_id are set, node_type_id and
    # driver_node_type_id CANNOT also be supplied; remove both before enabling the pool fields below.
# driver_instance_pool_id: '0617-151415-bells2-pool-hh7h6tjm'
# instance_pool_id: '0617-151415-bells2-pool-hh7h6tjm'
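    # NOTE: the pool IDs above are examples from a specific workspace; pools can be created in
    # the Databricks UI or via the Instance Pools API, and the resulting pool_id pasted here.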
dev-cluster-config: &dev-cluster-config
new_cluster:
<<: *default-cluster-spec
staging-cluster-config: &staging-cluster-config
new_cluster:
<<: *default-cluster-spec
prod-cluster-config: &prod-cluster-config
new_cluster:
<<: *default-cluster-spec
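  # The three *-cluster-config anchors above share a single spec via the YAML merge key
  # (<<: *default-cluster-spec). Keys set alongside the merge override merged values, so a
  # per-environment tweak is a one-line change. A minimal sketch with a hypothetical value
  # (not active below):
  # prod-cluster-config: &prod-cluster-config
  #   new_cluster:
  #     <<: *default-cluster-spec
  #     num_workers: 4  # hypothetical prod-only override; all other fields still inherited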
# Databricks Jobs definitions
# NOTE: config and env files are passed as FUSE references (file:fuse://), so each job loads
# them from a local filesystem path at runtime.
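# A brief sketch of how dbx resolves the two reference styles at deploy time (summarized from
# dbx behavior; verify against your dbx version):
#   file://      - the local file is uploaded to the artifact location and the reference is
#                  rewritten to its remote URI (used below for the entry-point .py files).
#   file:fuse:// - the file is uploaded and the reference is rewritten to the FUSE mount path
#                  (/dbfs/...), so job code can read it with ordinary file I/O.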
environments:
dev:
strict_path_adjustment_policy: true
jobs:
- name: 'DEV-telco-churn-demo-setup'
<<: *dev-cluster-config
spark_python_task:
python_file: 'file://telco_churn/pipelines/demo_setup_job.py'
parameters: ['--base-data-params', 'file:fuse://conf/.base_data_params.env',
'--env', 'file:fuse://conf/dev/.dev.env',
'--conf-file', 'file:fuse://conf/pipeline_configs/demo_setup.yml']
- name: 'DEV-telco-churn-feature-table-creation'
<<: *dev-cluster-config
spark_python_task:
python_file: 'file://telco_churn/pipelines/feature_table_creator_job.py'
parameters: ['--base-data-params', 'file:fuse://conf/.base_data_params.env',
'--env', 'file:fuse://conf/dev/.dev.env',
'--conf-file', 'file:fuse://conf/pipeline_configs/feature_table_creator.yml']
- name: 'DEV-telco-churn-model-train'
        <<: *dev-cluster-config
spark_python_task:
python_file: 'file://telco_churn/pipelines/model_train_job.py'
parameters: ['--base-data-params', 'file:fuse://conf/.base_data_params.env',
'--env', 'file:fuse://conf/dev/.dev.env',
'--conf-file', 'file:fuse://conf/pipeline_configs/model_train.yml']
- name: 'DEV-telco-churn-model-deployment'
        <<: *dev-cluster-config
spark_python_task:
python_file: 'file://telco_churn/pipelines/model_deployment_job.py'
parameters: ['--base-data-params', 'file:fuse://conf/.base_data_params.env',
'--env', 'file:fuse://conf/dev/.dev.env',
'--conf-file', 'file:fuse://conf/pipeline_configs/model_deployment.yml']
- name: 'DEV-telco-churn-model-inference-batch'
        <<: *dev-cluster-config
spark_python_task:
python_file: 'file://telco_churn/pipelines/model_inference_batch_job.py'
parameters: ['--base-data-params', 'file:fuse://conf/.base_data_params.env',
'--env', 'file:fuse://conf/dev/.dev.env',
'--conf-file', 'file:fuse://conf/pipeline_configs/model_inference_batch.yml']
- name: 'DEV-telco-churn-sample-integration-test'
        <<: *dev-cluster-config
spark_python_task:
python_file: 'file://tests/integration/sample_test.py'
parameters: ['--base-data-params', 'file:fuse://conf/.base_data_params.env',
'--env', 'file:fuse://conf/dev/.dev.env',
'--conf-file', 'file:fuse://conf/pipeline_configs/sample_test.yml']
staging:
strict_path_adjustment_policy: true
jobs:
- name: 'STAGING-telco-churn-sample-integration-test'
        <<: *staging-cluster-config
spark_python_task:
python_file: 'file://tests/integration/sample_test.py'
parameters: ['--env', 'file:fuse://conf/staging/.staging.env',
'--conf-file', 'file:fuse://conf/pipeline_configs/sample_test.yml']
prod:
strict_path_adjustment_policy: true
jobs:
- name: 'PROD-telco-churn-demo-setup'
<<: *prod-cluster-config
spark_python_task:
python_file: 'file://telco_churn/pipelines/demo_setup_job.py'
parameters: ['--base-data-params', 'file:fuse://conf/.base_data_params.env',
'--env', 'file:fuse://conf/prod/.prod.env',
'--conf-file', 'file:fuse://conf/pipeline_configs/demo_setup.yml']
- name: 'PROD-telco-churn-initial-model-train-register'
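        # Multi-task job: the tasks below run as a DAG, with depends_on enforcing the order
        # demo-setup -> feature-table-creation -> model-train.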
tasks:
- task_key: 'demo-setup'
            <<: *prod-cluster-config
spark_python_task:
python_file: 'file://telco_churn/pipelines/demo_setup_job.py'
parameters: ['--base-data-params', 'file:fuse://conf/.base_data_params.env',
'--env', 'file:fuse://conf/prod/.prod.env',
'--conf-file', 'file:fuse://conf/pipeline_configs/demo_setup.yml']
- task_key: 'feature-table-creation'
<<: *prod-cluster-config
depends_on:
- task_key: 'demo-setup'
spark_python_task:
python_file: 'file://telco_churn/pipelines/feature_table_creator_job.py'
parameters: ['--base-data-params', 'file:fuse://conf/.base_data_params.env',
'--env', 'file:fuse://conf/prod/.prod.env',
'--conf-file', 'file:fuse://conf/pipeline_configs/feature_table_creator.yml']
- task_key: 'model-train'
<<: *prod-cluster-config
depends_on:
- task_key: 'demo-setup'
- task_key: 'feature-table-creation'
spark_python_task:
python_file: 'file://telco_churn/pipelines/model_train_job.py'
parameters: ['--base-data-params', 'file:fuse://conf/.base_data_params.env',
'--env', 'file:fuse://conf/prod/.prod.env',
'--conf-file', 'file:fuse://conf/pipeline_configs/model_train.yml']
- name: 'PROD-telco-churn-model-train'
        <<: *prod-cluster-config
spark_python_task:
python_file: 'file://telco_churn/pipelines/model_train_job.py'
parameters: ['--base-data-params', 'file:fuse://conf/.base_data_params.env',
'--env', 'file:fuse://conf/prod/.prod.env',
'--conf-file', 'file:fuse://conf/pipeline_configs/model_train.yml']
- name: 'PROD-telco-churn-model-deployment'
        <<: *prod-cluster-config
spark_python_task:
python_file: 'file://telco_churn/pipelines/model_deployment_job.py'
parameters: ['--base-data-params', 'file:fuse://conf/.base_data_params.env',
'--env', 'file:fuse://conf/prod/.prod.env',
'--conf-file', 'file:fuse://conf/pipeline_configs/model_deployment.yml']
- name: 'PROD-telco-churn-model-inference-batch'
        <<: *prod-cluster-config
spark_python_task:
python_file: 'file://telco_churn/pipelines/model_inference_batch_job.py'
parameters: ['--base-data-params', 'file:fuse://conf/.base_data_params.env',
'--env', 'file:fuse://conf/prod/.prod.env',
'--conf-file', 'file:fuse://conf/pipeline_configs/model_inference_batch.yml']
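# A sketch of deploying and launching these jobs with the dbx CLI (assuming a dbx 0.7/0.8-style
# workflow; flag names may differ across dbx versions):
#   dbx deploy --deployment-file=conf/deployment.yml --environment=dev
#   dbx launch --job=DEV-telco-churn-demo-setup --environment=dev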