Skip to content
This repository has been archived by the owner on Mar 20, 2023. It is now read-only.

Commit

Permalink
Support max increment of VMs in scenario autoscale
Browse files Browse the repository at this point in the history
- Allow definition of weekdays/workhours
- Resolves #210
  • Loading branch information
alfpark committed Jun 7, 2018
1 parent e65dd9c commit 831e2ae
Show file tree
Hide file tree
Showing 6 changed files with 180 additions and 32 deletions.
14 changes: 12 additions & 2 deletions config_templates/pool.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,29 @@ pool_specification:
resize_timeout: 00:20:00
node_fill_type: pack
autoscale:
evaluation_interval: 00:05:00
evaluation_interval: 00:15:00
scenario:
name: active_tasks
maximum_vm_count:
dedicated: 16
low_priority: 8
maximum_vm_increment_per_evaluation:
dedicated: 4
low_priority: -1
node_deallocation_option: taskcompletion
sample_lookback_interval: 00:10:00
required_sample_percentage: 70
bias_last_sample: true
bias_node_type: low_priority
rebalance_preemption_percentage: 50
formula: ''
time_ranges:
weekdays:
start: 1
end: 5
work_hours:
start: 8
end: 17
formula: null
inter_node_communication_enabled: true
reboot_on_start_task_failed: false
attempt_recovery_on_unusable: false
Expand Down
69 changes: 59 additions & 10 deletions convoy/autoscale.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@
'min_target_low_priority',
'max_target_dedicated',
'max_target_low_priority',
'max_inc_dedicated',
'max_inc_low_priority',
'weekday_start',
'weekday_end',
'workhour_start',
'workhour_end',
]
)

Expand Down Expand Up @@ -142,19 +148,28 @@ def _formula_tasks(pool):
'redistVMs = rebalance ? min(preemptcount, remainingVMs) : 0',
'dedicatedVMs = min(maxTargetDedicated, '
'dedicatedVMs + redistVMs + minTargetDedicated)',
'dedicatedVMs = min($CurrentDedicatedNodes + maxIncDedicated, '
'dedicatedVMs)',
'remainingVMs = max(0, reqVMs - dedicatedVMs)',
'lowPriVMs = min(maxTargetLowPriority, '
'remainingVMs + minTargetLowPriority)',
'lowPriVMs = min($CurrentLowPriorityNodes + maxIncLowPriority, '
'lowPriVMs)',
'$TargetDedicatedNodes = dedicatedVMs',
'$TargetLowPriorityNodes = lowPriVMs',
]
elif pool.autoscale.scenario.bias_node_type == 'dedicated':
target_vms = [
'dedicatedVMs = min(maxTargetDedicated, reqVMs)',
'dedicatedVMs = min($CurrentDedicatedNodes + maxIncDedicated, '
'dedicatedVMs)',
'remainingVMs = max(0, reqVMs - dedicatedVMs)',
'$TargetDedicatedNodes = dedicatedVMs',
'$TargetLowPriorityNodes = min(maxTargetLowPriority, '
'lowPriVMs = min(maxTargetLowPriority, '
'remainingVMs + minTargetLowPriority)',
'lowPriVMs = min($CurrentLowPriorityNodes + maxIncLowPriority, '
'lowPriVMs)',
'$TargetDedicatedNodes = dedicatedVMs',
'$TargetLowPriorityNodes = lowPriVMs',
]
elif pool.autoscale.scenario.bias_node_type == 'low_priority':
target_vms = [
Expand All @@ -163,10 +178,15 @@ def _formula_tasks(pool):
'redistVMs = rebalance ? min(preemptcount, remainingVMs) : 0',
'lowPriVMs = max(minTargetLowPriority, '
'reqVMs - redistVMs + minTargetLowPriority)',
'lowPriVMs = min($CurrentLowPriorityNodes + maxIncLowPriority, '
'lowPriVMs)',
'remainingVMs = max(0, reqVMs - lowPriVMs)',
'$TargetLowPriorityNodes = lowPriVMs',
'$TargetDedicatedNodes = min(maxTargetDedicated, '
'dedicatedVMs = min(maxTargetDedicated, '
'remainingVMs + minTargetDedicated)',
'dedicatedVMs = min($CurrentDedicatedNodes + maxIncDedicated, '
'dedicatedVMs)',
'$TargetLowPriorityNodes = lowPriVMs',
'$TargetDedicatedNodes = dedicatedVMs',
]
else:
raise ValueError(
Expand All @@ -179,6 +199,8 @@ def _formula_tasks(pool):
'minTargetLowPriority = {}'.format(minmax.min_target_low_priority),
'maxTargetDedicated = {}'.format(minmax.max_target_dedicated),
'maxTargetLowPriority = {}'.format(minmax.max_target_low_priority),
'maxIncDedicated = {}'.format(minmax.max_inc_dedicated),
'maxIncLowPriority = {}'.format(minmax.max_inc_low_priority),
req_vms,
target_vms,
'$NodeDeallocationOption = {}'.format(
Expand All @@ -198,16 +220,20 @@ def _formula_day_of_week(pool):
if pool.autoscale.scenario.name == 'workday':
target_vms = [
'now = time()',
'isWorkHours = now.hour >= 8 && now.hour < 18',
'isWeekday = now.weekday >= 1 && now.weekday <= 5',
'isWorkHours = now.hour >= workhourStart && '
'now.hour <= workhourEnd',
'isWeekday = now.weekday >= weekdayStart && '
'now.weekday <= weekdayEnd',
'isPeakTime = isWeekday && isWorkHours',
]
elif (pool.autoscale.scenario.name ==
'workday_with_offpeak_max_low_priority'):
target_vms = [
'now = time()',
'isWorkHours = now.hour >= 8 && now.hour < 18',
'isWeekday = now.weekday >= 1 && now.weekday <= 5',
'isWorkHours = now.hour >= workhourStart && '
'now.hour <= workhourEnd',
'isWeekday = now.weekday >= weekdayStart && '
'now.weekday <= weekdayEnd',
'isPeakTime = isWeekday && isWorkHours',
'$TargetLowPriorityNodes = maxTargetLowPriority',
]
Expand All @@ -220,12 +246,14 @@ def _formula_day_of_week(pool):
elif pool.autoscale.scenario.name == 'weekday':
target_vms = [
'now = time()',
'isPeakTime = now.weekday >= 1 && now.weekday <= 5',
'isPeakTime = now.weekday >= weekdayStart && '
'now.weekday <= weekdayEnd',
]
elif pool.autoscale.scenario.name == 'weekend':
target_vms = [
'now = time()',
'isPeakTime = now.weekday >= 6 && now.weekday <= 7',
'isPeakTime = now.weekday < weekdayStart && '
'now.weekday > weekdayEnd',
]
else:
raise ValueError('autoscale scenario name invalid: {}'.format(
Expand Down Expand Up @@ -259,6 +287,10 @@ def _formula_day_of_week(pool):
'minTargetLowPriority = {}'.format(minmax.min_target_low_priority),
'maxTargetDedicated = {}'.format(minmax.max_target_dedicated),
'maxTargetLowPriority = {}'.format(minmax.max_target_low_priority),
'weekdayStart = {}'.format(minmax.weekday_start),
'weekdayEnd = {}'.format(minmax.weekday_end),
'workhourStart = {}'.format(minmax.workhour_start),
'workhourEnd = {}'.format(minmax.workhour_end),
target_vms,
'$NodeDeallocationOption = {}'.format(
pool.autoscale.scenario.node_deallocation_option),
Expand Down Expand Up @@ -291,12 +323,29 @@ def _get_minmax(pool):
raise ValueError(
'min target low priority {} > max target low priority {}'.format(
min_target_low_priority, max_target_low_priority))
max_inc_dedicated = (
pool.autoscale.scenario.maximum_vm_increment_per_evaluation.dedicated
)
max_inc_low_priority = (
pool.autoscale.scenario.
maximum_vm_increment_per_evaluation.low_priority
)
if max_inc_dedicated <= 0:
max_inc_dedicated = _UNBOUND_MAX_NODES
if max_inc_low_priority <= 0:
max_inc_low_priority = _UNBOUND_MAX_NODES
return AutoscaleMinMax(
max_tasks_per_node=pool.max_tasks_per_node,
min_target_dedicated=min_target_dedicated,
min_target_low_priority=min_target_low_priority,
max_target_dedicated=max_target_dedicated,
max_target_low_priority=max_target_low_priority,
max_inc_dedicated=max_inc_dedicated,
max_inc_low_priority=max_inc_low_priority,
weekday_start=pool.autoscale.scenario.weekday_start,
weekday_end=pool.autoscale.scenario.weekday_end,
workhour_start=pool.autoscale.scenario.workhour_start,
workhour_end=pool.autoscale.scenario.workhour_end,
)


Expand Down
16 changes: 16 additions & 0 deletions convoy/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,12 +153,17 @@
'PoolAutoscaleScenarioSettings', [
'name',
'maximum_vm_count',
'maximum_vm_increment_per_evaluation',
'node_deallocation_option',
'sample_lookback_interval',
'required_sample_percentage',
'rebalance_preemption_percentage',
'bias_last_sample',
'bias_node_type',
'weekday_start',
'weekday_end',
'workhour_start',
'workhour_end',
]
)
PoolAutoscaleSettings = collections.namedtuple(
Expand Down Expand Up @@ -965,6 +970,8 @@ def pool_autoscale_settings(config):
mvc = _kv_read_checked(scenconf, 'maximum_vm_count')
if mvc is None:
raise ValueError('maximum_vm_count must be specified')
mvipe = _kv_read_checked(
scenconf, 'maximum_vm_increment_per_evaluation', default={})
ndo = _kv_read_checked(
scenconf, 'node_deallocation_option', 'taskcompletion')
if (ndo is not None and
Expand All @@ -977,9 +984,14 @@ def pool_autoscale_settings(config):
sli = util.convert_string_to_timedelta(sli)
else:
sli = datetime.timedelta(minutes=10)
tr = _kv_read_checked(scenconf, 'time_ranges', default={})
trweekday = _kv_read_checked(tr, 'weekdays', default={})
trworkhour = _kv_read_checked(tr, 'work_hours', default={})
scenario = PoolAutoscaleScenarioSettings(
name=_kv_read_checked(scenconf, 'name').lower(),
maximum_vm_count=_pool_vm_count(config, conf=mvc),
maximum_vm_increment_per_evaluation=_pool_vm_count(
config, conf=mvipe),
node_deallocation_option=ndo,
sample_lookback_interval=sli,
required_sample_percentage=_kv_read(
Expand All @@ -990,6 +1002,10 @@ def pool_autoscale_settings(config):
scenconf, 'bias_last_sample', True),
bias_node_type=_kv_read_checked(
scenconf, 'bias_node_type', 'auto').lower(),
weekday_start=_kv_read(trweekday, 'start', default=1),
weekday_end=_kv_read(trweekday, 'end', default=5),
workhour_start=_kv_read(trworkhour, 'start', default=8),
workhour_end=_kv_read(trworkhour, 'end', default=17),
)
else:
scenario = None
Expand Down
40 changes: 37 additions & 3 deletions docs/13-batch-shipyard-configuration-pool.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,29 @@ pool_specification:
resize_timeout: 00:20:00
node_fill_type: pack
autoscale:
evaluation_interval: 00:05:00
evaluation_interval: 00:15:00
scenario:
name: active_tasks
maximum_vm_count:
dedicated: 16
low_priority: 8
maximum_vm_increment_per_evaluation:
dedicated: 4
low_priority: -1
node_deallocation_option: taskcompletion
sample_lookback_interval: 00:10:00
required_sample_percentage: 70
bias_last_sample: true
bias_node_type: low_priority
rebalance_preemption_percentage: 50
formula: ''
time_ranges:
weekdays:
start: 1
end: 5
work_hours:
start: 8
end: 17
formula: null
inter_node_communication_enabled: true
reboot_on_start_task_failed: false
attempt_recovery_on_unusable: false
Expand Down Expand Up @@ -217,7 +227,9 @@ each node type for `scenario` based autoscale.
timedelta with a string representation of "d.HH:mm:ss". "HH:mm:ss" is
required, but "d" is optional, if specified. If not specified, the
default is 15 minutes. The smallest value that can be specified is 5
minutes.
minutes. Use caution when specifying small `evaluation_interval`
values, which can cause pool resizing errors and instability with
volatile target counts.
* (optional) `scenario` is a pre-set autoscale scenario where a formula
will be generated with the parameters specified within this property.
* (required) `name` is the autoscale scenario name to apply. Valid
Expand All @@ -234,6 +246,14 @@ each node type for `scenario` based autoscale.
nodes that can be allocated.
* (optional) `low_priority` is the maximum number of low priority
compute nodes that can be allocated.
* (optional) `maximum_vm_increment_per_evaluation` is the maximum
number of VMs that can be added per evaluation. Specifying a non-positive
value (i.e., less than or equal to `0`) for either of the following
properties will result in effectively no increment limit.
* (optional) `dedicated` is the maximum increase in dedicated VMs
per evaluation.
* (optional) `low_priority` is the maximum increase in low priority
VMs per evaluation.
* (optional) `node_deallocation_option` is the node deallocation option
to apply. When a pool is resized down and a node is selected for
removal, what action is performed for the running task is specified
Expand Down Expand Up @@ -264,6 +284,20 @@ each node type for `scenario` based autoscale.
count reaches the indicated threshold percentage of the total
current dedicated and low priority nodes. The default is `null`,
meaning no rebalancing is performed.
* (optional) `time_ranges` defines the time ranges for the day-of-week
based scenarios.
* (optional) `weekdays` defines the days of the week which should
be considered weekdays, where `1` = Monday.
* (optional) `start` defines the first day of the week (inclusive)
that is considered a weekday, as an integer.
* (optional) `end` defines the last day of the week (inclusive)
that is considered a weekday, as an integer.
* (optional) `work_hours` defines the hours of the day in the
work day with a range from `0` to `23`, inclusive.
* (optional) `start` defines the inclusive start hour of
the work day as an integer.
* (optional) `end` defines the inclusive end hour of
the work day as an integer.
* (optional) `formula` is a custom autoscale formula to apply to the pool.
If both `formula` and `scenario` are specified, then `formula` is used.
* (optional) `inter_node_communication_enabled` designates if this pool is set
Expand Down
28 changes: 15 additions & 13 deletions docs/30-batch-shipyard-autoscale.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,9 @@ words, "tasks with satisified dependencies awaiting node assignment".
pending tasks for the pool. Tasks categorized under this metric are
tasks in active state with satisfied dependencies and running
tasks, in other words, "tasks pending completion".
* `workday` will autoscale the pool according to Monday-Friday workdays.
* `workday` will autoscale the pool according to the workdays specified.
* `workday_with_offpeak_max_low_priority` will autoscale the pool according
to Monday-Friday workdays and for off work time, use maximum number of
low priority nodes.
to workdays and for off work time, use maximum number of low priority nodes.
* `weekday` will autoscale the pool if it is a weekday.
* `weekend` will autoscale the pool if it is a weekend.

Expand All @@ -69,7 +68,9 @@ pool to resize down to zero nodes.
Additionally, there are options that can modify and fine-tune these scenarios
as needed:

* `node_deallocation_option` which specify when a node is targeted for
* `maximum_vm_increment_per_evaluation` sets limits on the maximum number
of dedicated or low priority VMs that can be added per evaluation.
* `node_deallocation_option` which specifies when a node is targeted for
deallocation but has a running task, what should be the action applied to
the task: `requeue`, `terminate`, `taskcompletion`, and `retaineddata`.
Please see [this doc](https://docs.microsoft.com/azure/batch/batch-automatic-scaling#variables)
Expand Down Expand Up @@ -97,7 +98,7 @@ This applies only to `active_tasks` and `pending_tasks` scenarios.
An example autoscale specification in the pool configuration may be:
```yaml
autoscale:
evaluation_interval: 00:05:00
evaluation_interval: 00:10:00
scenario:
name: active_tasks
maximum_vm_count:
Expand All @@ -106,15 +107,16 @@ An example autoscale specification in the pool configuration may be:
```
This example would apply the `active_tasks` scenario to the associated
pool with an evaluation interval of every 5 minutes. This means that the
pool with an evaluation interval of every 10 minutes. This means that the
autoscale formula is evaluated by the service and can have updates applied
every 5 minutes. Note that having a small evaluation interval may result
in undesirable behavior of the pool being resized constantly (or even
resize failures if the prior resize is still ongoing when the autoscale
evaluation happens again and results in a different target node count).
The `active_tasks` scenario also includes a `maximum_vm_count` to ensure that
the autoscale formula does not result in target node counts that exceed
16 dedicated and 8 low priority nodes.
every 10 minutes. Note that having a small evaluation interval may result
in undesirable behavior of the pool being resized constantly. This can result
in pool stability issues including resize failures if the prior resize is
still ongoing when the autoscale evaluation happens again and results in
a different target node count. For this example, the `active_tasks` scenario
also includes a `maximum_vm_count` to ensure that the autoscale formula
does not result in target node counts that exceed 16 dedicated and 8 low
priority nodes.

### Formula-based Autoscaling
Formula-based autoscaling allows users with expertise in creating autoscale
Expand Down
Loading

0 comments on commit 831e2ae

Please sign in to comment.