Skip to content
This repository has been archived by the owner on Sep 9, 2020. It is now read-only.

Commit

Permalink
Merge branch 'master' into gh-37-autoscaler
Browse files Browse the repository at this point in the history
  • Loading branch information
jrasell committed Nov 5, 2019
2 parents 4efaba7 + 05ac7f0 commit 5789fd3
Show file tree
Hide file tree
Showing 9 changed files with 317 additions and 26 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
## 0.4.0 (Unreleased)

IMPROVEMENTS:
* Metrics to measure policy backend request latencies [[GH-93]](https://github.com/jrasell/sherpa/pull/93)
* Metrics to measure scaling state backend request latencies [[GH-94]](https://github.com/jrasell/sherpa/pull/94)

REFACTOR:
* Move the system API endpoint into the `server/endpoints/v1` package [[GH-99]](https://github.com/jrasell/sherpa/pull/99)

## 0.3.0 (4 November, 2019)

IMPROVEMENTS:
Expand Down
183 changes: 183 additions & 0 deletions docs/configuration/telemetry.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,189 @@ Runtime metrics allow operators to get insight into how the Sherpa server proces
</tr>
</table>

# Policy Backend Metrics

Policy backend metrics allow operators to get insight into how the policy storage backend is functioning.

<table class="table table-bordered table-striped">
<tr>
<th>Metric</th>
<th>Description</th>
<th>Unit</th>
<th>Type</th>
</tr>
<tr>
<td>`sherpa.policy.memory.get_policies`</td>
<td>Time taken to list all stored scaling policies from the memory backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.policy.memory.get_job_policy`</td>
<td>Time taken to get a job scaling policy from the memory backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.policy.memory.get_job_group_policy`</td>
<td>Time taken to get a job group scaling policy from the memory backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.policy.memory.put_job_policy`</td>
<td>Time taken to put a job scaling policy in the memory backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.policy.memory.put_job_group_policy`</td>
<td>Time taken to put a job group scaling policy in the memory backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.policy.memory.delete_job_policy`</td>
<td>Time taken to delete a job scaling policy from the memory backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.policy.memory.delete_job_group_policy`</td>
<td>Time taken to delete a job group scaling policy from the memory backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.policy.consul.get_policies`</td>
<td>Time taken to list all stored scaling policies from the Consul backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.policy.consul.get_job_policy`</td>
<td>Time taken to get a job scaling policy from the Consul backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.policy.consul.get_job_group_policy`</td>
<td>Time taken to get a job group scaling policy from the Consul backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.policy.consul.put_job_policy`</td>
<td>Time taken to put a job scaling policy in the Consul backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.policy.consul.put_job_group_policy`</td>
<td>Time taken to put a job group scaling policy in the Consul backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.policy.consul.delete_job_policy`</td>
<td>Time taken to delete a job scaling policy from the Consul backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.policy.consul.delete_job_group_policy`</td>
<td>Time taken to delete a job group scaling policy from the Consul backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
</table>


# Scaling State Backend Metrics

Scaling state backend metrics allow operators to get insight into how the scaling state backend is functioning.

<table class="table table-bordered table-striped">
<tr>
<th>Metric</th>
<th>Description</th>
<th>Unit</th>
<th>Type</th>
</tr>
<tr>
<td>`sherpa.scale.state.memory.get_events`</td>
<td>Time taken to list all stored scaling activities from the memory backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.scale.state.memory.get_event`</td>
<td>Time taken to get a stored scaling activity from the memory backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.scale.state.memory.get_latest_events`</td>
<td>Time taken to list the latest stored scaling activities from the memory backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.scale.state.memory.get_latest_event`</td>
<td>Time taken to get the latest scaling activity for a job group from the memory backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.scale.state.memory.put_event`</td>
<td>Time taken to put a scaling activity in the memory backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.scale.state.memory.gc`</td>
<td>Time taken to run the scaling state garbage collector for the memory backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.scale.state.consul.get_events`</td>
<td>Time taken to list all stored scaling activities from the Consul backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.scale.state.consul.get_event`</td>
<td>Time taken to get a stored scaling activity from the Consul backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.scale.state.consul.get_latest_events`</td>
<td>Time taken to list the latest stored scaling activities from the Consul backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.scale.state.consul.get_latest_event`</td>
<td>Time taken to get the latest scaling activity for a job group from the Consul backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.scale.state.consul.put_event`</td>
<td>Time taken to put a scaling activity in the Consul backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.scale.state.consul.gc`</td>
<td>Time taken to run the scaling state garbage collector for the Consul backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
</table>

# Autoscale Metrics

Autoscale metrics allow operators to get insight into how the autoscaler is functioning.
Expand Down
27 changes: 27 additions & 0 deletions pkg/policy/backend/consul/consul.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ package consul
import (
"encoding/json"
"strings"
"time"

"github.com/armon/go-metrics"
"github.com/hashicorp/consul/api"
"github.com/jrasell/sherpa/pkg/client"
"github.com/jrasell/sherpa/pkg/policy"
Expand All @@ -18,6 +20,17 @@ const (
baseKVPath = "policies/"
)

// Metric key paths for the Consul policy backend. Each backend method passes
// its own key to metrics.MeasureSince to time the call.
var (
	// Read operations.
	metricKeyGetPolicies       = []string{"policy", "consul", "get_policies"}
	metricKeyGetJobPolicy      = []string{"policy", "consul", "get_job_policy"}
	metricKeyGetJobGroupPolicy = []string{"policy", "consul", "get_job_group_policy"}

	// Write operations.
	metricKeyPutJobPolicy      = []string{"policy", "consul", "put_job_policy"}
	metricKeyPutJobGroupPolicy = []string{"policy", "consul", "put_job_group_policy"}

	// Delete operations.
	metricKeyDeleteJobPolicy      = []string{"policy", "consul", "delete_job_policy"}
	metricKeyDeleteJobGroupPolicy = []string{"policy", "consul", "delete_job_group_policy"}
)

type PolicyBackend struct {
path string
logger zerolog.Logger
Expand All @@ -36,6 +49,8 @@ func NewConsulPolicyBackend(log zerolog.Logger, path string) backend.PolicyBacke
}

func (p *PolicyBackend) GetPolicies() (map[string]map[string]*policy.GroupScalingPolicy, error) {
defer metrics.MeasureSince(metricKeyGetPolicies, time.Now())

kv, _, err := p.kv.List(p.path, nil)
if err != nil {
return nil, err
Expand Down Expand Up @@ -69,6 +84,8 @@ func (p *PolicyBackend) GetPolicies() (map[string]map[string]*policy.GroupScalin
}

func (p *PolicyBackend) GetJobPolicy(job string) (map[string]*policy.GroupScalingPolicy, error) {
defer metrics.MeasureSince(metricKeyGetJobPolicy, time.Now())

kv, _, err := p.kv.List(p.path+job, nil)
if err != nil {
return nil, err
Expand Down Expand Up @@ -96,6 +113,8 @@ func (p *PolicyBackend) GetJobPolicy(job string) (map[string]*policy.GroupScalin
}

func (p *PolicyBackend) GetJobGroupPolicy(job, group string) (*policy.GroupScalingPolicy, error) {
defer metrics.MeasureSince(metricKeyGetJobGroupPolicy, time.Now())

kv, _, err := p.kv.Get(p.path+job+"/"+group, nil)
if err != nil {
return nil, err
Expand All @@ -115,6 +134,8 @@ func (p *PolicyBackend) GetJobGroupPolicy(job, group string) (*policy.GroupScali
}

func (p *PolicyBackend) PutJobPolicy(job string, groupPolicies map[string]*policy.GroupScalingPolicy) error {
defer metrics.MeasureSince(metricKeyPutJobPolicy, time.Now())

var kvOpts []*api.KVTxnOp // nolint:prealloc

for group, pol := range groupPolicies {
Expand Down Expand Up @@ -145,6 +166,8 @@ func (p *PolicyBackend) PutJobPolicy(job string, groupPolicies map[string]*polic
}

func (p *PolicyBackend) PutJobGroupPolicy(job, group string, pol *policy.GroupScalingPolicy) error {
defer metrics.MeasureSince(metricKeyPutJobGroupPolicy, time.Now())

marshal, err := json.Marshal(pol)
if err != nil {
return err
Expand All @@ -160,11 +183,15 @@ func (p *PolicyBackend) PutJobGroupPolicy(job, group string, pol *policy.GroupSc
}

// DeleteJobPolicy removes the group scaling policies stored for job from the
// Consul KV store and records the call latency under metricKeyDeleteJobPolicy.
//
// It returns whatever error the Consul KV client reports, or nil on success.
func (p *PolicyBackend) DeleteJobPolicy(job string) error {
	defer metrics.MeasureSince(metricKeyDeleteJobPolicy, time.Now())

	// Group policies live at "<path><job>/<group>". DeleteTree removes every
	// key that shares the given prefix, so the trailing "/" is required:
	// without it, deleting job "web" would also delete the policies of any
	// other job whose name merely starts with "web" (for example "web-api").
	_, err := p.kv.DeleteTree(p.path+job+"/", nil)
	return err
}

// DeleteJobGroupPolicy deletes the single Consul KV entry stored at
// "<path><job>/<group>" and records the call latency under
// metricKeyDeleteJobGroupPolicy.
//
// It returns the error reported by the Consul KV client, or nil on success.
func (p *PolicyBackend) DeleteJobGroupPolicy(job, group string) error {
	start := time.Now()
	defer metrics.MeasureSince(metricKeyDeleteJobGroupPolicy, start)

	key := p.path + job + "/" + group
	if _, err := p.kv.Delete(key, nil); err != nil {
		return err
	}
	return nil
}
27 changes: 27 additions & 0 deletions pkg/policy/backend/memory/memory.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,26 @@ package memory

import (
"sync"
"time"

"github.com/armon/go-metrics"
"github.com/jrasell/sherpa/pkg/policy"
"github.com/jrasell/sherpa/pkg/policy/backend"
)

var _ backend.PolicyBackend = (*PolicyBackend)(nil)

// Metric key paths for the in-memory policy backend, one per backend
// operation. The keys are passed to metrics.MeasureSince to time each call.
var (
	// Read operations.
	metricKeyGetPolicies       = []string{"policy", "memory", "get_policies"}
	metricKeyGetJobPolicy      = []string{"policy", "memory", "get_job_policy"}
	metricKeyGetJobGroupPolicy = []string{"policy", "memory", "get_job_group_policy"}

	// Write operations.
	metricKeyPutJobPolicy      = []string{"policy", "memory", "put_job_policy"}
	metricKeyPutJobGroupPolicy = []string{"policy", "memory", "put_job_group_policy"}

	// Delete operations.
	metricKeyDeleteJobPolicy      = []string{"policy", "memory", "delete_job_policy"}
	metricKeyDeleteJobGroupPolicy = []string{"policy", "memory", "delete_job_group_policy"}
)

type PolicyBackend struct {
policies map[string]map[string]*policy.GroupScalingPolicy
sync.RWMutex
Expand All @@ -21,13 +34,17 @@ func NewJobScalingPolicies() backend.PolicyBackend {
}

// GetPolicies returns a snapshot of every scaling policy held by the
// in-memory backend and records the call latency under metricKeyGetPolicies.
//
// The outer map and each nested map are copied while the read lock is held.
// Returning p.policies directly would let callers iterate the live maps after
// the lock is released, racing with concurrent writers that take the write
// lock. The *policy.GroupScalingPolicy values themselves are shared, not
// cloned. The returned error is always nil.
func (p *PolicyBackend) GetPolicies() (map[string]map[string]*policy.GroupScalingPolicy, error) {
	defer metrics.MeasureSince(metricKeyGetPolicies, time.Now())

	p.RLock()
	defer p.RUnlock()

	// Preserve the original nil result when nothing has been initialised.
	if p.policies == nil {
		return nil, nil
	}

	snapshot := make(map[string]map[string]*policy.GroupScalingPolicy, len(p.policies))
	for job, groups := range p.policies {
		groupsCopy := make(map[string]*policy.GroupScalingPolicy, len(groups))
		for group, pol := range groups {
			groupsCopy[group] = pol
		}
		snapshot[job] = groupsCopy
	}
	return snapshot, nil
}

func (p *PolicyBackend) GetJobPolicy(job string) (map[string]*policy.GroupScalingPolicy, error) {
defer metrics.MeasureSince(metricKeyGetJobPolicy, time.Now())

p.RLock()
defer p.RUnlock()

Expand All @@ -38,6 +55,8 @@ func (p *PolicyBackend) GetJobPolicy(job string) (map[string]*policy.GroupScalin
}

func (p *PolicyBackend) GetJobGroupPolicy(job, group string) (*policy.GroupScalingPolicy, error) {
defer metrics.MeasureSince(metricKeyGetJobGroupPolicy, time.Now())

p.RLock()
defer p.RUnlock()

Expand All @@ -48,6 +67,8 @@ func (p *PolicyBackend) GetJobGroupPolicy(job, group string) (*policy.GroupScali
}

func (p *PolicyBackend) PutJobPolicy(job string, policies map[string]*policy.GroupScalingPolicy) error {
defer metrics.MeasureSince(metricKeyPutJobPolicy, time.Now())

p.Lock()
defer p.Unlock()

Expand All @@ -62,6 +83,8 @@ func (p *PolicyBackend) PutJobPolicy(job string, policies map[string]*policy.Gro
}

func (p *PolicyBackend) PutJobGroupPolicy(job, group string, policies *policy.GroupScalingPolicy) error {
defer metrics.MeasureSince(metricKeyPutJobGroupPolicy, time.Now())

p.Lock()
defer p.Unlock()

Expand All @@ -76,6 +99,8 @@ func (p *PolicyBackend) PutJobGroupPolicy(job, group string, policies *policy.Gr
}

func (p *PolicyBackend) DeleteJobGroupPolicy(job, group string) error {
	// Record latency under policy.memory.delete_job_group_policy, the key
	// that matches this operation (not the job-level delete key).
	defer metrics.MeasureSince(metricKeyDeleteJobGroupPolicy, time.Now())

p.Lock()
defer p.Unlock()

Expand All @@ -86,6 +111,8 @@ func (p *PolicyBackend) DeleteJobGroupPolicy(job, group string) error {
}

func (p *PolicyBackend) DeleteJobPolicy(job string) error {
	// Record latency under policy.memory.delete_job_policy, the key that
	// matches this operation (not the job-group delete key).
	defer metrics.MeasureSince(metricKeyDeleteJobPolicy, time.Now())

p.Lock()
defer p.Unlock()

Expand Down
Loading

0 comments on commit 5789fd3

Please sign in to comment.