Skip to content
This repository has been archived by the owner on Sep 9, 2020. It is now read-only.

Commit

Permalink
Merge pull request #94 from jrasell/gh-37-scaling-state-backend
Browse files Browse the repository at this point in the history
add metrics to measure scaling state backend request latencies
  • Loading branch information
jrasell authored Nov 5, 2019
2 parents d8feec4 + 37b7087 commit 1fc1f41
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 2 deletions.
86 changes: 86 additions & 0 deletions docs/configuration/telemetry.md
Original file line number Diff line number Diff line change
Expand Up @@ -173,3 +173,89 @@ Policy backend metrics allow operators to get insight into how the policy storag
<td>Summary</td>
</tr>
</table>


# Scaling State Backend Metrics

Scaling state backend metrics allow operators to get insight into how the scaling state backend is functioning.

<table class="table table-bordered table-striped">
<tr>
<th>Metric</th>
<th>Description</th>
<th>Unit</th>
<th>Type</th>
</tr>
<tr>
<td>`sherpa.scale.state.memory.get_events`</td>
<td>Time taken to list all stored scaling activities from the memory backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.scale.state.memory.get_event`</td>
<td>Time taken to get a stored scaling activity from the memory backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.scale.state.memory.get_latest_events`</td>
<td>Time taken to list the latest stored scaling activities from the memory backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.scale.state.memory.get_latest_event`</td>
<td>Time taken to get the latest scaling activity for a job group from the memory backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.scale.state.memory.put_event`</td>
<td>Time taken to put a scaling activity in the memory backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.scale.state.memory.gc`</td>
<td>Time taken to run the scaling state garbage collector for the memory backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.scale.state.consul.get_events`</td>
<td>Time taken to list all stored scaling activities from the Consul backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.scale.state.consul.get_event`</td>
<td>Time taken to get a stored scaling activity from the Consul backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.scale.state.consul.get_latest_events`</td>
<td>Time taken to list the latest stored scaling activities from the Consul backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.scale.state.consul.get_latest_event`</td>
<td>Time taken to get the latest scaling activity for a job group from the Consul backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.scale.state.consul.put_event`</td>
<td>Time taken to put a scaling activity in the Consul backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
<tr>
<td>`sherpa.scale.state.consul.gc`</td>
<td>Time taken to run the scaling state garbage collector for the Consul backend</td>
<td>Milliseconds</td>
<td>Summary</td>
</tr>
</table>
25 changes: 24 additions & 1 deletion pkg/state/scale/consul/consul.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"strings"
"time"

"github.com/armon/go-metrics"
"github.com/gofrs/uuid"
"github.com/hashicorp/consul/api"
"github.com/jrasell/sherpa/pkg/client"
Expand All @@ -23,6 +24,16 @@ const (
latestEventsKVPath = "state/latest-events/"
)

// Metric keys for the Consul scaling state backend. Each key is passed to
// metrics.MeasureSince by the backend method it is named after, so one timing
// sample is emitted per call of that method.
var (
	// Listing every stored scaling event.
	metricKeyGetEvents = []string{"scale", "state", "consul", "get_events"}
	// Reading the stored scaling event(s) for a single ID.
	metricKeyGetEvent = []string{"scale", "state", "consul", "get_event"}
	// Listing the latest scaling events.
	metricKeyGetLatestEvents = []string{"scale", "state", "consul", "get_latest_events"}
	// Reading the latest scaling event for one job and group.
	metricKeyGetLatestEvent = []string{"scale", "state", "consul", "get_latest_event"}
	// Writing a scaling event.
	metricKeyPutEvent = []string{"scale", "state", "consul", "put_event"}
	// One garbage collection run.
	metricKeyGC = []string{"scale", "state", "consul", "gc"}
)

type StateBackend struct {
basePath string
eventsPath string
Expand All @@ -47,6 +58,8 @@ func NewStateBackend(log zerolog.Logger, path string) scale.Backend {
}

func (s StateBackend) GetLatestScalingEvents() (map[string]*state.ScalingEvent, error) {
defer metrics.MeasureSince(metricKeyGetLatestEvents, time.Now())

kv, _, err := s.kv.List(s.latestEventsPath, nil)
if err != nil {
return nil, err
Expand All @@ -73,6 +86,8 @@ func (s StateBackend) GetLatestScalingEvents() (map[string]*state.ScalingEvent,
}

func (s StateBackend) GetLatestScalingEvent(job, group string) (*state.ScalingEvent, error) {
defer metrics.MeasureSince(metricKeyGetLatestEvent, time.Now())

kv, _, err := s.kv.Get(s.latestEventsPath+job+":"+group, nil)
if err != nil {
return nil, err
Expand All @@ -90,6 +105,8 @@ func (s StateBackend) GetLatestScalingEvent(job, group string) (*state.ScalingEv
}

func (s StateBackend) GetScalingEvents() (map[uuid.UUID]map[string]*state.ScalingEvent, error) {
defer metrics.MeasureSince(metricKeyGetEvents, time.Now())

kv, _, err := s.kv.List(s.eventsPath, nil)
if err != nil {
return nil, err
Expand Down Expand Up @@ -122,6 +139,8 @@ func (s StateBackend) GetScalingEvents() (map[uuid.UUID]map[string]*state.Scalin
}

func (s StateBackend) GetScalingEvent(id uuid.UUID) (map[string]*state.ScalingEvent, error) {
defer metrics.MeasureSince(metricKeyGetEvent, time.Now())

kv, _, err := s.kv.List(s.eventsPath+id.String(), nil)
if err != nil {
return nil, err
Expand All @@ -147,6 +166,7 @@ func (s StateBackend) GetScalingEvent(id uuid.UUID) (map[string]*state.ScalingEv
}

func (s StateBackend) PutScalingEvent(job string, event *state.ScalingEventMessage) error {
defer metrics.MeasureSince(metricKeyPutEvent, time.Now())

sEntry := &state.ScalingEvent{
EvalID: event.EvalID,
Expand Down Expand Up @@ -181,6 +201,9 @@ func (s StateBackend) PutScalingEvent(job string, event *state.ScalingEventMessa
}

func (s StateBackend) RunGarbageCollection() {
t := time.Now()
defer metrics.MeasureSince(metricKeyGC, t)

kv, _, err := s.kv.List(s.eventsPath, nil)
if err != nil {
s.logger.Error().Err(err).Msg("GC failed to list events in backend store")
Expand All @@ -190,7 +213,7 @@ func (s StateBackend) RunGarbageCollection() {
return
}

gc := time.Now().UTC().UnixNano() - s.gcThreshold
gc := t.UTC().UnixNano() - s.gcThreshold

for i := range kv {

Expand Down
25 changes: 24 additions & 1 deletion pkg/state/scale/memory/memory.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,24 @@ import (
"sync"
"time"

"github.com/armon/go-metrics"
"github.com/gofrs/uuid"
"github.com/jrasell/sherpa/pkg/state"
"github.com/jrasell/sherpa/pkg/state/scale"
)

var _ scale.Backend = (*StateBackend)(nil)

// Metric keys for the in-memory scaling state backend. Each key is passed to
// metrics.MeasureSince by the backend method it is named after, so one timing
// sample is emitted per call of that method.
var (
	// Listing every stored scaling event.
	metricKeyGetEvents = []string{"scale", "state", "memory", "get_events"}
	// Reading the stored scaling event(s) for a single ID.
	metricKeyGetEvent = []string{"scale", "state", "memory", "get_event"}
	// Listing the latest scaling events.
	metricKeyGetLatestEvents = []string{"scale", "state", "memory", "get_latest_events"}
	// Reading the latest scaling event for one job and group.
	metricKeyGetLatestEvent = []string{"scale", "state", "memory", "get_latest_event"}
	// Writing a scaling event.
	metricKeyPutEvent = []string{"scale", "state", "memory", "put_event"}
	// One garbage collection run.
	metricKeyGC = []string{"scale", "state", "memory", "gc"}
)

type StateBackend struct {
gcThreshold int64
state *state.ScalingState
Expand All @@ -28,27 +39,35 @@ func NewStateBackend() scale.Backend {
}

// GetLatestScalingEvents returns the latest scaling events held in memory,
// keyed the same way as the backend's LatestEvents map.
//
// The returned map is a shallow copy built while the read lock is held.
// Handing out the backend's own map would let callers read or range over it
// after the lock is released, racing with writers that mutate it under the
// write lock. The *state.ScalingEvent values are still shared with the
// backend and should be treated as read-only.
//
// The call duration is recorded under metricKeyGetLatestEvents. The returned
// error is always nil.
func (s *StateBackend) GetLatestScalingEvents() (map[string]*state.ScalingEvent, error) {
	defer metrics.MeasureSince(metricKeyGetLatestEvents, time.Now())

	s.RLock()
	defer s.RUnlock()

	// Keep a nil map nil so callers that distinguish "no state" from
	// "empty state" see exactly what they saw before.
	if s.state.LatestEvents == nil {
		return nil, nil
	}

	latest := make(map[string]*state.ScalingEvent, len(s.state.LatestEvents))
	for key, event := range s.state.LatestEvents {
		latest[key] = event
	}
	return latest, nil
}

// GetLatestScalingEvent looks up the latest scaling event stored for the given
// job and group. Entries are keyed by "<job>:<group>"; when no entry exists
// the returned event is nil.
//
// The lookup runs under the read lock and its duration is recorded under
// metricKeyGetLatestEvent. The returned error is always nil.
func (s *StateBackend) GetLatestScalingEvent(job, group string) (*state.ScalingEvent, error) {
	defer metrics.MeasureSince(metricKeyGetLatestEvent, time.Now())

	key := job + ":" + group

	s.RLock()
	defer s.RUnlock()

	return s.state.LatestEvents[key], nil
}

// GetScalingEvents returns every scaling event held in memory, grouped by
// event ID.
//
// Both the outer map and each per-ID inner map are copied while the read lock
// is held. Handing out the backend's own maps would let callers read or range
// over them after the lock is released, racing with writers that mutate them
// under the write lock. The *state.ScalingEvent values are still shared with
// the backend and should be treated as read-only.
//
// The call duration is recorded under metricKeyGetEvents. The returned error
// is always nil.
func (s *StateBackend) GetScalingEvents() (map[uuid.UUID]map[string]*state.ScalingEvent, error) {
	defer metrics.MeasureSince(metricKeyGetEvents, time.Now())

	s.RLock()
	defer s.RUnlock()

	// Keep a nil map nil so callers that distinguish "no state" from
	// "empty state" see exactly what they saw before.
	if s.state.Events == nil {
		return nil, nil
	}

	events := make(map[uuid.UUID]map[string]*state.ScalingEvent, len(s.state.Events))
	for id, stored := range s.state.Events {
		if stored == nil {
			events[id] = nil
			continue
		}

		perID := make(map[string]*state.ScalingEvent, len(stored))
		for key, event := range stored {
			perID[key] = event
		}
		events[id] = perID
	}
	return events, nil
}

func (s *StateBackend) PutScalingEvent(job string, event *state.ScalingEventMessage) error {
defer metrics.MeasureSince(metricKeyPutEvent, time.Now())

s.Lock()
defer s.Unlock()

Expand All @@ -71,15 +90,19 @@ func (s *StateBackend) PutScalingEvent(job string, event *state.ScalingEventMess
}

// GetScalingEvent returns the scaling events stored under the given event ID.
// When nothing is stored for the ID the returned map is nil.
//
// The returned map is a shallow copy built while the read lock is held.
// Handing out the backend's own map would let callers read or range over it
// after the lock is released, racing with writers that mutate it under the
// write lock. The *state.ScalingEvent values are still shared with the
// backend and should be treated as read-only.
//
// The call duration is recorded under metricKeyGetEvent. The returned error
// is always nil.
func (s *StateBackend) GetScalingEvent(id uuid.UUID) (map[string]*state.ScalingEvent, error) {
	defer metrics.MeasureSince(metricKeyGetEvent, time.Now())

	s.RLock()
	defer s.RUnlock()

	stored := s.state.Events[id]
	// A missing ID (or a nil entry) stays nil, matching the previous result.
	if stored == nil {
		return nil, nil
	}

	events := make(map[string]*state.ScalingEvent, len(stored))
	for key, event := range stored {
		events[key] = event
	}
	return events, nil
}

func (s *StateBackend) RunGarbageCollection() {
t := time.Now()
defer metrics.MeasureSince(metricKeyGC, t)

gc := time.Now().UTC().UnixNano() - s.gcThreshold
gc := t.UTC().UnixNano() - s.gcThreshold

newEventState := make(map[uuid.UUID]map[string]*state.ScalingEvent)

Expand Down

0 comments on commit 1fc1f41

Please sign in to comment.