Skip to content

Commit

Permalink
Support attributing client metrics back to the owning cluster
Browse files Browse the repository at this point in the history
This adds a new cluster label to client metrics. This can be used to
join the client metrics with the cluster metrics in the cluster
dashboards.

We set the cluster label only if we can determine it from the address,
otherwise it remains an empty string.
  • Loading branch information
justinmir committed Sep 6, 2024
1 parent febde9a commit fea934f
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 3 deletions.
7 changes: 7 additions & 0 deletions redis/db/redisbp/hooks.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ type SpanHook struct {
Type string
Deployment string
Database string
// The cluster identifier based on the connection address. If we cannot identify
// a cluster based on connection address this field will be empty.
Cluster string

promActive *prometheusbpint.HighWatermarkGauge
}
Expand Down Expand Up @@ -94,6 +97,7 @@ func (h SpanHook) startChildSpan(ctx context.Context, cmdName string) context.Co
redisprom.CommandLabel: cmdName,
redisprom.DeploymentLabel: h.Deployment,
redisprom.DatabaseLabel: h.Database,
redisprom.ClusterLabel: h.Cluster,
}).Inc()
return context.WithValue(ctx, promCtxKey, &promCtx{
command: cmdName,
Expand All @@ -118,6 +122,7 @@ func (h SpanHook) endChildSpan(ctx context.Context, err error) {
redisprom.DeploymentLabel: h.Deployment,
redisprom.SuccessLabel: prometheusbp.BoolString(err == nil),
redisprom.DatabaseLabel: h.Database,
redisprom.ClusterLabel: h.Cluster,
}).Observe(durationSeconds)
}
// Outside of the context casting because we always want this to work.
Expand All @@ -128,13 +133,15 @@ func (h SpanHook) endChildSpan(ctx context.Context, err error) {
redisprom.DeploymentLabel: h.Deployment,
redisprom.SuccessLabel: prometheusbp.BoolString(err == nil),
redisprom.DatabaseLabel: h.Database,
redisprom.ClusterLabel: h.Cluster,
}).Inc()
redisprom.ActiveRequests.With(prometheus.Labels{
redisprom.ClientNameLabel: h.ClientName,
redisprom.TypeLabel: h.Type,
redisprom.CommandLabel: command,
redisprom.DeploymentLabel: h.Deployment,
redisprom.DatabaseLabel: h.Database,
redisprom.ClusterLabel: h.Cluster,
}).Dec()
if h.promActive != nil {
h.promActive.Dec()
Expand Down
2 changes: 2 additions & 0 deletions redis/db/redisbp/hooks_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ func TestSpanHook(t *testing.T) {
ClientName: "redis",
Type: "type",
Deployment: "Deployment",
Cluster: "cluster",
}
statusCmd := redis.NewStatusCmd(ctx, "ping")
stringCmd := redis.NewStringCmd(ctx, "get", "1")
Expand All @@ -40,6 +41,7 @@ func TestSpanHook(t *testing.T) {
redisprom.DeploymentLabel: "Deployment",
redisprom.SuccessLabel: "true",
redisprom.DatabaseLabel: "",
redisprom.ClusterLabel: "cluster",
}
defer promtest.NewPrometheusMetricTest(t, "spec latency timer", redisprom.LatencySeconds, labels).CheckSampleCountDelta(1)
defer promtest.NewPrometheusMetricTest(t, "spec requests total", redisprom.RequestsTotal, labels).CheckDelta(1)
Expand Down
21 changes: 21 additions & 0 deletions redis/db/redisbp/monitored_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,32 @@ func getDeploymentType(addr string) string {
}
}

// getTargetCluster derives the cluster name from a Redis connection address.
//
// The recognized address shape is
//
//	redis-<cluster name>.<vpc>.<region>.<postfix>.net:6379
//
// i.e. exactly five dot-separated tokens whose first token starts with
// "redis-". The returned value is the remainder of that first token.
//
// An empty string is returned when the cluster cannot be determined: the
// address contains "cache.amazonaws" (presumably an ElastiCache endpoint),
// it does not have five dot-separated tokens, or the first token does not
// start with "redis-" followed by a non-empty name.
func getTargetCluster(addr string) string {
	const prefix = "redis-"

	if strings.Contains(addr, "cache.amazonaws") {
		return ""
	}

	tokens := strings.Split(addr, ".")
	if len(tokens) != 5 {
		return ""
	}

	// Require "redis-" as a true prefix. A substring match would accept
	// hosts like "myredis-foo" and then slice off the wrong bytes.
	host := tokens[0]
	if !strings.HasPrefix(host, prefix) {
		return ""
	}

	// A bare "redis-" host trims to "", which is the "unknown" value.
	return strings.TrimPrefix(host, prefix)
}

// NewMonitoredClient creates a new *redis.Client object with a redisbp.SpanHook
// attached that connects to a single Redis instance.
func NewMonitoredClient(name string, opt *redis.Options) *redis.Client {
hook := SpanHook{
ClientName: name,
Type: "standalone",
Deployment: getDeploymentType(opt.Addr),
Cluster: getTargetCluster(opt.Addr),
Database: strconv.Itoa(opt.DB),
promActive: &prometheusbpint.HighWatermarkGauge{
HighWatermarkValue: &prometheusbpint.HighWatermarkValue{},
Expand Down Expand Up @@ -79,6 +98,7 @@ func NewMonitoredFailoverClient(name string, opt *redis.FailoverOptions) *redis.
ClientName: name,
Type: "sentinel",
Deployment: getDeploymentType(opt.SentinelAddrs[0]),
Cluster: getTargetCluster(opt.SentinelAddrs[0]),
Database: strconv.Itoa(opt.DB),
promActive: &prometheusbpint.HighWatermarkGauge{
HighWatermarkValue: &prometheusbpint.HighWatermarkValue{},
Expand Down Expand Up @@ -154,6 +174,7 @@ func NewMonitoredClusterClient(name string, opt *redis.ClusterOptions) *ClusterC
ClientName: name,
Type: "cluster",
Deployment: getDeploymentType(opt.Addrs[0]),
Cluster: getTargetCluster(opt.Addrs[0]),
Database: "", // We don't have that for cluster clients
promActive: &prometheusbpint.HighWatermarkGauge{
HighWatermarkValue: &prometheusbpint.HighWatermarkValue{},
Expand Down
7 changes: 4 additions & 3 deletions redis/internal/redisprom/requests.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ const (
TypeLabel = "redis_type" // MUST BE one of standalone, cluster, sentinel, identifies the backend's configuration for responding to the redis request
DeploymentLabel = "redis_deployment" // MUST BE one of reddit, elasticache, identifies the provider of the redis backend (not the explicit address)
CommandLabel = "redis_command" // SHALL reflect to Redis command being executed for the request (ie SET)
ClusterLabel = "redis_cluster" // MAY be blank if the cluster name cannot be determined from the address.
)

var (
Expand All @@ -24,20 +25,20 @@ var (
Help: "latency histogram",
Buckets: prometheusbp.DefaultLatencyBuckets,
},
[]string{ClientNameLabel, DatabaseLabel, TypeLabel, DeploymentLabel, CommandLabel, SuccessLabel},
[]string{ClientNameLabel, DatabaseLabel, TypeLabel, DeploymentLabel, CommandLabel, SuccessLabel, ClusterLabel},
)
ActiveRequests = promauto.With(prometheusbpint.GlobalRegistry).NewGaugeVec(
prometheus.GaugeOpts{
Name: "redis_client_active_requests",
Help: "total requests that are in-flight",
},
[]string{ClientNameLabel, DatabaseLabel, TypeLabel, DeploymentLabel, CommandLabel},
[]string{ClientNameLabel, DatabaseLabel, TypeLabel, DeploymentLabel, CommandLabel, ClusterLabel},
)
RequestsTotal = promauto.With(prometheusbpint.GlobalRegistry).NewCounterVec(
prometheus.CounterOpts{
Name: "redis_client_requests_total",
Help: "total request counter",
},
[]string{ClientNameLabel, DatabaseLabel, TypeLabel, DeploymentLabel, CommandLabel, SuccessLabel},
[]string{ClientNameLabel, DatabaseLabel, TypeLabel, DeploymentLabel, CommandLabel, SuccessLabel, ClusterLabel},
)
)

0 comments on commit fea934f

Please sign in to comment.