Skip to content

Commit

Permalink
Add metrics to measure processing time
Browse files Browse the repository at this point in the history
Signed-off-by: Masayuki Ishii <masa213f@gmail.com>
  • Loading branch information
masa213f committed Jan 27, 2023
1 parent 125be6e commit cffe9c4
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 15 deletions.
5 changes: 5 additions & 0 deletions clustering/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ type metricsSet struct {
replicas prometheus.Gauge
readyReplicas prometheus.Gauge
errantReplicas prometheus.Gauge
processingTime prometheus.Observer

backupTimestamp prometheus.Gauge
backupElapsed prometheus.Gauge
Expand Down Expand Up @@ -75,6 +76,7 @@ func newManagerProcess(c client.Client, r client.Reader, recorder record.EventRe
replicas: metrics.TotalReplicasVec.WithLabelValues(name.Name, name.Namespace),
readyReplicas: metrics.ReadyReplicasVec.WithLabelValues(name.Name, name.Namespace),
errantReplicas: metrics.ErrantReplicasVec.WithLabelValues(name.Name, name.Namespace),
processingTime: metrics.ProcessingTimeVec.WithLabelValues(name.Name, name.Namespace),
backupTimestamp: metrics.BackupTimestamp.WithLabelValues(name.Name, name.Namespace),
backupElapsed: metrics.BackupElapsed.WithLabelValues(name.Name, name.Namespace),
backupDumpSize: metrics.BackupDumpSize.WithLabelValues(name.Name, name.Namespace),
Expand All @@ -92,6 +94,7 @@ func newManagerProcess(c client.Client, r client.Reader, recorder record.EventRe
metrics.TotalReplicasVec.DeleteLabelValues(name.Name, name.Namespace)
metrics.ReadyReplicasVec.DeleteLabelValues(name.Name, name.Namespace)
metrics.ErrantReplicasVec.DeleteLabelValues(name.Name, name.Namespace)
metrics.ProcessingTimeVec.DeleteLabelValues(name.Name, name.Namespace)
metrics.BackupTimestamp.DeleteLabelValues(name.Name, name.Namespace)
metrics.BackupElapsed.DeleteLabelValues(name.Name, name.Namespace)
metrics.BackupDumpSize.DeleteLabelValues(name.Name, name.Namespace)
Expand Down Expand Up @@ -130,7 +133,9 @@ func (p *managerProcess) Start(ctx context.Context, interval time.Duration) {
}

p.metrics.checkCount.Inc()
startTime := time.Now()
redo, err := p.do(ctx)
p.metrics.processingTime.Observe(time.Since(startTime).Seconds())
if err != nil {
p.metrics.errorCount.Inc()
p.log.Error(err, "error")
Expand Down
31 changes: 16 additions & 15 deletions docs/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,22 @@ Aside from [the standard Go runtime and process metrics][standard], it exposes m

All these metrics are prefixed with `moco_cluster_` and have `name` and `namespace` labels.

| Name | Description | Type |
|-------------------------------------|------------------------------------------------------------------------|---------|
| `checks_total` | The number of times MOCO checked the cluster | Counter |
| `errors_total` | The number of times MOCO encountered errors when managing the cluster | Counter |
| `available` | 1 if the cluster is available, 0 otherwise | Gauge |
| `healthy` | 1 if the cluster is running without any problems, 0 otherwise | Gauge |
| `switchover_total` | The number of times MOCO changed the live primary instance | Counter |
| `failover_total` | The number of times MOCO changed the failed primary instance | Counter |
| `replicas` | The number of mysqld instances in the cluster | Gauge |
| `ready_replicas` | The number of ready mysqld Pods in the cluster | Gauge |
| `errant_replicas` | The number of mysqld instances that have [errant transactions][errant] | Gauge |
| `volume_resized_total` | The number of successful volume resizes | Counter |
| `volume_resized_errors_total` | The number of failed volume resizes | Counter |
| `statefulset_recreate_total` | The number of successful StatefulSet recreates | Counter |
| `statefulset_recreate_errors_total` | The number of failed StatefulSet recreates | Counter |
| Name | Description | Type |
| ----------------------------------- | ---------------------------------------------------------------------- | --------- |
| `checks_total` | The number of times MOCO checked the cluster | Counter |
| `errors_total` | The number of times MOCO encountered errors when managing the cluster | Counter |
| `available` | 1 if the cluster is available, 0 otherwise | Gauge |
| `healthy` | 1 if the cluster is running without any problems, 0 otherwise | Gauge |
| `switchover_total` | The number of times MOCO changed the live primary instance | Counter |
| `failover_total` | The number of times MOCO changed the failed primary instance | Counter |
| `replicas` | The number of mysqld instances in the cluster | Gauge |
| `ready_replicas` | The number of ready mysqld Pods in the cluster | Gauge |
| `errant_replicas` | The number of mysqld instances that have [errant transactions][errant] | Gauge |
| `processing_time_seconds` | The time in seconds MOCO took to process the cluster | Histogram |
| `volume_resized_total` | The number of successful volume resizes | Counter |
| `volume_resized_errors_total` | The number of failed volume resizes | Counter |
| `statefulset_recreate_total` | The number of successful StatefulSet recreates | Counter |
| `statefulset_recreate_errors_total` | The number of failed StatefulSet recreates | Counter |

### Backup

Expand Down
10 changes: 10 additions & 0 deletions pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ var (
TotalReplicasVec *prometheus.GaugeVec
ReadyReplicasVec *prometheus.GaugeVec
ErrantReplicasVec *prometheus.GaugeVec
ProcessingTimeVec *prometheus.HistogramVec

VolumeResizedTotal *prometheus.CounterVec
VolumeResizedErrorTotal *prometheus.CounterVec
Expand Down Expand Up @@ -112,6 +113,15 @@ func Register(registry prometheus.Registerer) {
}, []string{"name", "namespace"})
registry.MustRegister(ErrantReplicasVec)

ProcessingTimeVec = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: metricsNamespace,
Subsystem: clusteringSubsystem,
Name: "processing_time_seconds",
Help: "The length of time in seconds processing the cluster",
Buckets: []float64{0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10, 20, 30},
}, []string{"name", "namespace"})
registry.MustRegister(ProcessingTimeVec)

BackupTimestamp = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: metricsNamespace,
Subsystem: backupSubsystem,
Expand Down

0 comments on commit cffe9c4

Please sign in to comment.