Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Add better observability to queryReadiness #5946

Merged
merged 9 commits into from
Apr 28, 2022
14 changes: 14 additions & 0 deletions pkg/storage/stores/shipper/downloads/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package downloads
import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/weaveworks/common/instrument"
)

const (
Expand All @@ -14,6 +15,8 @@ type metrics struct {
queryTimeTableDownloadDurationSeconds *prometheus.CounterVec
tablesSyncOperationTotal *prometheus.CounterVec
tablesDownloadOperationDurationSeconds prometheus.Gauge
ensureQueryReadinessDurationSeconds prometheus.Histogram
usersToBeQueryReadyForTotal prometheus.Gauge
}

func newMetrics(r prometheus.Registerer) *metrics {
Expand All @@ -23,6 +26,17 @@ func newMetrics(r prometheus.Registerer) *metrics {
Name: "query_time_table_download_duration_seconds",
Help: "Time (in seconds) spent in downloading of files per table at query time",
}, []string{"table"}),
ensureQueryReadinessDurationSeconds: promauto.With(r).NewHistogram(prometheus.HistogramOpts{
Namespace: "loki_boltdb_shipper",
Name: "query_readiness_duration_seconds",
Help: "Time (in seconds) spent making an index gateway ready to be queried",
Buckets: instrument.DefBuckets,
}),
usersToBeQueryReadyForTotal: promauto.With(r).NewGauge(prometheus.GaugeOpts{
Namespace: "loki_boltdb_shipper",
Name: "users_to_be_query_ready_for_total",
Help: "Total number of users an index gateway instance has to be query ready for.",
}),
DylanGuedes marked this conversation as resolved.
Show resolved Hide resolved
tablesSyncOperationTotal: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
Namespace: "loki_boltdb_shipper",
Name: "tables_sync_operation_total",
Expand Down
11 changes: 11 additions & 0 deletions pkg/storage/stores/shipper/downloads/table_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,14 @@ func (tm *TableManager) cleanupCache() error {

// ensureQueryReadiness compares tables required for being query ready with the tables we already have and downloads the missing ones.
func (tm *TableManager) ensureQueryReadiness(ctx context.Context) error {
start := time.Now()
usersToBeQueryReadyLen := 0
defer func() {
duration := time.Since(start)
tm.metrics.ensureQueryReadinessDurationSeconds.Observe(duration.Seconds())
DylanGuedes marked this conversation as resolved.
Show resolved Hide resolved
tm.metrics.usersToBeQueryReadyForTotal.Set(float64(usersToBeQueryReadyLen))
}()

activeTableNumber := getActiveTableNumber()

// find the largest query readiness number
Expand Down Expand Up @@ -309,6 +317,9 @@ func (tm *TableManager) ensureQueryReadiness(ctx context.Context) error {
return err
}

usersToBeQueryReadyLen += len(usersToBeQueryReadyFor)
DylanGuedes marked this conversation as resolved.
Show resolved Hide resolved

level.Debug(util_log.Logger).Log("msg", "instance should be query ready for users", "users", usersToBeQueryReadyFor)
DylanGuedes marked this conversation as resolved.
Show resolved Hide resolved
if err := table.EnsureQueryReadiness(ctx, usersToBeQueryReadyFor); err != nil {
return err
}
Expand Down