Skip to content

Commit

Permalink
Query-tee improvements (#8330)
Browse files Browse the repository at this point in the history
* Reduce bucket factor for relative performance metrics

* Flip meaning of metrics

* Update changelog entry

* Make `backend_response_relative_duration_proportional` report the difference

* Update docs
  • Loading branch information
charleskorn authored Jun 14, 2024
1 parent b905599 commit 06301d9
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 25 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@
### Query-tee

* [ENHANCEMENT] Log queries that take longer than `proxy.log-slow-query-response-threshold` when compared to other backends. #7346
* [ENHANCEMENT] Add two new metrics for measuring the relative duration between backends: #7782 #8013
* [ENHANCEMENT] Add two new metrics for measuring the relative duration between backends: #7782 #8013 #8330
* `cortex_querytee_backend_response_relative_duration_seconds`
* `cortex_querytee_backend_response_relative_duration_proportional`

Expand Down
4 changes: 2 additions & 2 deletions docs/sources/mimir/manage/tools/query-tee.md
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,8 @@ cortex_querytee_responses_compared_total{route="<route>",result="<success|fail>"

Additionally, if backend results comparison is configured, two native histograms are available:

- `cortex_querytee_backend_response_relative_duration_seconds`: Time (in seconds) of preferred backend less secondary backend.
- `cortex_querytee_backend_response_relative_duration_proportional`: Response time of preferred backend, as a proportion of secondary backend response time.
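- `cortex_querytee_backend_response_relative_duration_seconds`: Response time (in seconds) of the secondary backend minus the response time of the preferred backend. Positive values mean the secondary backend was slower than the preferred backend.
- `cortex_querytee_backend_response_relative_duration_proportional`: Difference between the secondary and preferred backend response times (secondary minus preferred), as a proportion of the preferred backend response time. For example, `0.5` means the secondary backend took 50% longer than the preferred backend.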

### Ruler remote operational mode test

Expand Down
6 changes: 3 additions & 3 deletions tools/querytee/proxy_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,10 +224,10 @@ func (p *ProxyEndpoint) executeBackendRequests(req *http.Request, resCh chan *ba
)
}

relativeDuration := expectedResponse.elapsedTime - actualResponse.elapsedTime
proportionalDuration := expectedResponse.elapsedTime.Seconds() / actualResponse.elapsedTime.Seconds()
relativeDuration := actualResponse.elapsedTime - expectedResponse.elapsedTime
proportionalDurationDifference := relativeDuration.Seconds() / expectedResponse.elapsedTime.Seconds()
p.metrics.relativeDuration.WithLabelValues(p.routeName).Observe(relativeDuration.Seconds())
p.metrics.proportionalDuration.WithLabelValues(p.routeName).Observe(proportionalDuration)
p.metrics.proportionalDuration.WithLabelValues(p.routeName).Observe(proportionalDurationDifference)
p.metrics.responsesComparedTotal.WithLabelValues(p.routeName, string(result)).Inc()
}
}
Expand Down
30 changes: 15 additions & 15 deletions tools/querytee/proxy_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -454,25 +454,25 @@ func Test_ProxyEndpoint_RelativeDurationMetric(t *testing.T) {
expectedProportionalSampleSum float64
}{
"secondary backend is faster than preferred": {
latencyPairs: []latencyPair{{
preferredResponseLatency: 3 * time.Second,
secondaryResponseLatency: 1 * time.Second,
}, {
preferredResponseLatency: 5 * time.Second,
secondaryResponseLatency: 2 * time.Second,
},
latencyPairs: []latencyPair{
{
preferredResponseLatency: 3 * time.Second,
secondaryResponseLatency: 1 * time.Second,
}, {
preferredResponseLatency: 5 * time.Second,
secondaryResponseLatency: 2 * time.Second,
},
},
expectedDurationSampleSum: 5,
expectedProportionalSampleSum: (3.0/1 + 5.0/2),
expectedDurationSampleSum: -5,
expectedProportionalSampleSum: -2.0/3 + -3.0/5,
},
"preferred backend is 5 seconds faster than secondary": {
latencyPairs: []latencyPair{{
preferredResponseLatency: 2 * time.Second,
secondaryResponseLatency: 7 * time.Second,
},
},
expectedDurationSampleSum: -5,
expectedProportionalSampleSum: (2.0 / 7),
}},
expectedDurationSampleSum: 5,
expectedProportionalSampleSum: 5.0 / 2,
},
}

Expand Down Expand Up @@ -511,12 +511,12 @@ func Test_ProxyEndpoint_RelativeDurationMetric(t *testing.T) {
gotDuration := filterMetrics(got, []string{"cortex_querytee_backend_response_relative_duration_seconds"})
require.Equal(t, 1, len(gotDuration), "Expect only one metric after filtering")
require.Equal(t, uint64(len(scenario.latencyPairs)), gotDuration[0].Metric[0].Histogram.GetSampleCount())
require.Equal(t, scenario.expectedDurationSampleSum, gotDuration[0].Metric[0].Histogram.GetSampleSum())
require.InDelta(t, scenario.expectedDurationSampleSum, gotDuration[0].Metric[0].Histogram.GetSampleSum(), 1e-9)

gotProportional := filterMetrics(got, []string{"cortex_querytee_backend_response_relative_duration_proportional"})
require.Equal(t, 1, len(gotProportional), "Expect only one metric after filtering")
require.Equal(t, uint64(len(scenario.latencyPairs)), gotProportional[0].Metric[0].Histogram.GetSampleCount())
require.Equal(t, scenario.expectedProportionalSampleSum, gotProportional[0].Metric[0].Histogram.GetSampleSum())
require.InDelta(t, scenario.expectedProportionalSampleSum, gotProportional[0].Metric[0].Histogram.GetSampleSum(), 1e-9)
})
}
}
Expand Down
8 changes: 4 additions & 4 deletions tools/querytee/proxy_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,14 @@ func NewProxyMetrics(registerer prometheus.Registerer) *ProxyMetrics {
relativeDuration: promauto.With(registerer).NewHistogramVec(prometheus.HistogramOpts{
Namespace: queryTeeMetricsNamespace,
Name: "backend_response_relative_duration_seconds",
Help: "Time (in seconds) of preferred backend less secondary backend.",
NativeHistogramBucketFactor: 2,
Help: "Time (in seconds) of secondary backend less preferred backend.",
NativeHistogramBucketFactor: 1.1,
}, []string{"route"}),
proportionalDuration: promauto.With(registerer).NewHistogramVec(prometheus.HistogramOpts{
Namespace: queryTeeMetricsNamespace,
Name: "backend_response_relative_duration_proportional",
Help: "Response time of preferred backend, as a proportion of secondary backend response time.",
NativeHistogramBucketFactor: 2,
Help: "Response time of secondary backend less preferred backend, as a proportion of preferred backend response time.",
NativeHistogramBucketFactor: 1.1,
}, []string{"route"}),
}

Expand Down

0 comments on commit 06301d9

Please sign in to comment.