Skip to content

Commit

Permalink
Better error handling and reporting for the user
Browse files Browse the repository at this point in the history
Return more details on the API.
Sync the error and log messages for easier correlation of logs and
also blocked_queries configuration.
Update docs and changelog.

Signed-off-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>
  • Loading branch information
krajorama committed Jun 12, 2024
1 parent e5dbfe7 commit ec65126
Show file tree
Hide file tree
Showing 7 changed files with 61 additions and 24 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
* [ENHANCEMENT] Ingester: reduce locked time while matching postings for a label, improving the write latency and compaction speed. #8327
* [ENHANCEMENT] Ingester: reduce the amount of locks taken during the Head compaction's garbage-collection process, improving the write latency and compaction speed. #8327
* [ENHANCEMENT] Query-frontend: log the start, end time and matchers for remote read requests to the query stats logs. #8326
* [ENHANCEMENT] Query-frontend: be able to block remote read queries via the per tenant runtime override `blocked_queries`. #8353
* [BUGFIX] Distributor: prometheus retry on 5xx and 429 errors, while otlp collector only retry on 429, 502, 503 and 504, mapping other 5xx errors to the retryable ones in otlp endpoint. #8324
* [BUGFIX] Distributor: make OTLP endpoint return marshalled proto bytes as response body for 4xx/5xx errors. #8227
* [BUGFIX] Rules: improve error handling when querier is local to the ruler. #7567
Expand Down
6 changes: 6 additions & 0 deletions docs/sources/mimir/configure/configure-blocked-queries.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@ overrides:
regex: true
```
The blocking is enforced on instant and range queries as well as remote read queries.
For instant and range queries the pattern is evaluated against the query, for remote read requests, the pattern is evaluated against each set of matchers, as if the matchers formed a vector selector.
For example the remote read query that contains the matcher `__name__` regex matched to `foo.*` is interpreted as `{__name__=~"foo.*"}`. To restrict the blocking to such selectors, include the curly braces in your pattern, e.g. `{.*foo.*}`.

To set up runtime overrides, refer to [runtime configuration]({{< relref "./about-runtime-configuration" >}}).

{{% admonition type="note" %}}
Expand Down
14 changes: 13 additions & 1 deletion pkg/api/error/error.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@ package error

import (
"encoding/json"
"errors"
"fmt"
"net/http"

"github.com/pkg/errors"

"github.com/grafana/dskit/httpgrpc"
)

Expand Down Expand Up @@ -118,6 +119,17 @@ func IsAPIError(err error) bool {
return errors.As(err, &apiErr)
}

// AddDetails adds details to an existing apiError, but keeps the type and handling.
// If the error is not an apiError, it will wrap the error with the details.
func AddDetails(err error, details string) error {
apiErr := &apiError{}
if !errors.As(err, &apiErr) {
return errors.Wrap(err, details)
}
apiErr.Message = fmt.Sprintf("%s: %s", details, apiErr.Message)
return apiErr
}

// IsNonRetryableAPIError returns true if err is an apiError which should be failed and not retried.
func IsNonRetryableAPIError(err error) bool {
apiErr := &apiError{}
Expand Down
37 changes: 25 additions & 12 deletions pkg/frontend/querymiddleware/remote_read.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,19 @@ import (
"github.com/prometheus/prometheus/promql/parser"
"github.com/prometheus/prometheus/storage/remote"

apierror "github.com/grafana/mimir/pkg/api/error"
"github.com/grafana/mimir/pkg/querier"
"github.com/grafana/mimir/pkg/util"
)

// To keep logs and error messages in sync, we define the following keys:
const (
endLogKey = "end"
hintsLogKey = "hints"
matchersLogKey = "matchers"
startLogKey = "start"
)

type remoteReadRoundTripper struct {
next http.RoundTripper

Expand All @@ -46,7 +55,7 @@ func (r *remoteReadRoundTripper) RoundTrip(req *http.Request) (*http.Response, e
}

queries := remoteReadRequest.GetQueries()
for _, query := range queries {
for i, query := range queries {
metricsRequest, err := toMetricsRequest(req.URL.Path, query)
if err != nil {
return nil, err
Expand All @@ -58,7 +67,7 @@ func (r *remoteReadRoundTripper) RoundTrip(req *http.Request) (*http.Response, e
}))
_, error := handler.Do(req.Context(), metricsRequest)
if error != nil {
return nil, error
return nil, apierror.AddDetails(error, fmt.Sprintf("remote read error (%s_%d: %s)", matchersLogKey, i, metricsRequest.GetQuery()))
}
}

Expand Down Expand Up @@ -107,20 +116,20 @@ func parseRemoteReadRequest(remoteReadRequest *prompb.ReadRequest) (url.Values,
queries := remoteReadRequest.GetQueries()

for i, query := range queries {
add(i, "start", fmt.Sprintf("%d", query.GetStartTimestampMs()))
add(i, "end", fmt.Sprintf("%d", query.GetEndTimestampMs()))
add(i, startLogKey, fmt.Sprintf("%d", query.GetStartTimestampMs()))
add(i, endLogKey, fmt.Sprintf("%d", query.GetEndTimestampMs()))

matcher, err := remoteReadMatchersToString(query)
if err != nil {
return nil, err
}
params.Add("matchers_"+strconv.Itoa(i), matcher)
add(i, matchersLogKey, matcher)

if query.Hints != nil {
if hints, err := json.Marshal(query.Hints); err == nil {
add(i, "hints", string(hints))
add(i, hintsLogKey, string(hints))
} else {
add(i, "hints", fmt.Sprintf("error marshalling hints: %v", err))
add(i, hintsLogKey, fmt.Sprintf("error marshalling hints: %v", err))
}
}
}
Expand All @@ -129,15 +138,20 @@ func parseRemoteReadRequest(remoteReadRequest *prompb.ReadRequest) (url.Values,
}

func remoteReadMatchersToString(q *prompb.Query) (string, error) {
matchersStrings := make([]string, 0, len(q.GetMatchers()))
matchers, err := remote.FromLabelMatchers(q.GetMatchers())
if err != nil {
return "", err
}
for _, m := range matchers {
matchersStrings = append(matchersStrings, m.String())
builder := strings.Builder{}
builder.WriteRune('{')
for i, m := range matchers {
if i > 0 {
builder.WriteRune(',')
}
builder.WriteString(m.String())
}
return strings.Join(matchersStrings, ","), nil
builder.WriteRune('}')
return builder.String(), nil
}

func toMetricsRequest(path string, query *prompb.Query) (MetricsQueryRequest, error) {
Expand All @@ -150,7 +164,6 @@ func toMetricsRequest(path string, query *prompb.Query) (MetricsQueryRequest, er
if err != nil {
return nil, err
}
metricsQuery.promQuery = fmt.Sprintf("{%s}", metricsQuery.promQuery)
return metricsQuery, nil
}

Expand Down
15 changes: 10 additions & 5 deletions pkg/frontend/querymiddleware/remote_read_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@ func TestParseRemoteReadRequestWithoutConsumingBody(t *testing.T) {
expectedParams: url.Values{
"start_0": []string{"0"},
"end_0": []string{"42"},
"matchers_0": []string{"__name__=\"some_metric\",foo=~\".*bar.*\""},
"matchers_0": []string{"{__name__=\"some_metric\",foo=~\".*bar.*\"}"},
"start_1": []string{"10"},
"end_1": []string{"20"},
"matchers_1": []string{"__name__=\"up\""},
"matchers_1": []string{"{__name__=\"up\"}"},
"hints_1": []string{"{\"step_ms\":1000}"},
},
},
Expand Down Expand Up @@ -100,7 +100,7 @@ func TestRemoteReadRoundTripperCallsDownstreamOnAll(t *testing.T) {
handler MetricsQueryHandler
expectDownstreamCalled int
expectMiddlewareCalled int
expectError error
expectError string
}{
"skipping middleware": {
handler: &skipMiddleware{},
Expand All @@ -111,7 +111,7 @@ func TestRemoteReadRoundTripperCallsDownstreamOnAll(t *testing.T) {
handler: &errorMiddleware{},
expectDownstreamCalled: 0,
expectMiddlewareCalled: 1,
expectError: fmt.Errorf("TestErrorMiddleware"),
expectError: "remote read error (matchers_0: {__name__=\"some_metric\",foo=~\".*bar.*\"}): TestErrorMiddleware",
},
}

Expand All @@ -125,7 +125,12 @@ func TestRemoteReadRoundTripperCallsDownstreamOnAll(t *testing.T) {
})
rr := newRemoteReadRoundTripper(roundTripper, middleware)
_, err := rr.RoundTrip(generateTestRemoteReadRequest())
require.Equal(t, tc.expectError, err)
if tc.expectError != "" {
require.Error(t, err)
require.Equal(t, tc.expectError, err.Error())
} else {
require.NoError(t, err)
}
require.Equal(t, tc.expectDownstreamCalled, roundTripper.called)
require.Equal(t, tc.expectMiddlewareCalled, countMiddleWareCalls)
})
Expand Down
6 changes: 3 additions & 3 deletions pkg/frontend/querymiddleware/roundtrip_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -529,21 +529,21 @@ func TestRemoteReadMiddleware(t *testing.T) {
},
expectError: true,
expectAPIError: true,
expectErrorContains: "the request has been blocked by the cluster administrator (err-mimir-query-blocked)",
expectErrorContains: "remote read error (matchers_1: {__name__=\"up\"}): the request has been blocked by the cluster administrator (err-mimir-query-blocked)",
},
"block a regex query": {
makeRequest: generateTestRemoteReadRequest,
limits: mockLimits{
blockedQueries: []*validation.BlockedQuery{
{
Pattern: ".*up.*",
Pattern: "{.*foo.*}",
Regex: true,
},
},
},
expectError: true,
expectAPIError: true,
expectErrorContains: "the request has been blocked by the cluster administrator (err-mimir-query-blocked)",
expectErrorContains: "remote read error (matchers_0: {__name__=\"some_metric\",foo=~\".*bar.*\"}): the request has been blocked by the cluster administrator (err-mimir-query-blocked)",
},
}

Expand Down
6 changes: 3 additions & 3 deletions pkg/frontend/transport/handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,13 +194,13 @@ func TestHandler_ServeHTTP(t *testing.T) {
r.Body = io.NopCloser(bytes.NewReader(compressed))
return r
},
expectedActivity: "user:12345 UA:test-user-agent req:GET /api/v1/read end_0=42&end_1=20&hints_1=%7B%22step_ms%22%3A1000%7D&matchers_0=__name__%3D%22some_metric%22%2Cfoo%3D~%22.%2Abar.%2A%22&matchers_1=__name__%3D%22up%22&start_0=0&start_1=10",
expectedActivity: "user:12345 UA:test-user-agent req:GET /api/v1/read end_0=42&end_1=20&hints_1=%7B%22step_ms%22%3A1000%7D&matchers_0=%7B__name__%3D%22some_metric%22%2Cfoo%3D~%22.%2Abar.%2A%22%7D&matchers_1=%7B__name__%3D%22up%22%7D&start_0=0&start_1=10",
expectedMetrics: 5,
expectedParams: url.Values{
"matchers_0": []string{"__name__=\"some_metric\",foo=~\".*bar.*\""},
"matchers_0": []string{"{__name__=\"some_metric\",foo=~\".*bar.*\"}"},
"start_0": []string{"0"},
"end_0": []string{"42"},
"matchers_1": []string{"__name__=\"up\""},
"matchers_1": []string{"{__name__=\"up\"}"},
"start_1": []string{"10"},
"end_1": []string{"20"},
"hints_1": []string{"{\"step_ms\":1000}"},
Expand Down

0 comments on commit ec65126

Please sign in to comment.