Skip to content

Commit

Permalink
Fix flaky e2e tests (#5563)
Browse files Browse the repository at this point in the history
* Tools: Fix e2e test flake

Signed-off-by: Saswata Mukherjee <saswataminsta@yahoo.com>

* Metadata: Fix flaky e2e test

Signed-off-by: Saswata Mukherjee <saswataminsta@yahoo.com>

* Compact: Fix flaky e2e test

Signed-off-by: Saswata Mukherjee <saswataminsta@yahoo.com>

* Bumping actions/cache to v3 for e2e tests

Signed-off-by: Saswata Mukherjee <saswataminsta@yahoo.com>

* Add missing e2e.WaitMissingMetrics

Signed-off-by: Saswata Mukherjee <saswataminsta@yahoo.com>
  • Loading branch information
saswatamcode authored Aug 2, 2022
1 parent 4af46dd commit 0febf14
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 38 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/go.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ jobs:
- name: Check out code into the Go module directory.
uses: actions/checkout@v2

- uses: actions/cache@v1
- uses: actions/cache@v3
with:
path: ~/go/pkg/mod
key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
Expand Down
37 changes: 19 additions & 18 deletions test/e2e/compact_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -725,6 +725,7 @@ func testCompactWithStoreGateway(t *testing.T, penaltyDedup bool) {
bucketMatcher,
operationMatcher,
)),
e2e.WaitMissingMetrics(),
)

// Make sure compactor does not modify anything else over time.
Expand Down Expand Up @@ -761,24 +762,24 @@ func testCompactWithStoreGateway(t *testing.T, penaltyDedup bool) {

// NOTE: We cannot assert on intermediate `thanos_blocks_meta_` metrics, as those are gauges and change dynamically due to many
// compaction groups. Wait for at least the first compaction iteration (the next one is in 5m).
testutil.Ok(t, c.WaitSumMetrics(e2e.Greater(0), "thanos_compact_iterations_total"))
testutil.Ok(t, c.WaitSumMetrics(e2e.Equals(18), "thanos_compact_blocks_cleaned_total"))
testutil.Ok(t, c.WaitSumMetrics(e2e.Equals(0), "thanos_compact_block_cleanup_failures_total"))
testutil.Ok(t, c.WaitSumMetrics(e2e.Equals(0), "thanos_compact_blocks_marked_total"))
testutil.Ok(t, c.WaitSumMetrics(e2e.Equals(0), "thanos_compact_aborted_partial_uploads_deletion_attempts_total"))
testutil.Ok(t, c.WaitSumMetrics(e2e.Equals(0), "thanos_compact_group_compactions_total"))
testutil.Ok(t, c.WaitSumMetrics(e2e.Equals(0), "thanos_compact_group_vertical_compactions_total"))
testutil.Ok(t, c.WaitSumMetrics(e2e.Equals(0), "thanos_compact_group_compactions_failures_total"))
testutil.Ok(t, c.WaitSumMetrics(e2e.Equals(7), "thanos_compact_group_compaction_runs_started_total"))
testutil.Ok(t, c.WaitSumMetrics(e2e.Equals(7), "thanos_compact_group_compaction_runs_completed_total"))

testutil.Ok(t, c.WaitSumMetrics(e2e.Equals(0), "thanos_compact_downsample_total"))
testutil.Ok(t, c.WaitSumMetrics(e2e.Equals(0), "thanos_compact_downsample_failures_total"))

testutil.Ok(t, str.WaitSumMetrics(e2e.Equals(float64(len(rawBlockIDs)+8+6-18-2+2)), "thanos_blocks_meta_synced"))
testutil.Ok(t, str.WaitSumMetrics(e2e.Equals(0), "thanos_blocks_meta_sync_failures_total"))

testutil.Ok(t, c.WaitSumMetrics(e2e.Equals(0), "thanos_compact_halted"))
testutil.Ok(t, c.WaitSumMetricsWithOptions(e2e.Greater(0), []string{"thanos_compact_iterations_total"}, e2e.WaitMissingMetrics()))
testutil.Ok(t, c.WaitSumMetricsWithOptions(e2e.Equals(18), []string{"thanos_compact_blocks_cleaned_total"}, e2e.WaitMissingMetrics()))
testutil.Ok(t, c.WaitSumMetricsWithOptions(e2e.Equals(0), []string{"thanos_compact_block_cleanup_failures_total"}, e2e.WaitMissingMetrics()))
testutil.Ok(t, c.WaitSumMetricsWithOptions(e2e.Equals(0), []string{"thanos_compact_blocks_marked_total"}, e2e.WaitMissingMetrics()))
testutil.Ok(t, c.WaitSumMetricsWithOptions(e2e.Equals(0), []string{"thanos_compact_aborted_partial_uploads_deletion_attempts_total"}, e2e.WaitMissingMetrics()))
testutil.Ok(t, c.WaitSumMetricsWithOptions(e2e.Equals(0), []string{"thanos_compact_group_compactions_total"}, e2e.WaitMissingMetrics()))
testutil.Ok(t, c.WaitSumMetricsWithOptions(e2e.Equals(0), []string{"thanos_compact_group_vertical_compactions_total"}, e2e.WaitMissingMetrics()))
testutil.Ok(t, c.WaitSumMetricsWithOptions(e2e.Equals(0), []string{"thanos_compact_group_compactions_failures_total"}, e2e.WaitMissingMetrics()))
testutil.Ok(t, c.WaitSumMetricsWithOptions(e2e.Equals(7), []string{"thanos_compact_group_compaction_runs_started_total"}, e2e.WaitMissingMetrics()))
testutil.Ok(t, c.WaitSumMetricsWithOptions(e2e.Equals(7), []string{"thanos_compact_group_compaction_runs_completed_total"}, e2e.WaitMissingMetrics()))

testutil.Ok(t, c.WaitSumMetricsWithOptions(e2e.Equals(0), []string{"thanos_compact_downsample_total"}, e2e.WaitMissingMetrics()))
testutil.Ok(t, c.WaitSumMetricsWithOptions(e2e.Equals(0), []string{"thanos_compact_downsample_failures_total"}, e2e.WaitMissingMetrics()))

testutil.Ok(t, str.WaitSumMetricsWithOptions(e2e.Equals(float64(len(rawBlockIDs)+8+6-18-2+2)), []string{"thanos_blocks_meta_synced"}, e2e.WaitMissingMetrics()))
testutil.Ok(t, str.WaitSumMetricsWithOptions(e2e.Equals(0), []string{"thanos_blocks_meta_sync_failures_total"}, e2e.WaitMissingMetrics()))

testutil.Ok(t, c.WaitSumMetricsWithOptions(e2e.Equals(0), []string{"thanos_compact_halted"}, e2e.WaitMissingMetrics()))
// Make sure compactor does not modify anything else over time.
testutil.Ok(t, c.Stop())

Expand Down
20 changes: 17 additions & 3 deletions test/e2e/metadata_api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,15 +60,29 @@ func TestMetadataAPI_Fanout(t *testing.T) {
// Wait for the metadata response to be ready, as Prometheus gets metadata after a scrape.
testutil.Ok(t, runutil.Retry(5*time.Second, ctx.Done(), func() error {
promMeta, err = promclient.NewDefaultClient().MetricMetadataInGRPC(ctx, urlParse(t, "http://"+prom1.Endpoint("http")), "", -1)
testutil.Ok(t, err)
if err != nil {
return err
}
if len(promMeta) > 0 {
return nil
}
return fmt.Errorf("empty metadata response from Prometheus")
}))

thanosMeta, err := promclient.NewDefaultClient().MetricMetadataInGRPC(ctx, urlParse(t, "http://"+q.Endpoint("http")), "", -1)
testutil.Ok(t, err)
var thanosMeta map[string][]metadatapb.Meta
// Retry until length of metadata response is the same as Prometheus.
testutil.Ok(t, runutil.Retry(5*time.Second, ctx.Done(), func() error {
thanosMeta, err = promclient.NewDefaultClient().MetricMetadataInGRPC(ctx, urlParse(t, "http://"+q.Endpoint("http")), "", -1)
if err != nil {
return err
}
if len(thanosMeta) == len(promMeta) {
return nil
}

return fmt.Errorf("different metadata response from Prometheus")
}))

testutil.Assert(t, len(thanosMeta) > 0, "got empty metadata response from Thanos")

// Metadata response from Prometheus and Thanos Querier should be the same after deduplication.
Expand Down
61 changes: 45 additions & 16 deletions test/e2e/tools_bucket_web_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"github.com/thanos-io/objstore/client"

v1 "github.com/thanos-io/thanos/pkg/api/blocks"
"github.com/thanos-io/thanos/pkg/errors"
"github.com/thanos-io/thanos/pkg/runutil"
"github.com/thanos-io/thanos/pkg/testutil"
"github.com/thanos-io/thanos/test/e2e/e2ethanos"
Expand Down Expand Up @@ -214,24 +215,52 @@ func TestToolsBucketWebWithTimeAndRelabelFilter(t *testing.T) {
)
testutil.Ok(t, e2e.StartAndWaitReady(b))

// Request blocks api.
resp, err := http.DefaultClient.Get("http://" + b.Endpoint("http") + "/api/v1/blocks")
testutil.Ok(t, err)
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
t.Cleanup(cancel)

testutil.Equals(t, http.StatusOK, resp.StatusCode)
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
testutil.Ok(t, err)
var data struct {
var respData struct {
Status string
Data *v1.BlocksInfo
}
testutil.Ok(t, json.Unmarshal(body, &data))
testutil.Equals(t, "success", data.Status)

// Filtered by time and relabel, result only one blocks.
testutil.Equals(t, 1, len(data.Data.Blocks))
testutil.Equals(t, data.Data.Blocks[0].MaxTime, blocks[0].maxt)
testutil.Equals(t, data.Data.Blocks[0].MinTime, blocks[0].mint)
testutil.Equals(t, data.Data.Blocks[0].Thanos.Labels, blocks[0].extLset.Map())

testutil.Ok(t, runutil.Retry(5*time.Second, ctx.Done(), func() error {
// Request blocks api.
resp, err := http.DefaultClient.Get("http://" + b.Endpoint("http") + "/api/v1/blocks")
if err != nil {
return err
}

if resp.StatusCode != http.StatusOK {
return errors.Newf("statuscode is not 200, got %d", resp.StatusCode)
}

body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return errors.Wrapf(err, "error reading body")
}

if err := resp.Body.Close(); err != nil {
return errors.Wrapf(err, "error closing body")
}

if err := json.Unmarshal(body, &respData); err != nil {
return errors.Wrapf(err, "error unmarshaling body")
}

if respData.Status != "success" {
return errors.Newf("status is not success, got %s", respData.Status)
}

// Filtered by time and relabel, so the result should contain only one block.
if len(respData.Data.Blocks) == 1 {
return nil
}

return errors.Newf("expected 1 block, got %d", len(respData.Data.Blocks))
}))

testutil.Equals(t, 1, len(respData.Data.Blocks))
testutil.Equals(t, respData.Data.Blocks[0].MaxTime, blocks[0].maxt)
testutil.Equals(t, respData.Data.Blocks[0].MinTime, blocks[0].mint)
testutil.Equals(t, respData.Data.Blocks[0].Thanos.Labels, blocks[0].extLset.Map())
}

0 comments on commit 0febf14

Please sign in to comment.