From 1a35c0a3fc3472dae6ef02cf08da88fd202c32f4 Mon Sep 17 00:00:00 2001
From: Jason Wilder
Date: Sat, 3 Sep 2016 17:29:01 -0600
Subject: [PATCH] Fix never-ending full compactions

The full compaction planner could return a plan that included only one
generation. If this happened, a full compaction would run on that
generation, producing just one generation again, and the planner would
then repeat the same plan indefinitely.

This could happen if two generations were both over the max TSM file
size and the second one happened to be in level 3 or lower.

When this situation occurs, one CPU is pegged running a full compaction
continuously and the disks stay busy rewriting the same files over and
over again. This can eventually cause disk and CPU saturation if it
occurs with more than one shard.

Fixes #7074
---
 CHANGELOG.md                     |  1 +
 tsdb/engine/tsm1/compact.go      |  7 ++-
 tsdb/engine/tsm1/compact_test.go | 80 ++++++++++++++++++++++++++++++++
 3 files changed, 86 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9b5c566bfff..3f67c96b7e9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -131,6 +131,7 @@ With this release the systemd configuration files for InfluxDB will use the syst
 - [#7240](https://github.com/influxdata/influxdb/issues/7240): Allow blank lines in the line protocol input.
 - [#7119](https://github.com/influxdata/influxdb/pull/7119): Fix CREATE DATABASE when dealing with default values.
 - [#7243](https://github.com/influxdata/influxdb/issues/7243): Optimize queries that compare a tag value to an empty string.
+- [#7074](https://github.com/influxdata/influxdb/issues/7074): Continuous full compactions

 ## v0.13.0 [2016-05-12]

diff --git a/tsdb/engine/tsm1/compact.go b/tsdb/engine/tsm1/compact.go
index 700438d02cf..0d2926c6ed8 100644
--- a/tsdb/engine/tsm1/compact.go
+++ b/tsdb/engine/tsm1/compact.go
@@ -283,11 +283,12 @@ func (c *DefaultPlanner) Plan(lastWrite time.Time) []CompactionGroup {
 	// first check if we should be doing a full compaction because nothing has been written in a long time
 	if c.CompactFullWriteColdDuration > 0 && time.Now().Sub(lastWrite) > c.CompactFullWriteColdDuration && len(generations) > 1 {
 		var tsmFiles []string
+		var genCount int
 		for i, group := range generations {
 			var skip bool

 			// Skip the file if it's over the max size and contains a full block and it does not have any tombstones
-			if group.size() > uint64(maxTSMFileSize) && c.FileStore.BlockCount(group.files[0].Path, 1) == tsdb.DefaultMaxPointsPerBlock && !group.hasTombstones() {
+			if len(generations) > 2 && group.size() > uint64(maxTSMFileSize) && c.FileStore.BlockCount(group.files[0].Path, 1) == tsdb.DefaultMaxPointsPerBlock && !group.hasTombstones() {
 				skip = true
 			}

@@ -308,10 +309,12 @@ func (c *DefaultPlanner) Plan(lastWrite time.Time) []CompactionGroup {
 			for _, f := range group.files {
 				tsmFiles = append(tsmFiles, f.Path)
 			}
+			genCount += 1
 		}
 		sort.Strings(tsmFiles)

-		if len(tsmFiles) <= 1 {
+		// Make sure we have more than 1 file and more than 1 generation
+		if len(tsmFiles) <= 1 || genCount <= 1 {
 			return nil
 		}

diff --git a/tsdb/engine/tsm1/compact_test.go b/tsdb/engine/tsm1/compact_test.go
index 82dafe4fa28..8e7ffbb4aea 100644
--- a/tsdb/engine/tsm1/compact_test.go
+++ b/tsdb/engine/tsm1/compact_test.go
@@ -1847,6 +1847,18 @@ func TestDefaultPlanner_Plan_SkipPlanningAfterFull(t *testing.T) {
 			Path: "02-05.tsm1",
 			Size: 2049 * 1024 * 1024,
 		},
+		tsm1.FileStat{
+			Path: "03-05.tsm1",
+			Size: 2049 * 1024 * 1024,
+		},
+		tsm1.FileStat{
+			Path: "04-05.tsm1",
+			Size: 2049 * 1024 * 1024,
+		},
+		tsm1.FileStat{
+			Path: "05-05.tsm1",
+			Size: 2049 * 1024 * 1024,
+		},
 	}

 	overFs := &fakeFileStore{
@@ -1861,6 +1873,11 @@ func TestDefaultPlanner_Plan_SkipPlanningAfterFull(t *testing.T) {
 		t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp)
 	}

+	// ensure the optimize planner would pick this up
+	if exp, got := 1, len(cp.PlanOptimize()); got != exp {
+		t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp)
+	}
+
 	cp.FileStore = fs
 	// ensure that it will plan if last modified has changed
 	fs.lastModified = time.Now()
@@ -1870,6 +1887,69 @@ func TestDefaultPlanner_Plan_SkipPlanningAfterFull(t *testing.T) {
 	}
 }

+// Tests that planning 2 generations, each over 2 GB with the second in
+// level 2, does not return just the first generation. This was a case where
+// full planning would repeatedly plan the same files and never stop.
+func TestDefaultPlanner_Plan_TwoGenLevel3(t *testing.T) {
+	data := []tsm1.FileStat{
+		tsm1.FileStat{
+			Path: "000002245-000001666.tsm",
+			Size: 2049 * 1024 * 1024,
+		},
+		tsm1.FileStat{
+			Path: "000002245-000001667.tsm",
+			Size: 2049 * 1024 * 1024,
+		},
+		tsm1.FileStat{
+			Path: "000002245-000001668.tsm",
+			Size: 2049 * 1024 * 1024,
+		},
+		tsm1.FileStat{
+			Path: "000002245-000001669.tsm",
+			Size: 2049 * 1024 * 1024,
+		},
+		tsm1.FileStat{
+			Path: "000002245-000001670.tsm",
+			Size: 2049 * 1024 * 1024,
+		},
+		tsm1.FileStat{
+			Path: "000002245-000001671.tsm",
+			Size: 2049 * 1024 * 1024,
+		},
+		tsm1.FileStat{
+			Path: "000002245-000001672.tsm",
+			Size: 2049 * 1024 * 1024,
+		},
+		tsm1.FileStat{
+			Path: "000002245-000001673.tsm",
+			Size: 192631258,
+		},
+		tsm1.FileStat{
+			Path: "000002246-000000002.tsm",
+			Size: 2049 * 1024 * 1024,
+		},
+		tsm1.FileStat{
+			Path: "000002246-000000003.tsm",
+			Size: 192631258,
+		},
+	}
+
+	cp := &tsm1.DefaultPlanner{
+		FileStore: &fakeFileStore{
+			blockCount: 1000,
+			PathsFn: func() []tsm1.FileStat {
+				return data
+			},
+		},
+		CompactFullWriteColdDuration: time.Hour,
+	}
+
+	tsm := cp.Plan(time.Now().Add(-24 * time.Hour))
+	if exp, got := 1, len(tsm); got != exp {
+		t.Fatalf("tsm file length mismatch: got %v, exp %v", got, exp)
+	}
+}
+
 // Ensure that the planner will return files over the max file
 // size, but do not contain full blocks
 func TestDefaultPlanner_Plan_NotFullOverMaxsize(t *testing.T) {
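
For context, below is a minimal, self-contained sketch of the planning loop
this patch breaks. The names and types (gen, fullBlocks, plan) are simplified
stand-ins invented for illustration, not the real planner API; the actual skip
condition also checks tombstones and block counts via FileStore.BlockCount.

package main

import "fmt"

// maxTSMFileSize mirrors the 2 GB limit used by the TSM engine.
const maxTSMFileSize = uint64(2048) * 1024 * 1024

// gen is a hypothetical stand-in for a TSM generation: its total size
// and whether its files already hold full blocks.
type gen struct {
	size       uint64
	fullBlocks bool
}

// plan sketches the fixed full-compaction planning logic: a generation
// that is already fully compacted (over the max size with full blocks)
// is skipped only when more than two generations exist, and any plan
// covering fewer than two generations is rejected, because recompacting
// a single generation reproduces it unchanged and the planner would
// pick it again forever.
func plan(gens []gen) []int {
	var picked []int
	for i, g := range gens {
		if len(gens) > 2 && g.size > maxTSMFileSize && g.fullBlocks {
			continue // fully compacted; nothing to gain by rewriting it
		}
		picked = append(picked, i)
	}
	// Guard added by this patch: fewer than two generations means the
	// compaction makes no progress, so plan nothing.
	if len(picked) <= 1 {
		return nil
	}
	return picked
}

func main() {
	// The #7074 shape: two generations over the max size, the second
	// not yet fully compacted (level 3 or lower). Without the
	// len(gens) > 2 check, the first would be skipped and the second
	// would be replanned endlessly; with it, both compact together once.
	gens := []gen{
		{size: 2049 * 1024 * 1024, fullBlocks: true},
		{size: 2049 * 1024 * 1024, fullBlocks: false},
	}
	fmt.Println(plan(gens)) // [0 1]
}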