From 1b7c0f9de45cd560fc47fb1c5bfea0612c27c339 Mon Sep 17 00:00:00 2001 From: Travis Patterson Date: Thu, 24 Feb 2022 11:48:23 -0700 Subject: [PATCH 1/4] Fix potential deadlock in the table manager --- .../stores/shipper/downloads/table_manager.go | 34 +++++------ .../shipper/downloads/table_manager_test.go | 60 +++++++++++++------ 2 files changed, 56 insertions(+), 38 deletions(-) diff --git a/pkg/storage/stores/shipper/downloads/table_manager.go b/pkg/storage/stores/shipper/downloads/table_manager.go index 90dbe52fbf830..105751248d98d 100644 --- a/pkg/storage/stores/shipper/downloads/table_manager.go +++ b/pkg/storage/stores/shipper/downloads/table_manager.go @@ -153,31 +153,27 @@ func (tm *TableManager) query(ctx context.Context, tableName string, queries []c } func (tm *TableManager) getOrCreateTable(tableName string) (*Table, error) { - // if table is already there, use it. tm.tablesMtx.RLock() + defer tm.tablesMtx.RUnlock() + + // if table is already there, use it. table, ok := tm.tables[tableName] - tm.tablesMtx.RUnlock() + if ok { + return table, nil + } - if !ok { - tm.tablesMtx.Lock() - // check if some other competing goroutine got the lock before us and created the table, use it if so. - table, ok = tm.tables[tableName] - if !ok { - // table not found, creating one. - level.Info(util_log.Logger).Log("msg", fmt.Sprintf("downloading all files for table %s", tableName)) - - tablePath := filepath.Join(tm.cfg.CacheDir, tableName) - err := chunk_util.EnsureDirectory(tablePath) - if err != nil { - return nil, err - } + // table not found, creating one. + level.Info(util_log.Logger).Log("msg", fmt.Sprintf("downloading all files for table %s", tableName)) - table = NewTable(tableName, filepath.Join(tm.cfg.CacheDir, tableName), tm.indexStorageClient, tm.boltIndexClient, tm.metrics) - tm.tables[tableName] = table - } - tm.tablesMtx.Unlock() + tablePath := filepath.Join(tm.cfg.CacheDir, tableName) + err := chunk_util.EnsureDirectory(tablePath) + if err != nil { + return nil, err } + table = NewTable(tableName, filepath.Join(tm.cfg.CacheDir, tableName), tm.indexStorageClient, tm.boltIndexClient, tm.metrics) + tm.tables[tableName] = table + return table, nil } diff --git a/pkg/storage/stores/shipper/downloads/table_manager_test.go b/pkg/storage/stores/shipper/downloads/table_manager_test.go index e4786932d79fa..7a12b02db021f 100644 --- a/pkg/storage/stores/shipper/downloads/table_manager_test.go +++ b/pkg/storage/stores/shipper/downloads/table_manager_test.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "math" + "os" "path/filepath" "testing" "time" @@ -35,29 +36,50 @@ func buildTestTableManager(t *testing.T, path string) (*TableManager, stopFunc) } func TestTableManager_QueryPages(t *testing.T) { - tempDir := t.TempDir() - - objectStoragePath := filepath.Join(tempDir, objectsStorageDirName) + t.Run("QueryPages", func(t *testing.T) { + tempDir := t.TempDir() + objectStoragePath := filepath.Join(tempDir, objectsStorageDirName) - var queries []chunk.IndexQuery - for i, name := range []string{"table1", "table2"} { - testutil.SetupTable(t, filepath.Join(objectStoragePath, name), testutil.DBsConfig{ - NumUnCompactedDBs: 5, - DBRecordsStart: i * 1000, - }, testutil.PerUserDBsConfig{ - DBsConfig: testutil.DBsConfig{ + var queries []chunk.IndexQuery + for i, name := range []string{"table1", "table2"} { + testutil.SetupTable(t, filepath.Join(objectStoragePath, name), testutil.DBsConfig{ NumUnCompactedDBs: 5, - DBRecordsStart: i*1000 + 500, - }, - NumUsers: 1, - }) - queries = append(queries, chunk.IndexQuery{TableName: name}) - } + DBRecordsStart: i * 1000, + }, testutil.PerUserDBsConfig{ + DBsConfig: testutil.DBsConfig{ + NumUnCompactedDBs: 5, + DBRecordsStart: i*1000 + 500, + }, + NumUsers: 1, + }) + queries = append(queries, chunk.IndexQuery{TableName: name}) + } - tableManager, stopFunc := buildTestTableManager(t, tempDir) - defer stopFunc() + tableManager, stopFunc := buildTestTableManager(t, tempDir) + defer stopFunc() - testutil.TestMultiTableQuery(t, testutil.BuildUserID(0), queries, tableManager, 0, 2000) + testutil.TestMultiTableQuery(t, testutil.BuildUserID(0), queries, tableManager, 0, 2000) + }) + + t.Run("it doesn't deadlock when table create fails", func(t *testing.T) { + tempDir := os.TempDir() + + // This file forces chunk_util.EnsureDirectory to fail. Any write error would cause this + // deadlock + f, err := os.CreateTemp(filepath.Join(tempDir, "cache"), "not-a-directory") + require.NoError(t, err) + badTable := filepath.Base(f.Name()) + + tableManager, stopFunc := buildTestTableManager(t, tempDir) + defer stopFunc() + + err = tableManager.query(context.Background(), badTable, nil, nil) + require.Error(t, err) + + // This one deadlocks without the fix + err = tableManager.query(context.Background(), badTable, nil, nil) + require.Error(t, err) + }) } func TestTableManager_cleanupCache(t *testing.T) { From ea2e94d589bb68daecda73fe437d72f483c5d948 Mon Sep 17 00:00:00 2001 From: Travis Patterson Date: Fri, 25 Feb 2022 09:59:17 -0700 Subject: [PATCH 2/4] Ensure second lock is released on return --- .../stores/shipper/downloads/table_manager.go | 35 +++++++++++-------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/pkg/storage/stores/shipper/downloads/table_manager.go b/pkg/storage/stores/shipper/downloads/table_manager.go index 105751248d98d..81c4b846da363 100644 --- a/pkg/storage/stores/shipper/downloads/table_manager.go +++ b/pkg/storage/stores/shipper/downloads/table_manager.go @@ -153,26 +153,31 @@ func (tm *TableManager) query(ctx context.Context, tableName string, queries []c } func (tm *TableManager) getOrCreateTable(tableName string) (*Table, error) { - tm.tablesMtx.RLock() - defer tm.tablesMtx.RUnlock() - // if table is already there, use it. + tm.tablesMtx.RLock() table, ok := tm.tables[tableName] - if ok { - return table, nil - } + tm.tablesMtx.RUnlock() - // table not found, creating one. - level.Info(util_log.Logger).Log("msg", fmt.Sprintf("downloading all files for table %s", tableName)) + if !ok { + tm.tablesMtx.Lock() + defer tm.tablesMtx.Unlock() - tablePath := filepath.Join(tm.cfg.CacheDir, tableName) - err := chunk_util.EnsureDirectory(tablePath) - if err != nil { - return nil, err - } + // check if some other competing goroutine got the lock before us and created the table, use it if so. + table, ok = tm.tables[tableName] + if !ok { + // table not found, creating one. + level.Info(util_log.Logger).Log("msg", fmt.Sprintf("downloading all files for table %s", tableName)) - table = NewTable(tableName, filepath.Join(tm.cfg.CacheDir, tableName), tm.indexStorageClient, tm.boltIndexClient, tm.metrics) - tm.tables[tableName] = table + tablePath := filepath.Join(tm.cfg.CacheDir, tableName) + err := chunk_util.EnsureDirectory(tablePath) + if err != nil { + return nil, err + } + + table = NewTable(tableName, filepath.Join(tm.cfg.CacheDir, tableName), tm.indexStorageClient, tm.boltIndexClient, tm.metrics) + tm.tables[tableName] = table + } + } return table, nil } From 72778af93d522a85541105d0d18538677ecd616a Mon Sep 17 00:00:00 2001 From: Sandeep Sukhani Date: Mon, 28 Feb 2022 18:50:50 +0530 Subject: [PATCH 3/4] use t.TempDir instead of os.TempDir --- pkg/storage/stores/shipper/downloads/table_manager_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/storage/stores/shipper/downloads/table_manager_test.go b/pkg/storage/stores/shipper/downloads/table_manager_test.go index 16fcce8fb1ec1..642a3082de582 100644 --- a/pkg/storage/stores/shipper/downloads/table_manager_test.go +++ b/pkg/storage/stores/shipper/downloads/table_manager_test.go @@ -62,7 +62,7 @@ func TestTableManager_QueryPages(t *testing.T) { }) t.Run("it doesn't deadlock when table create fails", func(t *testing.T) { - tempDir := os.TempDir() + tempDir := t.TempDir() // This file forces chunk_util.EnsureDirectory to fail. Any write error would cause this // deadlock From 13eea151573c0014cd45232c3508cbbb2e470163 Mon Sep 17 00:00:00 2001 From: Sandeep Sukhani Date: Mon, 28 Feb 2022 19:25:49 +0530 Subject: [PATCH 4/4] fix broken test --- pkg/storage/stores/shipper/downloads/table_manager_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/storage/stores/shipper/downloads/table_manager_test.go b/pkg/storage/stores/shipper/downloads/table_manager_test.go index 642a3082de582..7d724b2c359b5 100644 --- a/pkg/storage/stores/shipper/downloads/table_manager_test.go +++ b/pkg/storage/stores/shipper/downloads/table_manager_test.go @@ -63,6 +63,7 @@ func TestTableManager_QueryPages(t *testing.T) { t.Run("it doesn't deadlock when table create fails", func(t *testing.T) { tempDir := t.TempDir() + require.NoError(t, os.Mkdir(filepath.Join(tempDir, "cache"), 0777)) // This file forces chunk_util.EnsureDirectory to fail. Any write error would cause this // deadlock