From 9ba5753e253dc60a751848c1fa6352c1d905187c Mon Sep 17 00:00:00 2001
From: Ed Welch
Date: Wed, 25 Nov 2020 08:32:24 -0500
Subject: [PATCH] Loki: handle faults when opening boltdb files (#2988)

* we also need to catch faults not just panics when opening boltdb files

* fix lint

* i'll get it one of these times

* fix typo
---
 pkg/storage/stores/shipper/util/util.go | 32 ++++++++++++++++++++++---
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/pkg/storage/stores/shipper/util/util.go b/pkg/storage/stores/shipper/util/util.go
index 4b34ebe208c57..8861043948d1b 100644
--- a/pkg/storage/stores/shipper/util/util.go
+++ b/pkg/storage/stores/shipper/util/util.go
@@ -5,6 +5,7 @@ import (
 	"fmt"
 	"io"
 	"os"
+	"runtime/debug"
 	"strings"
 
 	"github.com/cortexproject/cortex/pkg/chunk/local"
@@ -120,12 +121,37 @@ func CompressFile(src, dest string) error {
 	return compressedFile.Sync()
 }
 
+type result struct {
+	boltdb *bbolt.DB
+	err    error
+}
+
 // SafeOpenBoltdbFile will recover from a panic opening a DB file, and return the panic message in the err return object.
-func SafeOpenBoltdbFile(path string) (boltdb *bbolt.DB, err error) {
+func SafeOpenBoltdbFile(path string) (*bbolt.DB, error) {
+	result := make(chan *result)
+	// Open the file in a separate goroutine because we want to change
+	// the behavior of a Fault for just this operation and not for the
+	// calling goroutine
+	go safeOpenBoltDbFile(path, result)
+	res := <-result
+	return res.boltdb, res.err
+}
+
+func safeOpenBoltDbFile(path string, ret chan *result) {
+	// boltdb can throw faults which are not caught by recover unless we turn them into panics
+	debug.SetPanicOnFault(true)
+	res := &result{}
+
 	defer func() {
 		if r := recover(); r != nil {
-			err = fmt.Errorf("recovered from panic opening boltdb file: %v", r)
+			res.err = fmt.Errorf("recovered from panic opening boltdb file: %v", r)
 		}
+
+		// Return the result object on the channel to unblock the calling thread
+		ret <- res
 	}()
-	return local.OpenBoltdbFile(path)
+
+	b, err := local.OpenBoltdbFile(path)
+	res.boltdb = b
+	res.err = err
 }