Skip to content

Commit

Permalink
Merge pull request #641 from fluxcd/improve-cgroup-discovery
Browse files Browse the repository at this point in the history
oomwatch: auto detect well known cgroup paths
  • Loading branch information
hiddeco authored Mar 10, 2023
2 parents 629cd06 + b732420 commit 99f6339
Show file tree
Hide file tree
Showing 3 changed files with 224 additions and 42 deletions.
90 changes: 70 additions & 20 deletions internal/oomwatch/watch.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,26 @@ import (
"github.com/go-logr/logr"
)

const (
// DefaultCgroupPath is the default path to the cgroup directory.
var (
// DefaultCgroupPath is the default path to the cgroup directory within a
// container. It is used to discover the cgroup files if they are not
// provided.
DefaultCgroupPath = "/sys/fs/cgroup/"
// MemoryMaxFile is the cgroup memory.max filename.
)

const (
// MemoryLimitFile is the cgroup v1 memory.limit_in_bytes filepath relative
// to DefaultCgroupPath.
MemoryLimitFile = "memory/memory.limit_in_bytes"
// MemoryUsageFile is the cgroup v1 memory.usage_in_bytes filepath relative
// to DefaultCgroupPath.
MemoryUsageFile = "memory/memory.usage_in_bytes"

// MemoryMaxFile is the cgroup v2 memory.max filepath relative to
// DefaultCgroupPath.
MemoryMaxFile = "memory.max"
// MemoryCurrentFile is the cgroup memory.current filename.
// MemoryCurrentFile is the cgroup v2 memory.current filepath relative to
// DefaultCgroupPath.
MemoryCurrentFile = "memory.current"
)

Expand All @@ -61,8 +75,11 @@ type Watcher struct {
once sync.Once
}

// New returns a new Watcher.
func New(memoryMaxPath, memoryCurrentPath string, memoryUsagePercentThreshold uint8, interval time.Duration, logger logr.Logger) (*Watcher, error) {
// New returns a new Watcher with the given configuration. If the provided
// paths are empty, it will attempt to discover the paths to the cgroup files.
// It returns an error if the paths cannot be discovered or if the provided
// configuration is invalid.
func New(memoryMaxPath, memoryCurrentPath string, memoryUsagePercentThreshold uint8, interval time.Duration, logger logr.Logger) (_ *Watcher, err error) {
if memoryUsagePercentThreshold < 1 || memoryUsagePercentThreshold > 100 {
return nil, fmt.Errorf("memory usage percent threshold must be between 1 and 100, got %d", memoryUsagePercentThreshold)
}
Expand All @@ -71,13 +88,18 @@ func New(memoryMaxPath, memoryCurrentPath string, memoryUsagePercentThreshold ui
return nil, fmt.Errorf("interval must be at least %s, got %s", minInterval, interval)
}

if _, err := os.Lstat(memoryCurrentPath); err != nil {
return nil, fmt.Errorf("failed to stat memory.current %q: %w", memoryCurrentPath, err)
memoryMaxPath, memoryCurrentPath, err = discoverCgroupPaths(memoryMaxPath, memoryCurrentPath)
if err != nil {
return nil, err
}

if _, err = os.Lstat(memoryCurrentPath); err != nil {
return nil, fmt.Errorf("failed to confirm existence of current memory usage file: %w", err)
}

memoryMax, err := readUintFromFile(memoryMaxPath)
if err != nil {
return nil, fmt.Errorf("failed to read memory.max %q: %w", memoryMaxPath, err)
return nil, fmt.Errorf("failed to read memory usage limit: %w", err)
}

return &Watcher{
Expand All @@ -89,17 +111,6 @@ func New(memoryMaxPath, memoryCurrentPath string, memoryUsagePercentThreshold ui
}, nil
}

// NewDefault returns a new Watcher with default path values.
func NewDefault(memoryUsagePercentThreshold uint8, interval time.Duration, logger logr.Logger) (*Watcher, error) {
return New(
filepath.Join(DefaultCgroupPath, MemoryMaxFile),
filepath.Join(DefaultCgroupPath, MemoryCurrentFile),
memoryUsagePercentThreshold,
interval,
logger,
)
}

// Watch returns a context that is canceled when the system reaches the
// configured memory usage threshold. Calling Watch multiple times will return
// the same context.
Expand Down Expand Up @@ -144,6 +155,45 @@ func (w *Watcher) watchForNearOOM(ctx context.Context) {
}
}

// discoverCgroupPaths attempts to automatically discover the cgroup v1 and v2
// paths for the max and current memory files when they are not provided. It
// returns the discovered and/or provided max and current paths.
// When a path is not provided and cannot be discovered, an error is returned.
func discoverCgroupPaths(memoryMaxPath, memoryCurrentPath string) (string, string, error) {
if memoryMaxPath == "" {
maxPathV1 := filepath.Join(DefaultCgroupPath, MemoryLimitFile)
maxPathV2 := filepath.Join(DefaultCgroupPath, MemoryMaxFile)

if _, err := os.Lstat(maxPathV2); err == nil {
memoryMaxPath = maxPathV2
} else if _, err = os.Lstat(maxPathV1); err == nil {
memoryMaxPath = maxPathV1
}
}
if memoryCurrentPath == "" {
currentPathV1 := filepath.Join(DefaultCgroupPath, MemoryUsageFile)
currentPathV2 := filepath.Join(DefaultCgroupPath, MemoryCurrentFile)

if _, err := os.Lstat(currentPathV2); err == nil {
memoryCurrentPath = currentPathV2
} else if _, err = os.Lstat(currentPathV1); err == nil {
memoryCurrentPath = currentPathV1
}
}

if memoryMaxPath == "" && memoryCurrentPath == "" {
return "", "", fmt.Errorf("failed to discover cgroup paths, please specify them manually")
}
if memoryMaxPath == "" {
return "", "", fmt.Errorf("failed to discover max memory path, please specify it manually")
}
if memoryCurrentPath == "" {
return "", "", fmt.Errorf("failed to discover current memory path, please specify it manually")
}

return memoryMaxPath, memoryCurrentPath, nil
}

// readUintFromFile reads an uint64 from the file at the given path.
func readUintFromFile(path string) (uint64, error) {
b, err := os.ReadFile(path)
Expand Down
128 changes: 124 additions & 4 deletions internal/oomwatch/watch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,42 @@ func TestNew(t *testing.T) {
}))
})

t.Run("auto discovery", func(t *testing.T) {
t.Run("success", func(t *testing.T) {
g := NewWithT(t)

setDefaultCgroupPath(t)

mockMemoryMax := filepath.Join(DefaultCgroupPath, MemoryMaxFile)
g.Expect(os.WriteFile(mockMemoryMax, []byte("1000000000"), 0o640)).To(Succeed())

mockMemoryCurrent := filepath.Join(DefaultCgroupPath, MemoryCurrentFile)
_, err := os.Create(mockMemoryCurrent)
g.Expect(err).ToNot(HaveOccurred())

w, err := New("", "", 1, time.Second, logr.Discard())
g.Expect(err).ToNot(HaveOccurred())

g.Expect(w).To(BeEquivalentTo(&Watcher{
memoryMax: uint64(1000000000),
memoryCurrentPath: mockMemoryCurrent,
memoryUsagePercentThreshold: 1,
interval: time.Second,
logger: logr.Discard(),
}))
})

t.Run("failure", func(t *testing.T) {
g := NewWithT(t)

setDefaultCgroupPath(t)

_, err := New("", "", 1, time.Second, logr.Discard())
g.Expect(err).To(HaveOccurred())
g.Expect(err.Error()).To(ContainSubstring("failed to discover cgroup paths"))
})
})

t.Run("validation", func(t *testing.T) {
t.Run("memory usage percentage threshold", func(t *testing.T) {
t.Run("less than 1", func(t *testing.T) {
Expand Down Expand Up @@ -82,9 +118,9 @@ func TestNew(t *testing.T) {
t.Run("does not exist", func(t *testing.T) {
g := NewWithT(t)

_, err := New("", "", 1, 50*time.Second, logr.Discard())
_, err := New("ignore", "does.not.exist", 1, 50*time.Second, logr.Discard())
g.Expect(err).To(HaveOccurred())
g.Expect(err.Error()).To(ContainSubstring("failed to stat memory.current \"\": lstat : no such file or directory"))
g.Expect(err.Error()).To(ContainSubstring(`failed to confirm existence of current memory usage file: lstat does.not.exist: no such file or directory`))
})
})

Expand All @@ -96,9 +132,9 @@ func TestNew(t *testing.T) {
_, err := os.Create(mockMemoryCurrent)
g.Expect(err).NotTo(HaveOccurred())

_, err = New("", mockMemoryCurrent, 1, 50*time.Second, logr.Discard())
_, err = New("does.not.exist", mockMemoryCurrent, 1, 50*time.Second, logr.Discard())
g.Expect(err).To(HaveOccurred())
g.Expect(err.Error()).To(ContainSubstring("failed to read memory.max \"\": open : no such file or directory"))
g.Expect(err.Error()).To(ContainSubstring(`failed to read memory usage limit: open does.not.exist: no such file or directory`))
})
})
})
Expand Down Expand Up @@ -248,3 +284,87 @@ func TestWatcher_watchForNearOOM(t *testing.T) {
}
})
}

func Test_discoverCgroupPaths(t *testing.T) {
t.Run("discovers memory max path", func(t *testing.T) {
paths := []string{
MemoryMaxFile,
MemoryLimitFile,
}
for _, p := range paths {
t.Run(p, func(t *testing.T) {
g := NewWithT(t)

setDefaultCgroupPath(t)

maxPathMock := filepath.Join(DefaultCgroupPath, p)
g.Expect(os.MkdirAll(filepath.Dir(maxPathMock), 0o755)).To(Succeed())
g.Expect(os.WriteFile(maxPathMock, []byte("0"), 0o640)).To(Succeed())

currentDummy := filepath.Join(DefaultCgroupPath, "dummy")
max, current, err := discoverCgroupPaths("", currentDummy)
g.Expect(err).NotTo(HaveOccurred())
g.Expect(max).To(Equal(maxPathMock))
g.Expect(current).To(Equal(currentDummy))
})
}
})

t.Run("discovers memory current path", func(t *testing.T) {
paths := []string{
MemoryCurrentFile,
MemoryUsageFile,
}
for _, p := range paths {
t.Run(p, func(t *testing.T) {
g := NewWithT(t)

setDefaultCgroupPath(t)

currentPathMock := filepath.Join(DefaultCgroupPath, p)
g.Expect(os.MkdirAll(filepath.Dir(currentPathMock), 0o755)).To(Succeed())
g.Expect(os.WriteFile(currentPathMock, []byte("0"), 0o640)).To(Succeed())

maxDummy := filepath.Join(DefaultCgroupPath, "dummy")
max, current, err := discoverCgroupPaths(maxDummy, "")
g.Expect(err).NotTo(HaveOccurred())
g.Expect(max).To(Equal(maxDummy))
g.Expect(current).To(Equal(currentPathMock))
})
}
})

t.Run("returns provided paths", func(t *testing.T) {
g := NewWithT(t)

maxDummy := filepath.Join(DefaultCgroupPath, "dummy")
currentDummy := filepath.Join(DefaultCgroupPath, "dummy")

max, current, err := discoverCgroupPaths(maxDummy, currentDummy)
g.Expect(err).NotTo(HaveOccurred())
g.Expect(max).To(Equal(maxDummy))
g.Expect(current).To(Equal(currentDummy))
})

t.Run("returns error when no paths are discovered", func(t *testing.T) {
g := NewWithT(t)

setDefaultCgroupPath(t)

max, min, err := discoverCgroupPaths("", "")
g.Expect(err).To(HaveOccurred())
g.Expect(err.Error()).To(ContainSubstring("failed to discover cgroup paths"))
g.Expect(max).To(BeEmpty())
g.Expect(min).To(BeEmpty())
})
}

func setDefaultCgroupPath(t *testing.T) {
t.Helper()

t.Cleanup(func() {
reset := DefaultCgroupPath
DefaultCgroupPath = reset
})
DefaultCgroupPath = t.TempDir()
}
48 changes: 30 additions & 18 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,23 +70,25 @@ func init() {

func main() {
var (
metricsAddr string
eventsAddr string
healthAddr string
concurrent int
requeueDependency time.Duration
gracefulShutdownTimeout time.Duration
watchAllNamespaces bool
httpRetry int
clientOptions client.Options
kubeConfigOpts client.KubeConfigOptions
featureGates feathelper.FeatureGates
logOptions logger.Options
aclOptions acl.Options
leaderElectionOptions leaderelection.Options
rateLimiterOptions helper.RateLimiterOptions
oomWatchInterval time.Duration
oomWatchMemoryThreshold uint8
metricsAddr string
eventsAddr string
healthAddr string
concurrent int
requeueDependency time.Duration
gracefulShutdownTimeout time.Duration
watchAllNamespaces bool
httpRetry int
clientOptions client.Options
kubeConfigOpts client.KubeConfigOptions
featureGates feathelper.FeatureGates
logOptions logger.Options
aclOptions acl.Options
leaderElectionOptions leaderelection.Options
rateLimiterOptions helper.RateLimiterOptions
oomWatchInterval time.Duration
oomWatchMemoryThreshold uint8
oomWatchMaxMemoryPath string
oomWatchCurrentMemoryPath string
)

flag.StringVar(&metricsAddr, "metrics-addr", ":8080",
Expand All @@ -111,6 +113,10 @@ func main() {
"The memory threshold in percentage at which the OOM watcher will trigger a graceful shutdown. Requires feature gate 'OOMWatch' to be enabled.")
flag.DurationVar(&oomWatchInterval, "oom-watch-interval", 500*time.Millisecond,
"The interval at which the OOM watcher will check for memory usage. Requires feature gate 'OOMWatch' to be enabled.")
flag.StringVar(&oomWatchMaxMemoryPath, "oom-watch-max-memory-path", "",
"The path to the cgroup memory limit file. Requires feature gate 'OOMWatch' to be enabled. If not set, the path will be automatically detected.")
flag.StringVar(&oomWatchCurrentMemoryPath, "oom-watch-current-memory-path", "",
"The path to the cgroup current memory usage file. Requires feature gate 'OOMWatch' to be enabled. If not set, the path will be automatically detected.")

clientOptions.BindFlags(flag.CommandLine)
logOptions.BindFlags(flag.CommandLine)
Expand Down Expand Up @@ -210,7 +216,13 @@ func main() {
ctx := ctrl.SetupSignalHandler()
if ok, _ := features.Enabled(features.OOMWatch); ok {
setupLog.Info("setting up OOM watcher")
ow, err := oomwatch.NewDefault(oomWatchMemoryThreshold, oomWatchInterval, ctrl.Log.WithName("OOMwatch"))
ow, err := oomwatch.New(
oomWatchMaxMemoryPath,
oomWatchCurrentMemoryPath,
oomWatchMemoryThreshold,
oomWatchInterval,
ctrl.Log.WithName("OOMwatch"),
)
if err != nil {
setupLog.Error(err, "unable to setup OOM watcher")
os.Exit(1)
Expand Down

0 comments on commit 99f6339

Please sign in to comment.