From 3ae7c43136600bee2bf230435078756fd4b4f1ed Mon Sep 17 00:00:00 2001 From: Hidde Beydals Date: Fri, 10 Mar 2023 14:13:01 +0100 Subject: [PATCH] oomwatch: auto detect well known cgroup paths This commit adds support for recognizing cgroup v1 paths, and allows for the configuration of alternative absolute path locations using `--oom-watch-max-memory-path` and `--oom-watch-current-memory-path`. Signed-off-by: Hidde Beydals --- internal/oomwatch/watch.go | 93 +++++++++++++++++------ internal/oomwatch/watch_test.go | 128 +++++++++++++++++++++++++++++++- main.go | 48 +++++++----- 3 files changed, 225 insertions(+), 44 deletions(-) diff --git a/internal/oomwatch/watch.go b/internal/oomwatch/watch.go index f60ba6c42..549d9a0a4 100644 --- a/internal/oomwatch/watch.go +++ b/internal/oomwatch/watch.go @@ -30,12 +30,26 @@ import ( "github.com/go-logr/logr" ) -const ( - // DefaultCgroupPath is the default path to the cgroup directory. +var ( + // DefaultCgroupPath is the default path to the cgroup directory within a + // container. It is used to discover the cgroup files if they are not + // provided. DefaultCgroupPath = "/sys/fs/cgroup/" - // MemoryMaxFile is the cgroup memory.max filename. +) + +const ( + // MemoryLimitFile is the cgroup v1 memory.limit_in_bytes filepath relative + // to DefaultCgroupPath. + MemoryLimitFile = "memory/memory.limit_in_bytes" + // MemoryUsageFile is the cgroup v1 memory.usage_in_bytes filepath relative + // to DefaultCgroupPath. + MemoryUsageFile = "memory/memory.usage_in_bytes" + + // MemoryMaxFile is the cgroup v2 memory.max filepath relative to + // DefaultCgroupPath. MemoryMaxFile = "memory.max" - // MemoryCurrentFile is the cgroup memory.current filename. + // MemoryCurrentFile is the cgroup v2 memory.current filepath relative to + // DefaultCgroupPath. MemoryCurrentFile = "memory.current" ) @@ -61,8 +75,11 @@ type Watcher struct { once sync.Once } -// New returns a new Watcher. -func New(memoryMaxPath, memoryCurrentPath string, memoryUsagePercentThreshold uint8, interval time.Duration, logger logr.Logger) (*Watcher, error) { +// New returns a new Watcher with the given configuration. If the provided +// paths are empty, it will attempt to discover the paths to the cgroup files. +// It returns an error if the paths cannot be discovered or if the provided +// configuration is invalid. +func New(maxMemoryPath, currentMemoryPath string, memoryUsagePercentThreshold uint8, interval time.Duration, logger logr.Logger) (_ *Watcher, err error) { if memoryUsagePercentThreshold < 1 || memoryUsagePercentThreshold > 100 { return nil, fmt.Errorf("memory usage percent threshold must be between 1 and 100, got %d", memoryUsagePercentThreshold) } @@ -71,35 +88,29 @@ func New(memoryMaxPath, memoryCurrentPath string, memoryUsagePercentThreshold ui return nil, fmt.Errorf("interval must be at least %s, got %s", minInterval, interval) } - if _, err := os.Lstat(memoryCurrentPath); err != nil { - return nil, fmt.Errorf("failed to stat memory.current %q: %w", memoryCurrentPath, err) + maxMemoryPath, currentMemoryPath, err = discoverCgroupPaths(maxMemoryPath, currentMemoryPath) + if err != nil { + return nil, err } - memoryMax, err := readUintFromFile(memoryMaxPath) + if _, err := os.Lstat(currentMemoryPath); err != nil { + return nil, fmt.Errorf("failed to confirm existence of current memory usage path: %w", err) + } + + memoryMax, err := readUintFromFile(maxMemoryPath) if err != nil { - return nil, fmt.Errorf("failed to read memory.max %q: %w", memoryMaxPath, err) + return nil, fmt.Errorf("failed to read maximum memory usage: %w", err) } return &Watcher{ memoryMax: memoryMax, - memoryCurrentPath: memoryCurrentPath, + memoryCurrentPath: currentMemoryPath, memoryUsagePercentThreshold: memoryUsagePercentThreshold, interval: interval, logger: logger, }, nil } -// NewDefault returns a new Watcher with default path values. -func NewDefault(memoryUsagePercentThreshold uint8, interval time.Duration, logger logr.Logger) (*Watcher, error) { - return New( - filepath.Join(DefaultCgroupPath, MemoryMaxFile), - filepath.Join(DefaultCgroupPath, MemoryCurrentFile), - memoryUsagePercentThreshold, - interval, - logger, - ) -} - // Watch returns a context that is canceled when the system reaches the // configured memory usage threshold. Calling Watch multiple times will return // the same context. @@ -144,6 +155,44 @@ func (w *Watcher) watchForNearOOM(ctx context.Context) { } } +// discoverCgroupPaths attempts to automatically discover the cgroup v1 and v2 +// paths for the max and current memory files when they are not provided. It +// returns the discovered and/or provided max and current paths. +// When a path is not provided and cannot be discovered, an error is returned. +func discoverCgroupPaths(maxMemoryPath, currentMemoryPath string) (string, string, error) { + if maxMemoryPath == "" { + maxPathV1 := filepath.Join(DefaultCgroupPath, MemoryLimitFile) + maxPathV2 := filepath.Join(DefaultCgroupPath, MemoryMaxFile) + + if _, err := os.Lstat(maxPathV2); err == nil { + maxMemoryPath = maxPathV2 + } else if _, err = os.Lstat(maxPathV1); err == nil { + maxMemoryPath = maxPathV1 + } + } + if currentMemoryPath == "" { + currentPathV1 := filepath.Join(DefaultCgroupPath, MemoryUsageFile) + currentPathV2 := filepath.Join(DefaultCgroupPath, MemoryCurrentFile) + + if _, err := os.Lstat(currentPathV2); err == nil { + currentMemoryPath = currentPathV2 + } else if _, err = os.Lstat(currentPathV1); err == nil { + currentMemoryPath = currentPathV1 + } + } + bothEmpty := maxMemoryPath == "" && currentMemoryPath == "" + if bothEmpty || maxMemoryPath == "" || currentMemoryPath == "" { + if !bothEmpty { + if maxMemoryPath == "" { + return "", "", fmt.Errorf("failed to discover max memory path, please specify it manually") + } + return "", "", fmt.Errorf("failed to discover current memory path, please specify it manually") + } + return "", "", fmt.Errorf("failed to discover cgroup paths, please specify them manually") + } + return maxMemoryPath, currentMemoryPath, nil +} + // readUintFromFile reads an uint64 from the file at the given path. func readUintFromFile(path string) (uint64, error) { b, err := os.ReadFile(path) diff --git a/internal/oomwatch/watch_test.go b/internal/oomwatch/watch_test.go index d53ab1156..424718924 100644 --- a/internal/oomwatch/watch_test.go +++ b/internal/oomwatch/watch_test.go @@ -50,6 +50,42 @@ func TestNew(t *testing.T) { })) }) + t.Run("auto discovery", func(t *testing.T) { + t.Run("success", func(t *testing.T) { + g := NewWithT(t) + + setDefaultCgroupPath(t) + + mockMemoryMax := filepath.Join(DefaultCgroupPath, MemoryMaxFile) + g.Expect(os.WriteFile(mockMemoryMax, []byte("1000000000"), 0o640)).To(Succeed()) + + mockMemoryCurrent := filepath.Join(DefaultCgroupPath, MemoryCurrentFile) + _, err := os.Create(mockMemoryCurrent) + g.Expect(err).ToNot(HaveOccurred()) + + w, err := New("", "", 1, time.Second, logr.Discard()) + g.Expect(err).ToNot(HaveOccurred()) + + g.Expect(w).To(BeEquivalentTo(&Watcher{ + memoryMax: uint64(1000000000), + memoryCurrentPath: mockMemoryCurrent, + memoryUsagePercentThreshold: 1, + interval: time.Second, + logger: logr.Discard(), + })) + }) + + t.Run("failure", func(t *testing.T) { + g := NewWithT(t) + + setDefaultCgroupPath(t) + + _, err := New("", "", 1, time.Second, logr.Discard()) + g.Expect(err).To(HaveOccurred()) + g.Expect(err.Error()).To(ContainSubstring("failed to discover cgroup paths")) + }) + }) + t.Run("validation", func(t *testing.T) { t.Run("memory usage percentage threshold", func(t *testing.T) { t.Run("less than 1", func(t *testing.T) { @@ -82,9 +118,9 @@ func TestNew(t *testing.T) { t.Run("does not exist", func(t *testing.T) { g := NewWithT(t) - _, err := New("", "", 1, 50*time.Second, logr.Discard()) + _, err := New("ignore", "does.not.exist", 1, 50*time.Second, logr.Discard()) g.Expect(err).To(HaveOccurred()) - g.Expect(err.Error()).To(ContainSubstring("failed to stat memory.current \"\": lstat : no such file or directory")) + g.Expect(err.Error()).To(ContainSubstring(`failed to confirm existence of current memory usage path: lstat does.not.exist: no such file or directory`)) }) }) @@ -96,9 +132,9 @@ func TestNew(t *testing.T) { _, err := os.Create(mockMemoryCurrent) g.Expect(err).NotTo(HaveOccurred()) - _, err = New("", mockMemoryCurrent, 1, 50*time.Second, logr.Discard()) + _, err = New("does.not.exist", mockMemoryCurrent, 1, 50*time.Second, logr.Discard()) g.Expect(err).To(HaveOccurred()) - g.Expect(err.Error()).To(ContainSubstring("failed to read memory.max \"\": open : no such file or directory")) + g.Expect(err.Error()).To(ContainSubstring(`failed to read maximum memory usage: open does.not.exist: no such file or directory`)) }) }) }) @@ -248,3 +284,87 @@ func TestWatcher_watchForNearOOM(t *testing.T) { } }) } + +func Test_discoverCgroupPaths(t *testing.T) { + t.Run("discovers memory max path", func(t *testing.T) { + paths := []string{ + MemoryMaxFile, + MemoryLimitFile, + } + for _, p := range paths { + t.Run(p, func(t *testing.T) { + g := NewWithT(t) + + setDefaultCgroupPath(t) + + maxPathMock := filepath.Join(DefaultCgroupPath, p) + g.Expect(os.MkdirAll(filepath.Dir(maxPathMock), 0o755)).To(Succeed()) + g.Expect(os.WriteFile(maxPathMock, []byte("0"), 0o640)).To(Succeed()) + + currentDummy := filepath.Join(DefaultCgroupPath, "dummy") + max, current, err := discoverCgroupPaths("", currentDummy) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(max).To(Equal(maxPathMock)) + g.Expect(current).To(Equal(currentDummy)) + }) + } + }) + + t.Run("discovers memory current path", func(t *testing.T) { + paths := []string{ + MemoryCurrentFile, + MemoryUsageFile, + } + for _, p := range paths { + t.Run(p, func(t *testing.T) { + g := NewWithT(t) + + setDefaultCgroupPath(t) + + currentPathMock := filepath.Join(DefaultCgroupPath, p) + g.Expect(os.MkdirAll(filepath.Dir(currentPathMock), 0o755)).To(Succeed()) + g.Expect(os.WriteFile(currentPathMock, []byte("0"), 0o640)).To(Succeed()) + + maxDummy := filepath.Join(DefaultCgroupPath, "dummy") + max, current, err := discoverCgroupPaths(maxDummy, "") + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(max).To(Equal(maxDummy)) + g.Expect(current).To(Equal(currentPathMock)) + }) + } + }) + + t.Run("returns provided paths", func(t *testing.T) { + g := NewWithT(t) + + maxDummy := filepath.Join(DefaultCgroupPath, "dummy") + currentDummy := filepath.Join(DefaultCgroupPath, "dummy") + + max, current, err := discoverCgroupPaths(maxDummy, currentDummy) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(max).To(Equal(maxDummy)) + g.Expect(current).To(Equal(currentDummy)) + }) + + t.Run("returns error when no paths are discovered", func(t *testing.T) { + g := NewWithT(t) + + setDefaultCgroupPath(t) + + max, min, err := discoverCgroupPaths("", "") + g.Expect(err).To(HaveOccurred()) + g.Expect(err.Error()).To(ContainSubstring("failed to discover cgroup paths")) + g.Expect(max).To(BeEmpty()) + g.Expect(min).To(BeEmpty()) + }) +} + +func setDefaultCgroupPath(t *testing.T) { + t.Helper() + + t.Cleanup(func() { + reset := DefaultCgroupPath + DefaultCgroupPath = reset + }) + DefaultCgroupPath = t.TempDir() +} diff --git a/main.go b/main.go index e320fec97..affb02e26 100644 --- a/main.go +++ b/main.go @@ -70,23 +70,25 @@ func init() { func main() { var ( - metricsAddr string - eventsAddr string - healthAddr string - concurrent int - requeueDependency time.Duration - gracefulShutdownTimeout time.Duration - watchAllNamespaces bool - httpRetry int - clientOptions client.Options - kubeConfigOpts client.KubeConfigOptions - featureGates feathelper.FeatureGates - logOptions logger.Options - aclOptions acl.Options - leaderElectionOptions leaderelection.Options - rateLimiterOptions helper.RateLimiterOptions - oomWatchInterval time.Duration - oomWatchMemoryThreshold uint8 + metricsAddr string + eventsAddr string + healthAddr string + concurrent int + requeueDependency time.Duration + gracefulShutdownTimeout time.Duration + watchAllNamespaces bool + httpRetry int + clientOptions client.Options + kubeConfigOpts client.KubeConfigOptions + featureGates feathelper.FeatureGates + logOptions logger.Options + aclOptions acl.Options + leaderElectionOptions leaderelection.Options + rateLimiterOptions helper.RateLimiterOptions + oomWatchInterval time.Duration + oomWatchMemoryThreshold uint8 + oomWatchMaxMemoryPath string + oomWatchCurrentMemoryPath string ) flag.StringVar(&metricsAddr, "metrics-addr", ":8080", @@ -111,6 +113,10 @@ func main() { "The memory threshold in percentage at which the OOM watcher will trigger a graceful shutdown. Requires feature gate 'OOMWatch' to be enabled.") flag.DurationVar(&oomWatchInterval, "oom-watch-interval", 500*time.Millisecond, "The interval at which the OOM watcher will check for memory usage. Requires feature gate 'OOMWatch' to be enabled.") + flag.StringVar(&oomWatchMaxMemoryPath, "oom-watch-max-memory-path", "", + "The path to the cgroup memory limit file. Requires feature gate 'OOMWatch' to be enabled. If not set, the path will be automatically detected.") + flag.StringVar(&oomWatchCurrentMemoryPath, "oom-watch-current-memory-path", "", + "The path to the cgroup current memory usage file. Requires feature gate 'OOMWatch' to be enabled. If not set, the path will be automatically detected.") clientOptions.BindFlags(flag.CommandLine) logOptions.BindFlags(flag.CommandLine) @@ -210,7 +216,13 @@ func main() { ctx := ctrl.SetupSignalHandler() if ok, _ := features.Enabled(features.OOMWatch); ok { setupLog.Info("setting up OOM watcher") - ow, err := oomwatch.NewDefault(oomWatchMemoryThreshold, oomWatchInterval, ctrl.Log.WithName("OOMwatch")) + ow, err := oomwatch.New( + oomWatchMaxMemoryPath, + oomWatchCurrentMemoryPath, + oomWatchMemoryThreshold, + oomWatchInterval, + ctrl.Log.WithName("OOMwatch"), + ) if err != nil { setupLog.Error(err, "unable to setup OOM watcher") os.Exit(1)