Skip to content

Commit

Permalink
oomwatch: auto detect well known cgroup paths
Browse files Browse the repository at this point in the history
This commit adds support for recognizing cgroup v1 paths, and allows for
the configuration of alternative absolute path locations using
`--oom-watch-max-memory-path` and `--oom-watch-current-memory-path`.

Signed-off-by: Hidde Beydals <hidde@hhh.computer>
  • Loading branch information
hiddeco committed Mar 10, 2023
1 parent 629cd06 commit 3ae7c43
Show file tree
Hide file tree
Showing 3 changed files with 225 additions and 44 deletions.
93 changes: 71 additions & 22 deletions internal/oomwatch/watch.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,26 @@ import (
"github.com/go-logr/logr"
)

const (
// DefaultCgroupPath is the default path to the cgroup directory.
var (
// DefaultCgroupPath is the default path to the cgroup directory within a
// container. It is used to discover the cgroup files if they are not
// provided.
DefaultCgroupPath = "/sys/fs/cgroup/"
// MemoryMaxFile is the cgroup memory.max filename.
)

const (
// MemoryLimitFile is the cgroup v1 memory.limit_in_bytes filepath relative
// to DefaultCgroupPath.
MemoryLimitFile = "memory/memory.limit_in_bytes"
// MemoryUsageFile is the cgroup v1 memory.usage_in_bytes filepath relative
// to DefaultCgroupPath.
MemoryUsageFile = "memory/memory.usage_in_bytes"

// MemoryMaxFile is the cgroup v2 memory.max filepath relative to
// DefaultCgroupPath.
MemoryMaxFile = "memory.max"
// MemoryCurrentFile is the cgroup memory.current filename.
// MemoryCurrentFile is the cgroup v2 memory.current filepath relative to
// DefaultCgroupPath.
MemoryCurrentFile = "memory.current"
)

Expand All @@ -61,8 +75,11 @@ type Watcher struct {
once sync.Once
}

// New returns a new Watcher.
func New(memoryMaxPath, memoryCurrentPath string, memoryUsagePercentThreshold uint8, interval time.Duration, logger logr.Logger) (*Watcher, error) {
// New returns a new Watcher with the given configuration. If the provided
// paths are empty, it will attempt to discover the paths to the cgroup files.
// It returns an error if the paths cannot be discovered or if the provided
// configuration is invalid.
func New(maxMemoryPath, currentMemoryPath string, memoryUsagePercentThreshold uint8, interval time.Duration, logger logr.Logger) (_ *Watcher, err error) {
if memoryUsagePercentThreshold < 1 || memoryUsagePercentThreshold > 100 {
return nil, fmt.Errorf("memory usage percent threshold must be between 1 and 100, got %d", memoryUsagePercentThreshold)
}
Expand All @@ -71,35 +88,29 @@ func New(memoryMaxPath, memoryCurrentPath string, memoryUsagePercentThreshold ui
return nil, fmt.Errorf("interval must be at least %s, got %s", minInterval, interval)
}

if _, err := os.Lstat(memoryCurrentPath); err != nil {
return nil, fmt.Errorf("failed to stat memory.current %q: %w", memoryCurrentPath, err)
maxMemoryPath, currentMemoryPath, err = discoverCgroupPaths(maxMemoryPath, currentMemoryPath)
if err != nil {
return nil, err
}

memoryMax, err := readUintFromFile(memoryMaxPath)
if _, err := os.Lstat(currentMemoryPath); err != nil {
return nil, fmt.Errorf("failed to confirm existence of current memory usage path: %w", err)
}

memoryMax, err := readUintFromFile(maxMemoryPath)
if err != nil {
return nil, fmt.Errorf("failed to read memory.max %q: %w", memoryMaxPath, err)
return nil, fmt.Errorf("failed to read maximum memory usage: %w", err)
}

return &Watcher{
memoryMax: memoryMax,
memoryCurrentPath: memoryCurrentPath,
memoryCurrentPath: currentMemoryPath,
memoryUsagePercentThreshold: memoryUsagePercentThreshold,
interval: interval,
logger: logger,
}, nil
}

// NewDefault returns a new Watcher with default path values.
func NewDefault(memoryUsagePercentThreshold uint8, interval time.Duration, logger logr.Logger) (*Watcher, error) {
return New(
filepath.Join(DefaultCgroupPath, MemoryMaxFile),
filepath.Join(DefaultCgroupPath, MemoryCurrentFile),
memoryUsagePercentThreshold,
interval,
logger,
)
}

// Watch returns a context that is canceled when the system reaches the
// configured memory usage threshold. Calling Watch multiple times will return
// the same context.
Expand Down Expand Up @@ -144,6 +155,44 @@ func (w *Watcher) watchForNearOOM(ctx context.Context) {
}
}

// discoverCgroupPaths attempts to automatically discover the cgroup v1 and v2
// paths for the max and current memory files when they are not provided. It
// returns the discovered and/or provided max and current paths.
// When a path is not provided and cannot be discovered, an error is returned.
func discoverCgroupPaths(maxMemoryPath, currentMemoryPath string) (string, string, error) {
if maxMemoryPath == "" {
maxPathV1 := filepath.Join(DefaultCgroupPath, MemoryLimitFile)
maxPathV2 := filepath.Join(DefaultCgroupPath, MemoryMaxFile)

if _, err := os.Lstat(maxPathV2); err == nil {
maxMemoryPath = maxPathV2
} else if _, err = os.Lstat(maxPathV1); err == nil {
maxMemoryPath = maxPathV1
}
}
if currentMemoryPath == "" {
currentPathV1 := filepath.Join(DefaultCgroupPath, MemoryUsageFile)
currentPathV2 := filepath.Join(DefaultCgroupPath, MemoryCurrentFile)

if _, err := os.Lstat(currentPathV2); err == nil {
currentMemoryPath = currentPathV2
} else if _, err = os.Lstat(currentPathV1); err == nil {
currentMemoryPath = currentPathV1
}
}
bothEmpty := maxMemoryPath == "" && currentMemoryPath == ""
if bothEmpty || maxMemoryPath == "" || currentMemoryPath == "" {
if !bothEmpty {
if maxMemoryPath == "" {
return "", "", fmt.Errorf("failed to discover max memory path, please specify it manually")
}
return "", "", fmt.Errorf("failed to discover current memory path, please specify it manually")
}
return "", "", fmt.Errorf("failed to discover cgroup paths, please specify them manually")
}
return maxMemoryPath, currentMemoryPath, nil
}

// readUintFromFile reads an uint64 from the file at the given path.
func readUintFromFile(path string) (uint64, error) {
b, err := os.ReadFile(path)
Expand Down
128 changes: 124 additions & 4 deletions internal/oomwatch/watch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,42 @@ func TestNew(t *testing.T) {
}))
})

t.Run("auto discovery", func(t *testing.T) {
t.Run("success", func(t *testing.T) {
g := NewWithT(t)

setDefaultCgroupPath(t)

mockMemoryMax := filepath.Join(DefaultCgroupPath, MemoryMaxFile)
g.Expect(os.WriteFile(mockMemoryMax, []byte("1000000000"), 0o640)).To(Succeed())

mockMemoryCurrent := filepath.Join(DefaultCgroupPath, MemoryCurrentFile)
_, err := os.Create(mockMemoryCurrent)
g.Expect(err).ToNot(HaveOccurred())

w, err := New("", "", 1, time.Second, logr.Discard())
g.Expect(err).ToNot(HaveOccurred())

g.Expect(w).To(BeEquivalentTo(&Watcher{
memoryMax: uint64(1000000000),
memoryCurrentPath: mockMemoryCurrent,
memoryUsagePercentThreshold: 1,
interval: time.Second,
logger: logr.Discard(),
}))
})

t.Run("failure", func(t *testing.T) {
g := NewWithT(t)

setDefaultCgroupPath(t)

_, err := New("", "", 1, time.Second, logr.Discard())
g.Expect(err).To(HaveOccurred())
g.Expect(err.Error()).To(ContainSubstring("failed to discover cgroup paths"))
})
})

t.Run("validation", func(t *testing.T) {
t.Run("memory usage percentage threshold", func(t *testing.T) {
t.Run("less than 1", func(t *testing.T) {
Expand Down Expand Up @@ -82,9 +118,9 @@ func TestNew(t *testing.T) {
t.Run("does not exist", func(t *testing.T) {
g := NewWithT(t)

_, err := New("", "", 1, 50*time.Second, logr.Discard())
_, err := New("ignore", "does.not.exist", 1, 50*time.Second, logr.Discard())
g.Expect(err).To(HaveOccurred())
g.Expect(err.Error()).To(ContainSubstring("failed to stat memory.current \"\": lstat : no such file or directory"))
g.Expect(err.Error()).To(ContainSubstring(`failed to confirm existence of current memory usage path: lstat does.not.exist: no such file or directory`))
})
})

Expand All @@ -96,9 +132,9 @@ func TestNew(t *testing.T) {
_, err := os.Create(mockMemoryCurrent)
g.Expect(err).NotTo(HaveOccurred())

_, err = New("", mockMemoryCurrent, 1, 50*time.Second, logr.Discard())
_, err = New("does.not.exist", mockMemoryCurrent, 1, 50*time.Second, logr.Discard())
g.Expect(err).To(HaveOccurred())
g.Expect(err.Error()).To(ContainSubstring("failed to read memory.max \"\": open : no such file or directory"))
g.Expect(err.Error()).To(ContainSubstring(`failed to read maximum memory usage: open does.not.exist: no such file or directory`))
})
})
})
Expand Down Expand Up @@ -248,3 +284,87 @@ func TestWatcher_watchForNearOOM(t *testing.T) {
}
})
}

func Test_discoverCgroupPaths(t *testing.T) {
t.Run("discovers memory max path", func(t *testing.T) {
paths := []string{
MemoryMaxFile,
MemoryLimitFile,
}
for _, p := range paths {
t.Run(p, func(t *testing.T) {
g := NewWithT(t)

setDefaultCgroupPath(t)

maxPathMock := filepath.Join(DefaultCgroupPath, p)
g.Expect(os.MkdirAll(filepath.Dir(maxPathMock), 0o755)).To(Succeed())
g.Expect(os.WriteFile(maxPathMock, []byte("0"), 0o640)).To(Succeed())

currentDummy := filepath.Join(DefaultCgroupPath, "dummy")
max, current, err := discoverCgroupPaths("", currentDummy)
g.Expect(err).NotTo(HaveOccurred())
g.Expect(max).To(Equal(maxPathMock))
g.Expect(current).To(Equal(currentDummy))
})
}
})

t.Run("discovers memory current path", func(t *testing.T) {
paths := []string{
MemoryCurrentFile,
MemoryUsageFile,
}
for _, p := range paths {
t.Run(p, func(t *testing.T) {
g := NewWithT(t)

setDefaultCgroupPath(t)

currentPathMock := filepath.Join(DefaultCgroupPath, p)
g.Expect(os.MkdirAll(filepath.Dir(currentPathMock), 0o755)).To(Succeed())
g.Expect(os.WriteFile(currentPathMock, []byte("0"), 0o640)).To(Succeed())

maxDummy := filepath.Join(DefaultCgroupPath, "dummy")
max, current, err := discoverCgroupPaths(maxDummy, "")
g.Expect(err).NotTo(HaveOccurred())
g.Expect(max).To(Equal(maxDummy))
g.Expect(current).To(Equal(currentPathMock))
})
}
})

t.Run("returns provided paths", func(t *testing.T) {
g := NewWithT(t)

maxDummy := filepath.Join(DefaultCgroupPath, "dummy")
currentDummy := filepath.Join(DefaultCgroupPath, "dummy")

max, current, err := discoverCgroupPaths(maxDummy, currentDummy)
g.Expect(err).NotTo(HaveOccurred())
g.Expect(max).To(Equal(maxDummy))
g.Expect(current).To(Equal(currentDummy))
})

t.Run("returns error when no paths are discovered", func(t *testing.T) {
g := NewWithT(t)

setDefaultCgroupPath(t)

max, min, err := discoverCgroupPaths("", "")
g.Expect(err).To(HaveOccurred())
g.Expect(err.Error()).To(ContainSubstring("failed to discover cgroup paths"))
g.Expect(max).To(BeEmpty())
g.Expect(min).To(BeEmpty())
})
}

func setDefaultCgroupPath(t *testing.T) {
t.Helper()

t.Cleanup(func() {
reset := DefaultCgroupPath
DefaultCgroupPath = reset
})
DefaultCgroupPath = t.TempDir()
}
48 changes: 30 additions & 18 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,23 +70,25 @@ func init() {

func main() {
var (
metricsAddr string
eventsAddr string
healthAddr string
concurrent int
requeueDependency time.Duration
gracefulShutdownTimeout time.Duration
watchAllNamespaces bool
httpRetry int
clientOptions client.Options
kubeConfigOpts client.KubeConfigOptions
featureGates feathelper.FeatureGates
logOptions logger.Options
aclOptions acl.Options
leaderElectionOptions leaderelection.Options
rateLimiterOptions helper.RateLimiterOptions
oomWatchInterval time.Duration
oomWatchMemoryThreshold uint8
metricsAddr string
eventsAddr string
healthAddr string
concurrent int
requeueDependency time.Duration
gracefulShutdownTimeout time.Duration
watchAllNamespaces bool
httpRetry int
clientOptions client.Options
kubeConfigOpts client.KubeConfigOptions
featureGates feathelper.FeatureGates
logOptions logger.Options
aclOptions acl.Options
leaderElectionOptions leaderelection.Options
rateLimiterOptions helper.RateLimiterOptions
oomWatchInterval time.Duration
oomWatchMemoryThreshold uint8
oomWatchMaxMemoryPath string
oomWatchCurrentMemoryPath string
)

flag.StringVar(&metricsAddr, "metrics-addr", ":8080",
Expand All @@ -111,6 +113,10 @@ func main() {
"The memory threshold in percentage at which the OOM watcher will trigger a graceful shutdown. Requires feature gate 'OOMWatch' to be enabled.")
flag.DurationVar(&oomWatchInterval, "oom-watch-interval", 500*time.Millisecond,
"The interval at which the OOM watcher will check for memory usage. Requires feature gate 'OOMWatch' to be enabled.")
flag.StringVar(&oomWatchMaxMemoryPath, "oom-watch-max-memory-path", "",
"The path to the cgroup memory limit file. Requires feature gate 'OOMWatch' to be enabled. If not set, the path will be automatically detected.")
flag.StringVar(&oomWatchCurrentMemoryPath, "oom-watch-current-memory-path", "",
"The path to the cgroup current memory usage file. Requires feature gate 'OOMWatch' to be enabled. If not set, the path will be automatically detected.")

clientOptions.BindFlags(flag.CommandLine)
logOptions.BindFlags(flag.CommandLine)
Expand Down Expand Up @@ -210,7 +216,13 @@ func main() {
ctx := ctrl.SetupSignalHandler()
if ok, _ := features.Enabled(features.OOMWatch); ok {
setupLog.Info("setting up OOM watcher")
ow, err := oomwatch.NewDefault(oomWatchMemoryThreshold, oomWatchInterval, ctrl.Log.WithName("OOMwatch"))
ow, err := oomwatch.New(
oomWatchMaxMemoryPath,
oomWatchCurrentMemoryPath,
oomWatchMemoryThreshold,
oomWatchInterval,
ctrl.Log.WithName("OOMwatch"),
)
if err != nil {
setupLog.Error(err, "unable to setup OOM watcher")
os.Exit(1)
Expand Down

0 comments on commit 3ae7c43

Please sign in to comment.