Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

oomwatch: auto-detect well known cgroup paths #641

Merged
merged 1 commit into from
Mar 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 70 additions & 20 deletions internal/oomwatch/watch.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,26 @@ import (
"github.com/go-logr/logr"
)

const (
// DefaultCgroupPath is the default path to the cgroup directory.
var (
// DefaultCgroupPath is the default path to the cgroup directory within a
// container. It is used to discover the cgroup files if they are not
// provided.
DefaultCgroupPath = "/sys/fs/cgroup/"
// MemoryMaxFile is the cgroup memory.max filename.
)

const (
// MemoryLimitFile is the cgroup v1 memory.limit_in_bytes filepath relative
// to DefaultCgroupPath.
MemoryLimitFile = "memory/memory.limit_in_bytes"
// MemoryUsageFile is the cgroup v1 memory.usage_in_bytes filepath relative
// to DefaultCgroupPath.
MemoryUsageFile = "memory/memory.usage_in_bytes"

// MemoryMaxFile is the cgroup v2 memory.max filepath relative to
// DefaultCgroupPath.
MemoryMaxFile = "memory.max"
// MemoryCurrentFile is the cgroup memory.current filename.
// MemoryCurrentFile is the cgroup v2 memory.current filepath relative to
// DefaultCgroupPath.
MemoryCurrentFile = "memory.current"
)

Expand All @@ -61,8 +75,11 @@ type Watcher struct {
once sync.Once
}

// New returns a new Watcher.
func New(memoryMaxPath, memoryCurrentPath string, memoryUsagePercentThreshold uint8, interval time.Duration, logger logr.Logger) (*Watcher, error) {
// New returns a new Watcher with the given configuration. If the provided
// paths are empty, it will attempt to discover the paths to the cgroup files.
// It returns an error if the paths cannot be discovered or if the provided
// configuration is invalid.
func New(memoryMaxPath, memoryCurrentPath string, memoryUsagePercentThreshold uint8, interval time.Duration, logger logr.Logger) (_ *Watcher, err error) {
if memoryUsagePercentThreshold < 1 || memoryUsagePercentThreshold > 100 {
return nil, fmt.Errorf("memory usage percent threshold must be between 1 and 100, got %d", memoryUsagePercentThreshold)
}
Expand All @@ -71,13 +88,18 @@ func New(memoryMaxPath, memoryCurrentPath string, memoryUsagePercentThreshold ui
return nil, fmt.Errorf("interval must be at least %s, got %s", minInterval, interval)
}

if _, err := os.Lstat(memoryCurrentPath); err != nil {
return nil, fmt.Errorf("failed to stat memory.current %q: %w", memoryCurrentPath, err)
memoryMaxPath, memoryCurrentPath, err = discoverCgroupPaths(memoryMaxPath, memoryCurrentPath)
if err != nil {
return nil, err
}

if _, err = os.Lstat(memoryCurrentPath); err != nil {
return nil, fmt.Errorf("failed to confirm existence of current memory usage file: %w", err)
}

memoryMax, err := readUintFromFile(memoryMaxPath)
if err != nil {
return nil, fmt.Errorf("failed to read memory.max %q: %w", memoryMaxPath, err)
return nil, fmt.Errorf("failed to read memory usage limit: %w", err)
}

return &Watcher{
Expand All @@ -89,17 +111,6 @@ func New(memoryMaxPath, memoryCurrentPath string, memoryUsagePercentThreshold ui
}, nil
}

// NewDefault constructs a Watcher using the default cgroup file locations:
// MemoryMaxFile and MemoryCurrentFile, each joined onto DefaultCgroupPath.
// The threshold, interval and logger are handed to New unchanged, and New's
// result (including any error) is returned as-is.
func NewDefault(memoryUsagePercentThreshold uint8, interval time.Duration, logger logr.Logger) (*Watcher, error) {
	maxPath := filepath.Join(DefaultCgroupPath, MemoryMaxFile)
	currentPath := filepath.Join(DefaultCgroupPath, MemoryCurrentFile)

	return New(maxPath, currentPath, memoryUsagePercentThreshold, interval, logger)
}

// Watch returns a context that is canceled when the system reaches the
// configured memory usage threshold. Calling Watch multiple times will return
// the same context.
Expand Down Expand Up @@ -144,6 +155,45 @@ func (w *Watcher) watchForNearOOM(ctx context.Context) {
}
}

// discoverCgroupPaths resolves the max and current memory file paths. A path
// that is provided (non-empty) is returned untouched. A path that is empty is
// looked up under DefaultCgroupPath, probing the cgroup v2 file first and the
// cgroup v1 file second; the first one that can be stat'ed wins.
//
// It returns the resulting max path and current path, in that order. If
// either path is still empty after discovery, both returned paths are empty
// and the error states which of them could not be discovered.
func discoverCgroupPaths(memoryMaxPath, memoryCurrentPath string) (string, string, error) {
	// firstExisting joins each relative name onto DefaultCgroupPath and
	// returns the first result os.Lstat succeeds on, or "" when none does.
	firstExisting := func(relNames ...string) string {
		for _, rel := range relNames {
			candidate := filepath.Join(DefaultCgroupPath, rel)
			if _, err := os.Lstat(candidate); err == nil {
				return candidate
			}
		}
		return ""
	}

	// Candidate order matters: v2 file first, v1 file as the fallback.
	if memoryMaxPath == "" {
		memoryMaxPath = firstExisting(MemoryMaxFile, MemoryLimitFile)
	}
	if memoryCurrentPath == "" {
		memoryCurrentPath = firstExisting(MemoryCurrentFile, MemoryUsageFile)
	}

	switch {
	case memoryMaxPath == "" && memoryCurrentPath == "":
		return "", "", fmt.Errorf("failed to discover cgroup paths, please specify them manually")
	case memoryMaxPath == "":
		return "", "", fmt.Errorf("failed to discover max memory path, please specify it manually")
	case memoryCurrentPath == "":
		return "", "", fmt.Errorf("failed to discover current memory path, please specify it manually")
	}

	return memoryMaxPath, memoryCurrentPath, nil
}

// readUintFromFile reads an uint64 from the file at the given path.
func readUintFromFile(path string) (uint64, error) {
b, err := os.ReadFile(path)
Expand Down
128 changes: 124 additions & 4 deletions internal/oomwatch/watch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,42 @@ func TestNew(t *testing.T) {
}))
})

t.Run("auto discovery", func(t *testing.T) {
t.Run("success", func(t *testing.T) {
g := NewWithT(t)

setDefaultCgroupPath(t)

mockMemoryMax := filepath.Join(DefaultCgroupPath, MemoryMaxFile)
g.Expect(os.WriteFile(mockMemoryMax, []byte("1000000000"), 0o640)).To(Succeed())

mockMemoryCurrent := filepath.Join(DefaultCgroupPath, MemoryCurrentFile)
_, err := os.Create(mockMemoryCurrent)
g.Expect(err).ToNot(HaveOccurred())

w, err := New("", "", 1, time.Second, logr.Discard())
g.Expect(err).ToNot(HaveOccurred())

g.Expect(w).To(BeEquivalentTo(&Watcher{
memoryMax: uint64(1000000000),
memoryCurrentPath: mockMemoryCurrent,
memoryUsagePercentThreshold: 1,
interval: time.Second,
logger: logr.Discard(),
}))
})

t.Run("failure", func(t *testing.T) {
g := NewWithT(t)

setDefaultCgroupPath(t)

_, err := New("", "", 1, time.Second, logr.Discard())
g.Expect(err).To(HaveOccurred())
g.Expect(err.Error()).To(ContainSubstring("failed to discover cgroup paths"))
})
})

t.Run("validation", func(t *testing.T) {
t.Run("memory usage percentage threshold", func(t *testing.T) {
t.Run("less than 1", func(t *testing.T) {
Expand Down Expand Up @@ -82,9 +118,9 @@ func TestNew(t *testing.T) {
t.Run("does not exist", func(t *testing.T) {
g := NewWithT(t)

_, err := New("", "", 1, 50*time.Second, logr.Discard())
_, err := New("ignore", "does.not.exist", 1, 50*time.Second, logr.Discard())
g.Expect(err).To(HaveOccurred())
g.Expect(err.Error()).To(ContainSubstring("failed to stat memory.current \"\": lstat : no such file or directory"))
g.Expect(err.Error()).To(ContainSubstring(`failed to confirm existence of current memory usage file: lstat does.not.exist: no such file or directory`))
})
})

Expand All @@ -96,9 +132,9 @@ func TestNew(t *testing.T) {
_, err := os.Create(mockMemoryCurrent)
g.Expect(err).NotTo(HaveOccurred())

_, err = New("", mockMemoryCurrent, 1, 50*time.Second, logr.Discard())
_, err = New("does.not.exist", mockMemoryCurrent, 1, 50*time.Second, logr.Discard())
g.Expect(err).To(HaveOccurred())
g.Expect(err.Error()).To(ContainSubstring("failed to read memory.max \"\": open : no such file or directory"))
g.Expect(err.Error()).To(ContainSubstring(`failed to read memory usage limit: open does.not.exist: no such file or directory`))
})
})
})
Expand Down Expand Up @@ -248,3 +284,87 @@ func TestWatcher_watchForNearOOM(t *testing.T) {
}
})
}

// Test_discoverCgroupPaths checks that discoverCgroupPaths finds the cgroup
// v2 and v1 memory files under DefaultCgroupPath when a path is omitted,
// passes provided paths through untouched, and errors when nothing is found.
func Test_discoverCgroupPaths(t *testing.T) {
	t.Run("discovers memory max path", func(t *testing.T) {
		for _, relPath := range []string{MemoryMaxFile, MemoryLimitFile} {
			t.Run(relPath, func(t *testing.T) {
				g := NewWithT(t)

				setDefaultCgroupPath(t)

				// Create the max file (and its parent dir) to be discovered.
				wantMax := filepath.Join(DefaultCgroupPath, relPath)
				g.Expect(os.MkdirAll(filepath.Dir(wantMax), 0o755)).To(Succeed())
				g.Expect(os.WriteFile(wantMax, []byte("0"), 0o640)).To(Succeed())

				providedCurrent := filepath.Join(DefaultCgroupPath, "dummy")
				gotMax, gotCurrent, err := discoverCgroupPaths("", providedCurrent)
				g.Expect(err).NotTo(HaveOccurred())
				g.Expect(gotMax).To(Equal(wantMax))
				g.Expect(gotCurrent).To(Equal(providedCurrent))
			})
		}
	})

	t.Run("discovers memory current path", func(t *testing.T) {
		for _, relPath := range []string{MemoryCurrentFile, MemoryUsageFile} {
			t.Run(relPath, func(t *testing.T) {
				g := NewWithT(t)

				setDefaultCgroupPath(t)

				// Create the current file (and its parent dir) to be discovered.
				wantCurrent := filepath.Join(DefaultCgroupPath, relPath)
				g.Expect(os.MkdirAll(filepath.Dir(wantCurrent), 0o755)).To(Succeed())
				g.Expect(os.WriteFile(wantCurrent, []byte("0"), 0o640)).To(Succeed())

				providedMax := filepath.Join(DefaultCgroupPath, "dummy")
				gotMax, gotCurrent, err := discoverCgroupPaths(providedMax, "")
				g.Expect(err).NotTo(HaveOccurred())
				g.Expect(gotMax).To(Equal(providedMax))
				g.Expect(gotCurrent).To(Equal(wantCurrent))
			})
		}
	})

	t.Run("returns provided paths", func(t *testing.T) {
		g := NewWithT(t)

		// Neither path needs to exist: provided paths are not stat'ed here.
		providedMax := filepath.Join(DefaultCgroupPath, "dummy")
		providedCurrent := filepath.Join(DefaultCgroupPath, "dummy")

		gotMax, gotCurrent, err := discoverCgroupPaths(providedMax, providedCurrent)
		g.Expect(err).NotTo(HaveOccurred())
		g.Expect(gotMax).To(Equal(providedMax))
		g.Expect(gotCurrent).To(Equal(providedCurrent))
	})

	t.Run("returns error when no paths are discovered", func(t *testing.T) {
		g := NewWithT(t)

		// The fresh temp dir is empty, so no candidate file exists.
		setDefaultCgroupPath(t)

		gotMax, gotCurrent, err := discoverCgroupPaths("", "")
		g.Expect(err).To(HaveOccurred())
		g.Expect(err.Error()).To(ContainSubstring("failed to discover cgroup paths"))
		g.Expect(gotMax).To(BeEmpty())
		g.Expect(gotCurrent).To(BeEmpty())
	})
}

// setDefaultCgroupPath points the package-level DefaultCgroupPath at a fresh
// temporary directory for the duration of the calling test, and registers a
// cleanup that puts the previous value back once the test finishes.
func setDefaultCgroupPath(t *testing.T) {
	t.Helper()

	// Capture the value BEFORE overriding it. Reading DefaultCgroupPath inside
	// the cleanup closure would observe the temp dir and "restore" it to
	// itself, leaking the override into every test that runs afterwards.
	original := DefaultCgroupPath
	t.Cleanup(func() {
		DefaultCgroupPath = original
	})
	DefaultCgroupPath = t.TempDir()
}
48 changes: 30 additions & 18 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,23 +70,25 @@ func init() {

func main() {
var (
metricsAddr string
eventsAddr string
healthAddr string
concurrent int
requeueDependency time.Duration
gracefulShutdownTimeout time.Duration
watchAllNamespaces bool
httpRetry int
clientOptions client.Options
kubeConfigOpts client.KubeConfigOptions
featureGates feathelper.FeatureGates
logOptions logger.Options
aclOptions acl.Options
leaderElectionOptions leaderelection.Options
rateLimiterOptions helper.RateLimiterOptions
oomWatchInterval time.Duration
oomWatchMemoryThreshold uint8
metricsAddr string
eventsAddr string
healthAddr string
concurrent int
requeueDependency time.Duration
gracefulShutdownTimeout time.Duration
watchAllNamespaces bool
httpRetry int
clientOptions client.Options
kubeConfigOpts client.KubeConfigOptions
featureGates feathelper.FeatureGates
logOptions logger.Options
aclOptions acl.Options
leaderElectionOptions leaderelection.Options
rateLimiterOptions helper.RateLimiterOptions
oomWatchInterval time.Duration
oomWatchMemoryThreshold uint8
oomWatchMaxMemoryPath string
oomWatchCurrentMemoryPath string
)

flag.StringVar(&metricsAddr, "metrics-addr", ":8080",
Expand All @@ -111,6 +113,10 @@ func main() {
"The memory threshold in percentage at which the OOM watcher will trigger a graceful shutdown. Requires feature gate 'OOMWatch' to be enabled.")
flag.DurationVar(&oomWatchInterval, "oom-watch-interval", 500*time.Millisecond,
"The interval at which the OOM watcher will check for memory usage. Requires feature gate 'OOMWatch' to be enabled.")
flag.StringVar(&oomWatchMaxMemoryPath, "oom-watch-max-memory-path", "",
"The path to the cgroup memory limit file. Requires feature gate 'OOMWatch' to be enabled. If not set, the path will be automatically detected.")
flag.StringVar(&oomWatchCurrentMemoryPath, "oom-watch-current-memory-path", "",
"The path to the cgroup current memory usage file. Requires feature gate 'OOMWatch' to be enabled. If not set, the path will be automatically detected.")

clientOptions.BindFlags(flag.CommandLine)
logOptions.BindFlags(flag.CommandLine)
Expand Down Expand Up @@ -210,7 +216,13 @@ func main() {
ctx := ctrl.SetupSignalHandler()
if ok, _ := features.Enabled(features.OOMWatch); ok {
setupLog.Info("setting up OOM watcher")
ow, err := oomwatch.NewDefault(oomWatchMemoryThreshold, oomWatchInterval, ctrl.Log.WithName("OOMwatch"))
ow, err := oomwatch.New(
oomWatchMaxMemoryPath,
oomWatchCurrentMemoryPath,
oomWatchMemoryThreshold,
oomWatchInterval,
ctrl.Log.WithName("OOMwatch"),
)
if err != nil {
setupLog.Error(err, "unable to setup OOM watcher")
os.Exit(1)
Expand Down