Skip to content

Commit

Permalink
Merge pull request #21523 from umohnani8/memory-final
Browse files Browse the repository at this point in the history
Use persist dir for oom file
  • Loading branch information
openshift-merge-bot[bot] authored Feb 12, 2024
2 parents 1e006b2 + 667311c commit 01bd79b
Show file tree
Hide file tree
Showing 7 changed files with 113 additions and 14 deletions.
10 changes: 10 additions & 0 deletions libpod/container_exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -890,6 +890,13 @@ func (c *Container) execExitFileDir(sessionID string) string {
return filepath.Join(c.execBundlePath(sessionID), "exit")
}

// execPersistDir gets the path to the container's persist directory
// The persist directory container the exit file and oom file (if oomkilled)
// of a container
func (c *Container) execPersistDir(sessionID string) string {
return filepath.Join(c.execBundlePath(sessionID), "persist", c.ID())
}

// execOCILog returns the file path for the exec sessions oci log
func (c *Container) execOCILog(sessionID string) string {
if !c.ociRuntime.SupportsJSONErrors() {
Expand Down Expand Up @@ -917,6 +924,9 @@ func (c *Container) createExecBundle(sessionID string) (retErr error) {
return fmt.Errorf("creating OCI runtime exit file path %s: %w", c.execExitFileDir(sessionID), err)
}
}
if err := os.MkdirAll(c.execPersistDir(sessionID), execDirPermission); err != nil {
return fmt.Errorf("creating OCI runtime persist directory path %s: %w", c.execPersistDir(sessionID), err)
}
return nil
}

Expand Down
25 changes: 19 additions & 6 deletions libpod/container_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"errors"
"fmt"
"io"
"io/fs"
"os"
"path/filepath"
"strconv"
Expand Down Expand Up @@ -145,6 +146,10 @@ func (c *Container) exitFilePath() (string, error) {
return c.ociRuntime.ExitFilePath(c)
}

func (c *Container) oomFilePath() (string, error) {
return c.ociRuntime.OOMFilePath(c)
}

// Wait for the container's exit file to appear.
// When it does, update our state based on it.
func (c *Container) waitForExitFileAndSync() error {
Expand Down Expand Up @@ -181,6 +186,7 @@ func (c *Container) waitForExitFileAndSync() error {
// Handle the container exit file.
// The exit file is used to supply container exit time and exit code.
// This assumes the exit file already exists.
// Also check for an oom file to determine if the container was oom killed or not.
func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error {
c.state.FinishedTime = ctime.Created(fi)
statusCodeStr, err := os.ReadFile(exitFile)
Expand All @@ -194,7 +200,10 @@ func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error {
}
c.state.ExitCode = int32(statusCode)

oomFilePath := filepath.Join(c.bundlePath(), "oom")
oomFilePath, err := c.oomFilePath()
if err != nil {
return err
}
if _, err = os.Stat(oomFilePath); err == nil {
c.state.OOMKilled = true
}
Expand Down Expand Up @@ -758,11 +767,6 @@ func (c *Container) removeConmonFiles() error {
return fmt.Errorf("removing container %s winsz file: %w", c.ID(), err)
}

oomFile := filepath.Join(c.bundlePath(), "oom")
if err := os.Remove(oomFile); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("removing container %s OOM file: %w", c.ID(), err)
}

// Remove the exit file so we don't leak memory in tmpfs
exitFile, err := c.exitFilePath()
if err != nil {
Expand All @@ -772,6 +776,15 @@ func (c *Container) removeConmonFiles() error {
return fmt.Errorf("removing container %s exit file: %w", c.ID(), err)
}

// Remove the oom file
oomFile, err := c.oomFilePath()
if err != nil {
return err
}
if err := os.Remove(oomFile); err != nil && !errors.Is(err, fs.ErrNotExist) {
return fmt.Errorf("removing container %s oom file: %w", c.ID(), err)
}

return nil
}

Expand Down
6 changes: 6 additions & 0 deletions libpod/oci.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,12 @@ type OCIRuntime interface { //nolint:interfacebloat
// This is the path to that file for a given container.
ExitFilePath(ctr *Container) (string, error)

// OOMFilePath is the path to a container's oom file if it was oom killed.
// An oom file is only created when the container is oom killed. The existence
// of this file means that the container was oom killed.
// This is the path to that file for a given container.
OOMFilePath(ctr *Container) (string, error)

// RuntimeInfo returns verbose information about the runtime.
RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntimeInfo, error)

Expand Down
37 changes: 30 additions & 7 deletions libpod/oci_conmon_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ type ConmonOCIRuntime struct {
supportsKVM bool
supportsNoCgroups bool
enableKeyring bool
persistDir string
}

// Make a new Conmon-based OCI runtime with the given options.
Expand Down Expand Up @@ -143,13 +144,15 @@ func newConmonOCIRuntime(name string, paths []string, conmonPath string, runtime
}

runtime.exitsDir = filepath.Join(runtime.tmpDir, "exits")
// The persist-dir is where conmon writes the exit file and oom file (if oom killed), we join the container ID to this path later on
runtime.persistDir = filepath.Join(runtime.tmpDir, "persist")

// Create the exit files and attach sockets directories
if err := os.MkdirAll(runtime.exitsDir, 0750); err != nil {
// The directory is allowed to exist
if !os.IsExist(err) {
return nil, fmt.Errorf("creating OCI runtime exit files directory: %w", err)
}
return nil, fmt.Errorf("creating OCI runtime exit files directory: %w", err)
}
if err := os.MkdirAll(runtime.persistDir, 0750); err != nil {
return nil, fmt.Errorf("creating OCI runtime persist directory: %w", err)
}
return runtime, nil
}
Expand Down Expand Up @@ -940,6 +943,12 @@ func (r *ConmonOCIRuntime) ExitFilePath(ctr *Container) (string, error) {
return filepath.Join(r.exitsDir, ctr.ID()), nil
}

// OOMFilePath is the path to a container's oom file.
// The oom file will only exist if the container was oom killed.
func (r *ConmonOCIRuntime) OOMFilePath(ctr *Container) (string, error) {
return filepath.Join(r.persistDir, ctr.ID(), "oom"), nil
}

// RuntimeInfo provides information on the runtime.
func (r *ConmonOCIRuntime) RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntimeInfo, error) {
runtimePackage := version.Package(r.path)
Expand Down Expand Up @@ -1107,7 +1116,11 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
pidfile = filepath.Join(ctr.state.RunDir, "pidfile")
}

args := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), pidfile, ctr.LogPath(), r.exitsDir, ociLog, ctr.LogDriver(), logTag)
persistDir := filepath.Join(r.persistDir, ctr.ID())
args, err := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), pidfile, ctr.LogPath(), r.exitsDir, persistDir, ociLog, ctr.LogDriver(), logTag)
if err != nil {
return 0, err
}

if ctr.config.SdNotifyMode == define.SdNotifyModeContainer && ctr.config.SdNotifySocket != "" {
args = append(args, fmt.Sprintf("--sdnotify-socket=%s", ctr.config.SdNotifySocket))
Expand Down Expand Up @@ -1363,7 +1376,16 @@ func (r *ConmonOCIRuntime) configureConmonEnv() ([]string, error) {
}

// sharedConmonArgs takes common arguments for exec and create/restore and formats them for the conmon CLI
func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, ociLogPath, logDriver, logTag string) []string {
// func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, persistDir, ociLogPath, logDriver, logTag string) ([]string, error) {
func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, persistDir, ociLogPath, logDriver, logTag string) ([]string, error) {
// Make the persists directory for the container after the ctr ID is appended to it in the caller
// This is needed as conmon writes the exit and oom file in the given persist directory path as just "exit" and "oom"
// So creating a directory with the container ID under the persist dir will help keep track of which container the
// exit and oom files belong to.
if err := os.MkdirAll(persistDir, 0750); err != nil {
return nil, fmt.Errorf("creating OCI runtime oom files directory for ctr %q: %w", ctr.ID(), err)
}

// set the conmon API version to be able to use the correct sync struct keys
args := []string{
"--api-version", "1",
Expand All @@ -1374,6 +1396,7 @@ func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, p
"-p", pidPath,
"-n", ctr.Name(),
"--exit-dir", exitDir,
"--persist-dir", persistDir,
"--full-attach",
}
if len(r.runtimeFlags) > 0 {
Expand Down Expand Up @@ -1436,7 +1459,7 @@ func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, p
logrus.Debugf("Running with no Cgroups")
args = append(args, "--runtime-arg", "--cgroup-manager", "--runtime-arg", "disabled")
}
return args
return args, nil
}

// newPipe creates a unix socket pair for communication.
Expand Down
5 changes: 4 additions & 1 deletion libpod/oci_conmon_exec_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,10 @@ func (r *ConmonOCIRuntime) startExec(c *Container, sessionID string, options *Ex
}
defer processFile.Close()

args := r.sharedConmonArgs(c, sessionID, c.execBundlePath(sessionID), c.execPidPath(sessionID), c.execLogPath(sessionID), c.execExitFileDir(sessionID), ociLog, define.NoLogging, c.config.LogTag)
args, err := r.sharedConmonArgs(c, sessionID, c.execBundlePath(sessionID), c.execPidPath(sessionID), c.execLogPath(sessionID), c.execExitFileDir(sessionID), c.execPersistDir(sessionID), ociLog, define.NoLogging, c.config.LogTag)
if err != nil {
return nil, nil, err
}

preserveFDs, filesToClose, extraFiles, err := getPreserveFdExtraFiles(options.PreserveFD, options.PreserveFDs)
if err != nil {
Expand Down
9 changes: 9 additions & 0 deletions libpod/oci_missing.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ type MissingRuntime struct {
name string
// exitsDir is the directory for exit files.
exitsDir string
// persistDir is the directory for exit and oom files.
persistDir string
}

// Get a new MissingRuntime for the given name.
Expand All @@ -52,6 +54,7 @@ func getMissingRuntime(name string, r *Runtime) OCIRuntime {
newRuntime := new(MissingRuntime)
newRuntime.name = name
newRuntime.exitsDir = filepath.Join(r.config.Engine.TmpDir, "exits")
newRuntime.persistDir = filepath.Join(r.config.Engine.TmpDir, "persist")

missingRuntimes[name] = newRuntime

Expand Down Expand Up @@ -222,6 +225,12 @@ func (r *MissingRuntime) ExitFilePath(ctr *Container) (string, error) {
return filepath.Join(r.exitsDir, ctr.ID()), nil
}

// OOMFilePath returns the oom file path for a container.
// The oom file will only exist if the container was oom killed.
func (r *MissingRuntime) OOMFilePath(ctr *Container) (string, error) {
return filepath.Join(r.persistDir, ctr.ID(), "oom"), nil
}

// RuntimeInfo returns information on the missing runtime
func (r *MissingRuntime) RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntimeInfo, error) {
ocirt := define.OCIRuntimeInfo{
Expand Down
35 changes: 35 additions & 0 deletions test/e2e/run_memory_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,39 @@ var _ = Describe("Podman run memory", func() {
Expect(session.OutputToString()).To(Equal(limit))
})
}

It("podman run memory test on oomkilled container", func() {
mem := SystemExec("cat", []string{"/proc/sys/vm/overcommit_memory"})
mem.WaitWithDefaultTimeout()
if mem.OutputToString() != "0" {
Skip("overcommit memory is not set to 0")
}

ctrName := "oomkilled-ctr"
// create a container that gets oomkilled
session := podmanTest.Podman([]string{"run", "--name", ctrName, "--read-only", "--memory-swap=20m", "--memory=20m", "--oom-score-adj=1000", ALPINE, "sort", "/dev/urandom"})
session.WaitWithDefaultTimeout()
Expect(session).Should(ExitWithError())

inspect := podmanTest.Podman(([]string{"inspect", "--format", "{{.State.OOMKilled}} {{.State.ExitCode}}", ctrName}))
inspect.WaitWithDefaultTimeout()
Expect(inspect).Should(ExitCleanly())
// Check oomkilled and exit code values
Expect(inspect.OutputToString()).Should(ContainSubstring("true"))
Expect(inspect.OutputToString()).Should(ContainSubstring("137"))
})

It("podman run memory test on successfully exited container", func() {
ctrName := "success-ctr"
session := podmanTest.Podman([]string{"run", "--name", ctrName, "--memory=40m", ALPINE, "echo", "hello"})
session.WaitWithDefaultTimeout()
Expect(session).Should(ExitCleanly())

inspect := podmanTest.Podman(([]string{"inspect", "--format", "{{.State.OOMKilled}} {{.State.ExitCode}}", ctrName}))
inspect.WaitWithDefaultTimeout()
Expect(inspect).Should(ExitCleanly())
// Check oomkilled and exit code values
Expect(inspect.OutputToString()).Should(ContainSubstring("false"))
Expect(inspect.OutputToString()).Should(ContainSubstring("0"))
})
})

0 comments on commit 01bd79b

Please sign in to comment.