From 667311c7d509794146114e05e7699bf3109bcee3 Mon Sep 17 00:00:00 2001 From: Urvashi Mohnani Date: Mon, 13 Mar 2023 10:51:01 -0400 Subject: [PATCH] Use persist dir for oom file Conmon writes the exit file and oom file (if container was oom killed) to the persist directory. This directory is retained across reboots as well. Update podman to create a persist-dir/ctr-id for the exit and oom files for each container to be written to. The oom state of container is set after reading the files from the persist-dir/ctr-id directory. The exit code still continues to read the exit file from the exits directory. Signed-off-by: Urvashi Mohnani --- libpod/container_exec.go | 10 +++++++++ libpod/container_internal.go | 25 +++++++++++++++------ libpod/oci.go | 6 ++++++ libpod/oci_conmon_common.go | 37 ++++++++++++++++++++++++++------ libpod/oci_conmon_exec_common.go | 5 ++++- libpod/oci_missing.go | 9 ++++++++ test/e2e/run_memory_test.go | 35 ++++++++++++++++++++++++++++++ 7 files changed, 113 insertions(+), 14 deletions(-) diff --git a/libpod/container_exec.go b/libpod/container_exec.go index 0789d06f4e..42f6eae9e4 100644 --- a/libpod/container_exec.go +++ b/libpod/container_exec.go @@ -890,6 +890,13 @@ func (c *Container) execExitFileDir(sessionID string) string { return filepath.Join(c.execBundlePath(sessionID), "exit") } +// execPersistDir gets the path to the container's persist directory +// The persist directory container the exit file and oom file (if oomkilled) +// of a container +func (c *Container) execPersistDir(sessionID string) string { + return filepath.Join(c.execBundlePath(sessionID), "persist", c.ID()) +} + // execOCILog returns the file path for the exec sessions oci log func (c *Container) execOCILog(sessionID string) string { if !c.ociRuntime.SupportsJSONErrors() { @@ -917,6 +924,9 @@ func (c *Container) createExecBundle(sessionID string) (retErr error) { return fmt.Errorf("creating OCI runtime exit file path %s: %w", c.execExitFileDir(sessionID), err) } } + if err := os.MkdirAll(c.execPersistDir(sessionID), execDirPermission); err != nil { + return fmt.Errorf("creating OCI runtime persist directory path %s: %w", c.execPersistDir(sessionID), err) + } return nil } diff --git a/libpod/container_internal.go b/libpod/container_internal.go index 99f296c4de..a7d07da537 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -8,6 +8,7 @@ import ( "errors" "fmt" "io" + "io/fs" "os" "path/filepath" "strconv" @@ -145,6 +146,10 @@ func (c *Container) exitFilePath() (string, error) { return c.ociRuntime.ExitFilePath(c) } +func (c *Container) oomFilePath() (string, error) { + return c.ociRuntime.OOMFilePath(c) +} + // Wait for the container's exit file to appear. // When it does, update our state based on it. func (c *Container) waitForExitFileAndSync() error { @@ -181,6 +186,7 @@ func (c *Container) waitForExitFileAndSync() error { // Handle the container exit file. // The exit file is used to supply container exit time and exit code. // This assumes the exit file already exists. +// Also check for an oom file to determine if the container was oom killed or not. func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error { c.state.FinishedTime = ctime.Created(fi) statusCodeStr, err := os.ReadFile(exitFile) @@ -194,7 +200,10 @@ func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error { } c.state.ExitCode = int32(statusCode) - oomFilePath := filepath.Join(c.bundlePath(), "oom") + oomFilePath, err := c.oomFilePath() + if err != nil { + return err + } if _, err = os.Stat(oomFilePath); err == nil { c.state.OOMKilled = true } @@ -758,11 +767,6 @@ func (c *Container) removeConmonFiles() error { return fmt.Errorf("removing container %s winsz file: %w", c.ID(), err) } - oomFile := filepath.Join(c.bundlePath(), "oom") - if err := os.Remove(oomFile); err != nil && !os.IsNotExist(err) { - return fmt.Errorf("removing container %s OOM file: %w", c.ID(), err) - } - // Remove the exit file so we don't leak memory in tmpfs exitFile, err := c.exitFilePath() if err != nil { @@ -772,6 +776,15 @@ func (c *Container) removeConmonFiles() error { return fmt.Errorf("removing container %s exit file: %w", c.ID(), err) } + // Remove the oom file + oomFile, err := c.oomFilePath() + if err != nil { + return err + } + if err := os.Remove(oomFile); err != nil && !errors.Is(err, fs.ErrNotExist) { + return fmt.Errorf("removing container %s oom file: %w", c.ID(), err) + } + return nil } diff --git a/libpod/oci.go b/libpod/oci.go index 10cb5556bb..a10056779c 100644 --- a/libpod/oci.go +++ b/libpod/oci.go @@ -149,6 +149,12 @@ type OCIRuntime interface { //nolint:interfacebloat // This is the path to that file for a given container. ExitFilePath(ctr *Container) (string, error) + // OOMFilePath is the path to a container's oom file if it was oom killed. + // An oom file is only created when the container is oom killed. The existence + // of this file means that the container was oom killed. + // This is the path to that file for a given container. + OOMFilePath(ctr *Container) (string, error) + // RuntimeInfo returns verbose information about the runtime. RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntimeInfo, error) diff --git a/libpod/oci_conmon_common.go b/libpod/oci_conmon_common.go index e77e0b2217..73179378f6 100644 --- a/libpod/oci_conmon_common.go +++ b/libpod/oci_conmon_common.go @@ -64,6 +64,7 @@ type ConmonOCIRuntime struct { supportsKVM bool supportsNoCgroups bool enableKeyring bool + persistDir string } // Make a new Conmon-based OCI runtime with the given options. @@ -143,13 +144,15 @@ func newConmonOCIRuntime(name string, paths []string, conmonPath string, runtime } runtime.exitsDir = filepath.Join(runtime.tmpDir, "exits") + // The persist-dir is where conmon writes the exit file and oom file (if oom killed), we join the container ID to this path later on + runtime.persistDir = filepath.Join(runtime.tmpDir, "persist") // Create the exit files and attach sockets directories if err := os.MkdirAll(runtime.exitsDir, 0750); err != nil { - // The directory is allowed to exist - if !os.IsExist(err) { - return nil, fmt.Errorf("creating OCI runtime exit files directory: %w", err) - } + return nil, fmt.Errorf("creating OCI runtime exit files directory: %w", err) + } + if err := os.MkdirAll(runtime.persistDir, 0750); err != nil { + return nil, fmt.Errorf("creating OCI runtime persist directory: %w", err) } return runtime, nil } @@ -940,6 +943,12 @@ func (r *ConmonOCIRuntime) ExitFilePath(ctr *Container) (string, error) { return filepath.Join(r.exitsDir, ctr.ID()), nil } +// OOMFilePath is the path to a container's oom file. +// The oom file will only exist if the container was oom killed. +func (r *ConmonOCIRuntime) OOMFilePath(ctr *Container) (string, error) { + return filepath.Join(r.persistDir, ctr.ID(), "oom"), nil +} + // RuntimeInfo provides information on the runtime. func (r *ConmonOCIRuntime) RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntimeInfo, error) { runtimePackage := version.Package(r.path) @@ -1107,7 +1116,11 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co pidfile = filepath.Join(ctr.state.RunDir, "pidfile") } - args := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), pidfile, ctr.LogPath(), r.exitsDir, ociLog, ctr.LogDriver(), logTag) + persistDir := filepath.Join(r.persistDir, ctr.ID()) + args, err := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), pidfile, ctr.LogPath(), r.exitsDir, persistDir, ociLog, ctr.LogDriver(), logTag) + if err != nil { + return 0, err + } if ctr.config.SdNotifyMode == define.SdNotifyModeContainer && ctr.config.SdNotifySocket != "" { args = append(args, fmt.Sprintf("--sdnotify-socket=%s", ctr.config.SdNotifySocket)) @@ -1363,7 +1376,16 @@ func (r *ConmonOCIRuntime) configureConmonEnv() ([]string, error) { } // sharedConmonArgs takes common arguments for exec and create/restore and formats them for the conmon CLI -func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, ociLogPath, logDriver, logTag string) []string { +// func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, persistDir, ociLogPath, logDriver, logTag string) ([]string, error) { +func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, persistDir, ociLogPath, logDriver, logTag string) ([]string, error) { + // Make the persists directory for the container after the ctr ID is appended to it in the caller + // This is needed as conmon writes the exit and oom file in the given persist directory path as just "exit" and "oom" + // So creating a directory with the container ID under the persist dir will help keep track of which container the + // exit and oom files belong to. + if err := os.MkdirAll(persistDir, 0750); err != nil { + return nil, fmt.Errorf("creating OCI runtime oom files directory for ctr %q: %w", ctr.ID(), err) + } + // set the conmon API version to be able to use the correct sync struct keys args := []string{ "--api-version", "1", @@ -1374,6 +1396,7 @@ func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, p "-p", pidPath, "-n", ctr.Name(), "--exit-dir", exitDir, + "--persist-dir", persistDir, "--full-attach", } if len(r.runtimeFlags) > 0 { @@ -1436,7 +1459,7 @@ func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, p logrus.Debugf("Running with no Cgroups") args = append(args, "--runtime-arg", "--cgroup-manager", "--runtime-arg", "disabled") } - return args + return args, nil } // newPipe creates a unix socket pair for communication. diff --git a/libpod/oci_conmon_exec_common.go b/libpod/oci_conmon_exec_common.go index 3e6fbcbc8a..ca917ff1a3 100644 --- a/libpod/oci_conmon_exec_common.go +++ b/libpod/oci_conmon_exec_common.go @@ -387,7 +387,10 @@ func (r *ConmonOCIRuntime) startExec(c *Container, sessionID string, options *Ex } defer processFile.Close() - args := r.sharedConmonArgs(c, sessionID, c.execBundlePath(sessionID), c.execPidPath(sessionID), c.execLogPath(sessionID), c.execExitFileDir(sessionID), ociLog, define.NoLogging, c.config.LogTag) + args, err := r.sharedConmonArgs(c, sessionID, c.execBundlePath(sessionID), c.execPidPath(sessionID), c.execLogPath(sessionID), c.execExitFileDir(sessionID), c.execPersistDir(sessionID), ociLog, define.NoLogging, c.config.LogTag) + if err != nil { + return nil, nil, err + } preserveFDs, filesToClose, extraFiles, err := getPreserveFdExtraFiles(options.PreserveFD, options.PreserveFDs) if err != nil { diff --git a/libpod/oci_missing.go b/libpod/oci_missing.go index 2879b871ec..0ac6417f58 100644 --- a/libpod/oci_missing.go +++ b/libpod/oci_missing.go @@ -29,6 +29,8 @@ type MissingRuntime struct { name string // exitsDir is the directory for exit files. exitsDir string + // persistDir is the directory for exit and oom files. + persistDir string } // Get a new MissingRuntime for the given name. @@ -52,6 +54,7 @@ func getMissingRuntime(name string, r *Runtime) OCIRuntime { newRuntime := new(MissingRuntime) newRuntime.name = name newRuntime.exitsDir = filepath.Join(r.config.Engine.TmpDir, "exits") + newRuntime.persistDir = filepath.Join(r.config.Engine.TmpDir, "persist") missingRuntimes[name] = newRuntime @@ -222,6 +225,12 @@ func (r *MissingRuntime) ExitFilePath(ctr *Container) (string, error) { return filepath.Join(r.exitsDir, ctr.ID()), nil } +// OOMFilePath returns the oom file path for a container. +// The oom file will only exist if the container was oom killed. +func (r *MissingRuntime) OOMFilePath(ctr *Container) (string, error) { + return filepath.Join(r.persistDir, ctr.ID(), "oom"), nil +} + // RuntimeInfo returns information on the missing runtime func (r *MissingRuntime) RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntimeInfo, error) { ocirt := define.OCIRuntimeInfo{ diff --git a/test/e2e/run_memory_test.go b/test/e2e/run_memory_test.go index b7e9703d7b..0fe0826683 100644 --- a/test/e2e/run_memory_test.go +++ b/test/e2e/run_memory_test.go @@ -70,4 +70,39 @@ var _ = Describe("Podman run memory", func() { Expect(session.OutputToString()).To(Equal(limit)) }) } + + It("podman run memory test on oomkilled container", func() { + mem := SystemExec("cat", []string{"/proc/sys/vm/overcommit_memory"}) + mem.WaitWithDefaultTimeout() + if mem.OutputToString() != "0" { + Skip("overcommit memory is not set to 0") + } + + ctrName := "oomkilled-ctr" + // create a container that gets oomkilled + session := podmanTest.Podman([]string{"run", "--name", ctrName, "--read-only", "--memory-swap=20m", "--memory=20m", "--oom-score-adj=1000", ALPINE, "sort", "/dev/urandom"}) + session.WaitWithDefaultTimeout() + Expect(session).Should(ExitWithError()) + + inspect := podmanTest.Podman(([]string{"inspect", "--format", "{{.State.OOMKilled}} {{.State.ExitCode}}", ctrName})) + inspect.WaitWithDefaultTimeout() + Expect(inspect).Should(ExitCleanly()) + // Check oomkilled and exit code values + Expect(inspect.OutputToString()).Should(ContainSubstring("true")) + Expect(inspect.OutputToString()).Should(ContainSubstring("137")) + }) + + It("podman run memory test on successfully exited container", func() { + ctrName := "success-ctr" + session := podmanTest.Podman([]string{"run", "--name", ctrName, "--memory=40m", ALPINE, "echo", "hello"}) + session.WaitWithDefaultTimeout() + Expect(session).Should(ExitCleanly()) + + inspect := podmanTest.Podman(([]string{"inspect", "--format", "{{.State.OOMKilled}} {{.State.ExitCode}}", ctrName})) + inspect.WaitWithDefaultTimeout() + Expect(inspect).Should(ExitCleanly()) + // Check oomkilled and exit code values + Expect(inspect.OutputToString()).Should(ContainSubstring("false")) + Expect(inspect.OutputToString()).Should(ContainSubstring("0")) + }) })