Skip to content

Commit

Permalink
Merge pull request #5260 from tonistiigi/executor-oomkiller
Browse files Browse the repository at this point in the history
executor: detect containers killed by OOMKiller
Signed-off-by: Billy Owire <billyowire@microsoft.com>
  • Loading branch information
tonistiigi authored and Billy Owire committed Aug 23, 2024
2 parents 60b8bfe + bc9e857 commit dcfe789
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 3 deletions.
9 changes: 6 additions & 3 deletions executor/runcexecutor/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ func (w *runcExecutor) Run(ctx context.Context, id string, root executor.Mount,
}
doReleaseNetwork = false

err = exitError(ctx, err)
err = exitError(ctx, cgroupPath, err)
if err != nil {
if rec != nil {
rec.Close()
Expand All @@ -351,7 +351,7 @@ func (w *runcExecutor) Run(ctx context.Context, id string, root executor.Mount,
return rec, rec.CloseAsync(releaseContainer)
}

func exitError(ctx context.Context, err error) error {
func exitError(ctx context.Context, cgroupPath string, err error) error {
if err != nil {
exitErr := &gatewayapi.ExitError{
ExitCode: gatewayapi.UnknownExitStatus,
Expand All @@ -363,6 +363,9 @@ func exitError(ctx context.Context, err error) error {
ExitCode: uint32(runcExitError.Status),
}
}

detectOOM(ctx, cgroupPath, exitErr)

trace.SpanFromContext(ctx).AddEvent(
"Container exited",
trace.WithAttributes(
Expand Down Expand Up @@ -453,7 +456,7 @@ func (w *runcExecutor) Exec(ctx context.Context, id string, process executor.Pro
}

err = w.exec(ctx, id, spec.Process, process, nil)
return exitError(ctx, err)
return exitError(ctx, "", err)
}

type forwardIO struct {
Expand Down
46 changes: 46 additions & 0 deletions executor/runcexecutor/executor_linux.go
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
package runcexecutor

import (
"bufio"
"context"
"io"
"os"
"path/filepath"
"strconv"
"strings"
"syscall"

"github.com/containerd/console"
runc "github.com/containerd/go-runc"
"github.com/moby/buildkit/executor"
gatewayapi "github.com/moby/buildkit/frontend/gateway/pb"
"github.com/moby/buildkit/util/bklog"
"github.com/moby/sys/signal"
"github.com/opencontainers/runtime-spec/specs-go"
Expand Down Expand Up @@ -172,3 +177,44 @@ func (w *runcExecutor) callWithIO(ctx context.Context, process executor.ProcessI

return call(ctx, startedCh, runcIO, killer.pidfile)
}

// detectOOM inspects the "oom_kill" counter of the cgroup identified by ns and,
// when that counter is greater than zero, replaces gwErr.Err with
// syscall.ENOMEM.
//
// ns is a path relative to /sys/fs/cgroup. An empty ns makes the function a
// no-op. If the counter cannot be read, a warning is logged through bklog and
// gwErr is left unchanged.
func detectOOM(ctx context.Context, ns string, gwErr *gatewayapi.ExitError) {
	const defaultCgroupMountpoint = "/sys/fs/cgroup"

	// Nothing to look up without a cgroup path.
	if ns == "" {
		return
	}

	cgroupDir := filepath.Join(defaultCgroupMountpoint, ns)
	kills, err := readMemoryEvent(cgroupDir, "oom_kill")
	switch {
	case err != nil:
		// Best effort only: report the failure and keep the original error.
		bklog.G(ctx).WithError(err).Warn("failed to read oom_kill event")
	case kills > 0:
		gwErr.Err = syscall.ENOMEM
	}
}

// readMemoryEvent returns the counter recorded for event in the
// "memory.events" file found in directory fp.
//
// Every line is split on whitespace. Lines that do not consist of exactly two
// fields, whose first field differs from event, or whose second field is not a
// base-10 unsigned 64-bit integer are skipped. The first matching line with a
// parsable value wins.
//
// If the file cannot be opened, that error is returned. If no usable line is
// found, the result is 0 together with whatever error the scanner reported
// (nil when the file was read to the end without problems).
func readMemoryEvent(fp string, event string) (uint64, error) {
	file, err := os.Open(filepath.Join(fp, "memory.events"))
	if err != nil {
		return 0, err
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		fields := strings.Fields(scanner.Text())
		if len(fields) == 2 && fields[0] == event {
			// An unparsable value is not fatal; keep scanning the remaining lines.
			if count, perr := strconv.ParseUint(fields[1], 10, 64); perr == nil {
				return count, nil
			}
		}
	}
	return 0, scanner.Err()
}

0 comments on commit dcfe789

Please sign in to comment.