Skip to content

Commit

Permalink
executor: detect containers killed by OOMKiller
Browse files Browse the repository at this point in the history
If a container exits with an error and the OOM killer was invoked for it,
mark the original error as ENOMEM so that it can be detected
on the client side.

gRPC will map ENOMEM to codes.ResourceExhausted based on #5182

Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com>
  • Loading branch information
tonistiigi committed Aug 16, 2024
1 parent b04830b commit bc9e857
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 3 deletions.
9 changes: 6 additions & 3 deletions executor/runcexecutor/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ func (w *runcExecutor) Run(ctx context.Context, id string, root executor.Mount,
}
doReleaseNetwork = false

err = exitError(ctx, err)
err = exitError(ctx, cgroupPath, err)
if err != nil {
if rec != nil {
rec.Close()
Expand All @@ -351,7 +351,7 @@ func (w *runcExecutor) Run(ctx context.Context, id string, root executor.Mount,
return rec, rec.CloseAsync(releaseContainer)
}

func exitError(ctx context.Context, err error) error {
func exitError(ctx context.Context, cgroupPath string, err error) error {
if err != nil {
exitErr := &gatewayapi.ExitError{
ExitCode: gatewayapi.UnknownExitStatus,
Expand All @@ -363,6 +363,9 @@ func exitError(ctx context.Context, err error) error {
ExitCode: uint32(runcExitError.Status),
}
}

detectOOM(ctx, cgroupPath, exitErr)

trace.SpanFromContext(ctx).AddEvent(
"Container exited",
trace.WithAttributes(
Expand Down Expand Up @@ -453,7 +456,7 @@ func (w *runcExecutor) Exec(ctx context.Context, id string, process executor.Pro
}

err = w.exec(ctx, id, spec.Process, process, nil)
return exitError(ctx, err)
return exitError(ctx, "", err)
}

type forwardIO struct {
Expand Down
46 changes: 46 additions & 0 deletions executor/runcexecutor/executor_linux.go
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
package runcexecutor

import (
"bufio"
"context"
"io"
"os"
"path/filepath"
"strconv"
"strings"
"syscall"

"github.com/containerd/console"
runc "github.com/containerd/go-runc"
"github.com/moby/buildkit/executor"
gatewayapi "github.com/moby/buildkit/frontend/gateway/pb"
"github.com/moby/buildkit/util/bklog"
"github.com/moby/sys/signal"
"github.com/opencontainers/runtime-spec/specs-go"
Expand Down Expand Up @@ -172,3 +177,44 @@ func (w *runcExecutor) callWithIO(ctx context.Context, process executor.ProcessI

return call(ctx, startedCh, runcIO, killer.pidfile)
}

// detectOOM checks whether the cgroup identified by ns has recorded any
// "oom_kill" events and, if it has, sets gwErr.Err to syscall.ENOMEM.
//
// ns is joined onto the default cgroup mountpoint ("/sys/fs/cgroup") to
// locate the cgroup directory. An empty ns disables the check entirely.
// Failure to read the event counter is logged as a warning and otherwise
// ignored; gwErr is left untouched in that case.
func detectOOM(ctx context.Context, ns string, gwErr *gatewayapi.ExitError) {
	if ns == "" {
		// No cgroup path was supplied, so there is nothing to inspect.
		return
	}

	const defaultCgroupMountpoint = "/sys/fs/cgroup"
	cgroupDir := filepath.Join(defaultCgroupMountpoint, ns)

	oomKills, err := readMemoryEvent(cgroupDir, "oom_kill")
	switch {
	case err != nil:
		// Best effort only: report the problem and keep the original error.
		bklog.G(ctx).WithError(err).Warn("failed to read oom_kill event")
	case oomKills > 0:
		gwErr.Err = syscall.ENOMEM
	}
}

// readMemoryEvent returns the counter recorded for event in the
// "memory.events" file located in directory fp.
//
// The file is scanned line by line. Only lines consisting of exactly two
// whitespace-separated fields are considered, and the first such line whose
// key equals event and whose value parses as a base-10 unsigned 64-bit
// integer determines the result. A matching line with an unparsable value is
// skipped and scanning continues.
//
// It returns (0, nil) when no usable line for event is found, and a non-nil
// error when the file cannot be opened or the scanner reports a failure.
func readMemoryEvent(fp string, event string) (uint64, error) {
	file, err := os.Open(filepath.Join(fp, "memory.events"))
	if err != nil {
		return 0, err
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		fields := strings.Fields(scanner.Text())
		if len(fields) != 2 || fields[0] != event {
			// Not a well-formed "<key> <value>" line for the requested event.
			continue
		}
		if count, parseErr := strconv.ParseUint(fields[1], 10, 64); parseErr == nil {
			return count, nil
		}
	}
	return 0, scanner.Err()
}

0 comments on commit bc9e857

Please sign in to comment.