Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

plugincontainer: Support plugins in rootless runtime with nonroot user #107

Merged
merged 16 commits into from
Nov 20, 2023
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 4 additions & 15 deletions .github/workflows/go.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ jobs:
"runsc": {
"path": "/usr/local/bin/runsc",
"runtimeArgs": [
"--host-uds=all"
"--host-uds=create"
tomhjp marked this conversation as resolved.
Show resolved Hide resolved
]
}
}
Expand All @@ -80,28 +80,17 @@ jobs:
"runsc": {
"path": "/usr/local/bin/runsc",
"runtimeArgs": [
"--host-uds=all",
"--host-uds=create",
"--ignore-cgroups"
]
}
}
}
EOF

export PATH="$HOME/bin:$PATH"
systemctl --user restart docker

- name: Install rootless podman
if: ${{ matrix.module == 'plugincontainer' }}
run: |
sudo apt-get install -y podman slirp4netns fuse-overlayfs
mkdir -p ~/local/bin
RUNSC_SCRIPT=~/local/bin/runsc.podman
tee "${RUNSC_SCRIPT}" <<EOF
#!/bin/bash
/usr/local/bin/runsc --host-uds=all --ignore-cgroups "\$@"
EOF
chmod u+x "${RUNSC_SCRIPT}"
podman --runtime "${RUNSC_SCRIPT}" system service -t 0 &

- name: Test
run: cd ${{ matrix.module }} && go test ./...

Expand Down
122 changes: 60 additions & 62 deletions plugincontainer/compatibility_matrix_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,11 @@ import (
)

const (
engineDocker = "docker"
enginePodman = "podman"
runtimeRunc = "runc"
runtimeRunsc = "runsc"
)

type matrixInput struct {
containerEngine string
containerRuntime string
rootlessEngine bool
rootlessUser bool
Expand All @@ -30,13 +27,10 @@ type matrixInput struct {
func (m matrixInput) String() string {
var s string
if m.rootlessEngine {
s = "rootless "
}
s += m.containerEngine
// Podman does not support configuring the runtime from the SDK.
if m.containerEngine != enginePodman {
s += ":" + m.containerRuntime
s = "rootless_"
}
s += "docker"
s += ":" + m.containerRuntime
if m.rootlessUser {
s += ":" + "nonroot"
}
Expand All @@ -46,70 +40,79 @@ func (m matrixInput) String() string {
return s
}

// withContainerRuntime returns a modifier that takes a matrixInput by value,
// sets its containerRuntime field to s, and returns the updated copy.
func withContainerRuntime(s string) func(matrixInput) matrixInput {
	setRuntime := func(in matrixInput) matrixInput {
		in.containerRuntime = s
		return in
	}
	return setRuntime
}

// withRootlessEngine returns a modifier that takes a matrixInput by value,
// sets its rootlessEngine field to b, and returns the updated copy.
func withRootlessEngine(b bool) func(matrixInput) matrixInput {
	setEngine := func(in matrixInput) matrixInput {
		in.rootlessEngine = b
		return in
	}
	return setEngine
}

// withRootlessUser returns a modifier that takes a matrixInput by value,
// sets its rootlessUser field to b, and returns the updated copy.
func withRootlessUser(b bool) func(matrixInput) matrixInput {
	setUser := func(in matrixInput) matrixInput {
		in.rootlessUser = b
		return in
	}
	return setUser
}

// withMlock returns a modifier that takes a matrixInput by value, sets its
// mlock field to b, and returns the updated copy.
func withMlock(b bool) func(matrixInput) matrixInput {
	setMlock := func(in matrixInput) matrixInput {
		in.mlock = b
		return in
	}
	return setMlock
}

func TestCompatibilityMatrix(t *testing.T) {
if runtime.GOOS != "linux" {
t.Skip("Only linux is supported for now")
}

runCmd(t, "go", "build", "-o=examples/container/go-plugin-counter", "./examples/container/plugin-counter")

for _, engine := range []string{engineDocker, enginePodman} {
for _, runtime := range []string{runtimeRunc, runtimeRunsc} {
for _, rootlessEngine := range []bool{true, false} {
for _, rootlessUser := range []bool{true, false} {
for _, mlock := range []bool{true, false} {
if engine == enginePodman && runtime == runtimeRunsc {
// Podman does not support configuring the runtime from the SDK,
// so only run 1 of the set of runtime test cases against it.
// TODO: See if we can run two instances of podman to support one
// runtime each.
continue
}
i := matrixInput{
containerEngine: engine,
containerRuntime: runtime,
rootlessEngine: rootlessEngine,
rootlessUser: rootlessUser,
mlock: mlock,
}
t.Run(i.String(), func(t *testing.T) {
runExamplePlugin(t, i)
})
}
}
}
testCases := [][]func(matrixInput) matrixInput{
{withRootlessEngine(true), withRootlessEngine(false)},
{withContainerRuntime(runtimeRunc), withContainerRuntime(runtimeRunsc)},
{withRootlessUser(true), withRootlessUser(false)},
{withMlock(true), withMlock(false)},
}
// Run a test for all combinations of 4 binary choices.
tomhjp marked this conversation as resolved.
Show resolved Hide resolved
// Use 4-bit numbers to represent all possible choices: bit j of i, counting
// from the least significant bit, picks the option from testCases[j],
// e.g. i = 0b0010 runs rootless_docker:runsc:nonroot:mlock
for i := 0; i < 1<<len(testCases); i++ {
var input matrixInput
for j := 0; j < len(testCases); j++ {
input = testCases[j][(i>>j)&1](input)
}
t.Run(input.String(), func(t *testing.T) {
runExamplePlugin(t, input)
})
}
}

func skipIfUnsupported(t *testing.T, i matrixInput) {
switch {
case i.rootlessEngine && i.rootlessUser:
t.Skip("Unix socket permissions not yet working for rootless engine + nonroot container user")
case i.containerEngine == enginePodman && !i.rootlessEngine:
t.Skip("TODO: These tests would pass but CI doesn't have the environment set up yet")
case i.mlock && i.rootlessEngine:
if i.containerEngine == engineDocker && i.containerRuntime == runtimeRunsc {
// runsc works in rootless because it has its own implementation of mlockall(2)
} else {
t.Skip("TODO: These tests should work if the rootless engine is given the IPC_LOCK capability")
case i.rootlessEngine && i.containerRuntime == runtimeRunc:
if i.rootlessUser {
t.Skip("runc requires rootlesskit to have DAC_OVERRIDE capability itself, and that's a very powerful capability")
} else if i.mlock {
t.Skip("TODO: Partially working, but tests not yet reliably and repeatably passing")
}
}
}

func setDockerHost(t *testing.T, containerEngine string, rootlessEngine bool) {
func setDockerHost(t *testing.T, rootlessEngine bool) {
var socketFile string
switch {
case containerEngine == engineDocker && !rootlessEngine:
case !rootlessEngine:
socketFile = "/var/run/docker.sock"
case containerEngine == engineDocker && rootlessEngine:
case rootlessEngine:
socketFile = fmt.Sprintf("/run/user/%d/docker.sock", os.Getuid())
case containerEngine == enginePodman && !rootlessEngine:
socketFile = "/var/run/podman/podman.sock"
case containerEngine == enginePodman && rootlessEngine:
socketFile = fmt.Sprintf("/run/user/%d/podman/podman.sock", os.Getuid())
default:
t.Fatalf("Unsupported combination: %s, %v", containerEngine, rootlessEngine)
}
if _, err := os.Stat(socketFile); err != nil {
t.Fatal("Did not find expected socket file:", err)
Expand All @@ -119,35 +122,30 @@ func setDockerHost(t *testing.T, containerEngine string, rootlessEngine bool) {

func runExamplePlugin(t *testing.T, i matrixInput) {
skipIfUnsupported(t, i)
setDockerHost(t, i.containerEngine, i.rootlessEngine)
setDockerHost(t, i.rootlessEngine)

imageRef := goPluginCounterImage
target := "root"
if i.rootlessUser {
imageRef += ":nonroot"
if i.mlock {
target = "nonroot-mlock"
} else {
target = "nonroot"
}
}
runCmd(t, i.containerEngine, "build", "--tag="+imageRef, "--target="+target, "--file=examples/container/Dockerfile", "examples/container")
runCmd(t, "docker", "build", fmt.Sprintf("--tag=%s:%s", goPluginCounterImage, target), "--target="+target, "--file=examples/container/Dockerfile", "examples/container")

cfg := &plugincontainer.Config{
Image: goPluginCounterImage,
Tag: target,
Runtime: i.containerRuntime,
GroupAdd: os.Getgid(),
Debug: true,
Rootless: i.rootlessEngine && i.rootlessUser,

CapIPCLock: i.mlock,
}
if i.mlock {
cfg.Env = append(cfg.Env, "MLOCK=true")
}
if i.rootlessUser {
cfg.Tag = "nonroot"
}
if i.containerEngine != enginePodman {
cfg.Runtime = i.containerRuntime
}
exerciseExamplePlugin(t, cfg)
}
11 changes: 9 additions & 2 deletions plugincontainer/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,17 @@ import (
// be run as the same unprivileged user as the client.
type Config struct {
// GroupAdd sets an additional group that the container should run as. Should
// match the UnixSocketConfig Group passed to go-plugin. Needs to be set if
// the container runtime is not rootless.
// match the UnixSocketConfig Group passed to go-plugin. It should be set if
// the container runtime runs as root.
GroupAdd int

// Rootless is an alternative to GroupAdd, useful for rootless installs. It
// should be set if both the host's container runtime and the container
// itself are configured to run as non-privileged users. It requires a file
// system that supports POSIX 1e ACLs, which should be available by default
// on most modern Linux distributions.
Rootless bool

// Container command/env
Entrypoint []string // If specified, replaces the container entrypoint.
Args []string // If specified, replaces the container args.
Expand Down
66 changes: 66 additions & 0 deletions plugincontainer/container_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"errors"
"fmt"
"io"
"os"
"os/exec"
"path"
"runtime"
Expand All @@ -25,6 +26,7 @@ import (
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-plugin"
"github.com/hashicorp/go-plugin/runner"
"github.com/joshlf/go-acl"
)

var (
Expand Down Expand Up @@ -151,6 +153,12 @@ func (cfg *Config) NewContainerRunner(logger hclog.Logger, cmd *exec.Cmd, hostSo
if cfg.CapIPCLock {
hostConfig.CapAdd = append(hostConfig.CapAdd, "IPC_LOCK")
}
if cfg.Rootless {
hostConfig.CapAdd = append(hostConfig.CapAdd, "DAC_OVERRIDE")
if err := configureDefaultACLsForRootless(hostSocketDir); err != nil {
return nil, err
}
}

// Network config.
networkConfig := &network.NetworkingConfig{
Expand Down Expand Up @@ -430,3 +438,61 @@ Stderr:
%s
--- End Logs ---`, stdout.String(), stderr.String())
}

// configureDefaultACLsForRootless sets default ACLs on hostSocketDir so that
// Unix sockets created in that folder are automatically writable for the host
// user. It returns a wrapped error if the default ACLs cannot be applied.
//
// Background: if the container runtime is rootless, our GroupAdd trick to make
// the Unix socket and folder writable from both sides stops working. Instead
// we have two cases:
//
//  1. Run as root within the container still works. The container's root
//     user is not mapped to a different host user, so we get:
//
//     Host view: Running as 1000, container running as 1000, folder and
//     socket owned by 1000.
//     Container view: Running as 0, folder and socket owned by 0.
//
//  2. Run as non-root within the container fails. The container runs as a
//     subordinate uid, with the mapping defined by /etc/subuid. e.g. if the
//     host unprivileged user is 1000(ubuntu), and /etc/subuid has the
//     following entry:
//
//     ubuntu:100000:65536
//
//     Then running as user 1 inside the container will map to user 100000
//     on the host, and user 1001 will map to 101000.
//
//     Host view: Running as 1000, container running as 101000,
//     folder owned by 1000, socket owned by 101000.
//     => We need to make the socket writable for the host.
//
//     Container view: Running as 1001, folder owned by 0, socket owned by
//     1001.
//     => We need to make the folder writable for the container.
//
// To fix the host permissions, we set default permissions on the folder
// so any Unix sockets created in it are automatically writable.
//
// To fix the container permissions, we give it the DAC_OVERRIDE capability
// which is normally on by default, and allows the container process to
// ignore file system permissions for any files mounted inside the container.
//
// Similar to mlock and the IPC_LOCK capability, runc requires rootlesskit
// (the container's parent process) to have the DAC_OVERRIDE capability
// itself in order to delegate it to the container. However, runsc has no
// such requirement because it reimplements the syscall in userspace.
//
// Note that the gVisor picture looks a little more complex in terms of how the
// process looks on the host as gVisor adds an extra layer between the container
// and the host, but the same file permission principles apply.
func configureDefaultACLsForRootless(hostSocketDir string) error {
	// Permission bits shared by the named-user entry and the mask entry.
	// NOTE(review): presumably read+write in the low three bits; confirm
	// against the go-acl documentation.
	const socketPerms = 0o006

	// The named-user entry is qualified by the host process's effective uid.
	euid := strconv.Itoa(os.Geteuid())

	// Start from the ACL derived from Unix mode 0600, then add the named-user
	// and mask entries on top of it.
	defaults := append(
		acl.FromUnix(0o600),
		acl.Entry{Tag: acl.TagUser, Qualifier: euid, Perms: socketPerms},
		acl.Entry{Tag: acl.TagMask, Perms: socketPerms},
	)

	// Apply the entries as the folder's default ACL.
	err := acl.SetDefault(hostSocketDir, defaults)
	if err != nil {
		return fmt.Errorf("failed to set default ACLs on rootless socket directory: %w", err)
	}
	return nil
}
9 changes: 5 additions & 4 deletions plugincontainer/container_runner_external_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,10 +177,11 @@ func testExamplePlugin_WithRuntime(t *testing.T, ociRuntime, id, sha256 string)

func exerciseExamplePlugin(t *testing.T, cfg *plugincontainer.Config) {
client := plugin.NewClient(&plugin.ClientConfig{
HandshakeConfig: shared.Handshake,
Plugins: shared.PluginMap,
SkipHostEnv: true,
AutoMTLS: true,
HandshakeConfig: shared.Handshake,
Plugins: shared.PluginMap,
SkipHostEnv: true,
AutoMTLS: true,
GRPCBrokerMultiplex: true,
AllowedProtocols: []plugin.Protocol{
plugin.ProtocolGRPC,
},
Expand Down
20 changes: 7 additions & 13 deletions plugincontainer/examples/container/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,26 +11,20 @@ FROM docker.mirror.hashicorp.services/alpine as nonroot

COPY go-plugin-counter /bin/go-plugin-counter

RUN addgroup -S nonroot && \
adduser -S -G nonroot nonroot && \
chown -R nonroot:nonroot /bin/go-plugin-counter

USER nonroot

ENTRYPOINT [ "/bin/go-plugin-counter" ]

FROM docker.mirror.hashicorp.services/alpine as nonroot-mlock

COPY go-plugin-counter /bin/go-plugin-counter

RUN apk add libcap && \
addgroup -S nonroot && \
adduser -S -G nonroot nonroot && \
chown -R nonroot:nonroot /bin/go-plugin-counter && \
setcap cap_ipc_lock=+ep /bin/go-plugin-counter
cp /bin/go-plugin-counter /bin/go-plugin-counter-mlock && \
setcap cap_ipc_lock=+ep /bin/go-plugin-counter-mlock

USER nonroot

ENTRYPOINT [ "/bin/go-plugin-counter" ]

FROM nonroot as nonroot-mlock

ENTRYPOINT [ "/bin/go-plugin-counter-mlock" ]

# Set root as the default image.
FROM root
Loading