Skip to content

Commit

Permalink
plugincontainer: Support plugins in rootless runtime with nonroot user (
Browse files Browse the repository at this point in the history
#107)

* Support rootless plugin containers with nonroot users
* Remove rootless podman from CI
* Stop testing podman
* gVisor now only needs --host-uds=create instead of all
* Upgrade go-plugin to v1.6.0
  • Loading branch information
tomhjp authored Nov 20, 2023
1 parent d7b6db5 commit 0268c17
Show file tree
Hide file tree
Showing 9 changed files with 171 additions and 114 deletions.
19 changes: 4 additions & 15 deletions .github/workflows/go.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ jobs:
"runsc": {
"path": "/usr/local/bin/runsc",
"runtimeArgs": [
"--host-uds=all"
"--host-uds=create"
]
}
}
Expand All @@ -82,28 +82,17 @@ jobs:
"runsc": {
"path": "/usr/local/bin/runsc",
"runtimeArgs": [
"--host-uds=all",
"--host-uds=create",
"--ignore-cgroups"
]
}
}
}
EOF
export PATH="$HOME/bin:$PATH"
systemctl --user restart docker
- name: Install rootless podman
if: ${{ matrix.module == 'plugincontainer' }}
run: |
sudo apt-get install -y podman slirp4netns fuse-overlayfs
mkdir -p ~/local/bin
RUNSC_SCRIPT=~/local/bin/runsc.podman
tee "${RUNSC_SCRIPT}" <<EOF
#!/bin/bash
/usr/local/bin/runsc --host-uds=all --ignore-cgroups "\$@"
EOF
chmod u+x "${RUNSC_SCRIPT}"
podman --runtime "${RUNSC_SCRIPT}" system service -t 0 &
- name: Test
run: cd ${{ matrix.module }} && go test ./...

Expand Down
97 changes: 34 additions & 63 deletions plugincontainer/compatibility_matrix_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,11 @@ import (
)

const (
engineDocker = "docker"
enginePodman = "podman"
runtimeRunc = "runc"
runtimeRunsc = "runsc"
)

type matrixInput struct {
containerEngine string
containerRuntime string
rootlessEngine bool
rootlessUser bool
Expand All @@ -30,13 +27,10 @@ type matrixInput struct {
func (m matrixInput) String() string {
var s string
if m.rootlessEngine {
s = "rootless "
}
s += m.containerEngine
// Podman does not support configuring the runtime from the SDK.
if m.containerEngine != enginePodman {
s += ":" + m.containerRuntime
s = "rootless_"
}
s += "docker"
s += ":" + m.containerRuntime
if m.rootlessUser {
s += ":" + "nonroot"
}
Expand All @@ -53,63 +47,44 @@ func TestCompatibilityMatrix(t *testing.T) {

runCmd(t, "go", "build", "-o=examples/container/go-plugin-counter", "./examples/container/plugin-counter")

for _, engine := range []string{engineDocker, enginePodman} {
for _, runtime := range []string{runtimeRunc, runtimeRunsc} {
for _, rootlessEngine := range []bool{true, false} {
for _, rootlessUser := range []bool{true, false} {
for _, mlock := range []bool{true, false} {
if engine == enginePodman && runtime == runtimeRunsc {
// Podman does not support configuring the runtime from the SDK,
// so only run 1 of the set of runtime test cases against it.
// TODO: See if we can run two instances of podman to support one
// runtime each.
continue
}
i := matrixInput{
containerEngine: engine,
containerRuntime: runtime,
rootlessEngine: rootlessEngine,
rootlessUser: rootlessUser,
mlock: mlock,
}
t.Run(i.String(), func(t *testing.T) {
runExamplePlugin(t, i)
})
}
}
}
var input matrixInput
testCases := [][2]func(){
{func() { input.rootlessEngine = true }, func() { input.rootlessEngine = false }},
{func() { input.containerRuntime = runtimeRunc }, func() { input.containerRuntime = runtimeRunsc }},
{func() { input.rootlessUser = true }, func() { input.rootlessUser = false }},
{func() { input.mlock = true }, func() { input.mlock = false }},
}
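// Run a test for all combinations of 4 binary choices.
// Use 4 bit numbers to represent all possible choices, where bit j (counting
// from the least significant bit) selects the option for testCases[j],
// e.g. 0100 (written in testCases order) runs rootless_docker:runsc:nonroot:mlock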
for i := 0; i < 1<<len(testCases); i++ {
for j := 0; j < len(testCases); j++ {
testCases[j][(i>>j)&1]()
}
t.Run(input.String(), func(t *testing.T) {
runExamplePlugin(t, input)
})
}
}

func skipIfUnsupported(t *testing.T, i matrixInput) {
switch {
case i.rootlessEngine && i.rootlessUser:
t.Skip("Unix socket permissions not yet working for rootless engine + nonroot container user")
case i.containerEngine == enginePodman && !i.rootlessEngine:
t.Skip("TODO: These tests would pass but CI doesn't have the environment set up yet")
case i.mlock && i.rootlessEngine:
if i.containerEngine == engineDocker && i.containerRuntime == runtimeRunsc {
// runsc works in rootless because it has its own implementation of mlockall(2)
} else {
t.Skip("TODO: These tests should work if the rootless engine is given the IPC_LOCK capability")
case i.rootlessEngine && i.containerRuntime == runtimeRunc:
if i.rootlessUser {
t.Skip("runc requires rootlesskit to have DAC_OVERRIDE capability itself, and that undermines being a rootless runtime")
} else if i.mlock {
t.Skip("TODO: Partially working, but tests not yet reliably and repeatably passing")
}
}
}

func setDockerHost(t *testing.T, containerEngine string, rootlessEngine bool) {
func setDockerHost(t *testing.T, rootlessEngine bool) {
var socketFile string
switch {
case containerEngine == engineDocker && !rootlessEngine:
case !rootlessEngine:
socketFile = "/var/run/docker.sock"
case containerEngine == engineDocker && rootlessEngine:
case rootlessEngine:
socketFile = fmt.Sprintf("/run/user/%d/docker.sock", os.Getuid())
case containerEngine == enginePodman && !rootlessEngine:
socketFile = "/var/run/podman/podman.sock"
case containerEngine == enginePodman && rootlessEngine:
socketFile = fmt.Sprintf("/run/user/%d/podman/podman.sock", os.Getuid())
default:
t.Fatalf("Unsupported combination: %s, %v", containerEngine, rootlessEngine)
}
if _, err := os.Stat(socketFile); err != nil {
t.Fatal("Did not find expected socket file:", err)
Expand All @@ -119,35 +94,31 @@ func setDockerHost(t *testing.T, containerEngine string, rootlessEngine bool) {

func runExamplePlugin(t *testing.T, i matrixInput) {
skipIfUnsupported(t, i)
setDockerHost(t, i.containerEngine, i.rootlessEngine)
setDockerHost(t, i.rootlessEngine)

imageRef := goPluginCounterImage
target := "root"
if i.rootlessUser {
imageRef += ":nonroot"
if i.mlock {
target = "nonroot-mlock"
} else {
target = "nonroot"
}
}
runCmd(t, i.containerEngine, "build", "--tag="+imageRef, "--target="+target, "--file=examples/container/Dockerfile", "examples/container")
runCmd(t, "docker", "build", fmt.Sprintf("--tag=%s:%s", goPluginCounterImage, target), "--target="+target, "--file=examples/container/Dockerfile", "examples/container")

cfg := &plugincontainer.Config{
Image: goPluginCounterImage,
GroupAdd: os.Getgid(),
Tag: target,
Runtime: i.containerRuntime,
GroupAdd: os.Getegid(),
Rootless: i.rootlessEngine && i.rootlessUser,
Debug: true,

CapIPCLock: i.mlock,
}
if i.mlock {
cfg.Env = append(cfg.Env, "MLOCK=true")
}
if i.rootlessUser {
cfg.Tag = "nonroot"
}
if i.containerEngine != enginePodman {
cfg.Runtime = i.containerRuntime
}

exerciseExamplePlugin(t, cfg)
}
52 changes: 40 additions & 12 deletions plugincontainer/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,24 +11,52 @@ import (
// Currently only compatible with Linux due to the requirements we have for
// establishing communication over a unix socket.
//
// A temporary directory will be mounted into the container and both the host
// and the plugin will create unix sockets that need to be writable from the
// other side. To achieve this, there are broadly 2 runtime options (i.e. not
// including build-time options):
// A temporary directory will be mounted into the container, which needs to be
// writable by the plugin so it can create a unix socket, which in turn needs
// to be writable from the host. To achieve these 2-way write permissions,
// this library implements two different strategies:
//
// 1. Set up a uid or gid common to both the host and container processes, and
// ensure the unix socket is writable by that shared id. Set GroupAdd in this
// config and go-plugin ClientConfig's UnixSocketConfig Group with the same
// numeric gid to set up a common group. go-plugin will handle making all
// sockets writable by the gid.
// 2. Use a rootless container runtime, in which case the container process will
// be run as the same unprivileged user as the client.
// ensure the unix socket is writable by that shared id.
//
// a) For a shared uid, run as root inside the container to avoid being mapped
// to a different uid within the user namespace. No need to set GroupAdd or
// Rootless options, but note this is highly inadvisable unless your container
// runtime is unprivileged/rootless.
//
// b) For a shared gid, use the same numeric gid for GroupAdd in this config
// and go-plugin's ClientConfig.UnixSocketConfig.Group. go-plugin will handle
// making all sockets writable by the gid. Not sufficient on its own for
// rootless runtimes, as the gid will be mapped to a different actual group
// inside the container.
//
// 2. If the container runtime and the container itself are both configured to
// run as non-root users, it's not possible to set up a shared uid or gid.
// In this case, set the Rootless option to enable two changes:
//
// a) Enable the DAC_OVERRIDE capability for the container to allow the
// plugin to create a file in the shared directory. Note it is recommended
// to limit usage of this functionality to gVisor containers, because other
// runtimes will need to be given DAC_OVERRIDE themselves, which undermines
// some of the benefit of using a rootless container runtime.
//
// b) Apply a default ACL to the shared directory, allowing the host to
// write to any socket files created in it. The socket must be group-
// writable for the default ACL to take effect, so GroupAdd must also be
// set.
type Config struct {
// GroupAdd sets an additional group that the container should run as. Should
// match the UnixSocketConfig Group passed to go-plugin. Needs to be set if
// the container runtime is not rootless.
// match the UnixSocketConfig Group passed to go-plugin.
GroupAdd int

// Rootless enables extra steps necessary to make the plugin's Unix socket
// writable by both sides when using a rootless container runtime. It
// should be set if both the host's container runtime and the container
// itself are configured to run as non-privileged users. It requires a file
// system that supports POSIX 1e ACLs, which should be available by default
// on most modern Linux distributions.
Rootless bool

// Container command/env
Entrypoint []string // If specified, replaces the container entrypoint.
Args []string // If specified, replaces the container args.
Expand Down
67 changes: 67 additions & 0 deletions plugincontainer/container_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"errors"
"fmt"
"io"
"os"
"os/exec"
"path"
"runtime"
Expand All @@ -25,6 +26,7 @@ import (
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-plugin"
"github.com/hashicorp/go-plugin/runner"
"github.com/joshlf/go-acl"
)

var (
Expand Down Expand Up @@ -151,6 +153,12 @@ func (cfg *Config) NewContainerRunner(logger hclog.Logger, cmd *exec.Cmd, hostSo
if cfg.CapIPCLock {
hostConfig.CapAdd = append(hostConfig.CapAdd, "IPC_LOCK")
}
if cfg.Rootless {
hostConfig.CapAdd = append(hostConfig.CapAdd, "DAC_OVERRIDE")
if err := configureDefaultACLsForRootless(hostSocketDir); err != nil {
return nil, err
}
}

// Network config.
networkConfig := &network.NetworkingConfig{
Expand Down Expand Up @@ -430,3 +438,62 @@ Stderr:
%s
--- End Logs ---`, stdout.String(), stderr.String())
}

// If the container runtime is rootless, our GroupAdd trick to make the Unix
// socket and folder writable from both sides stops working. Instead we have:
//
// 1. Run as root within the container still works. The container's root
// user is not mapped to a different host user, so we get:
//
// Host view: Running as 1000, container running as 1000, folder and socket owned by 1000.
// Container view: Running as 0, folder and socket owned by 0.
//
// 2. Run as non-root within the container fails. The container runs as a
// subordinate uid, with the mapping defined by /etc/subuid. e.g. if the host
// unprivileged user is 1000(ubuntu), and /etc/subuid has the following entry:
// ubuntu:100000:65536
//
// Then running as user 1 inside the container will map to user 100000
// on the host, and user 1001 will map to 101000.
//
// Host view: Running as 1000, container running as 101000,
// folder owned by 1000, socket owned by 101000.
// => We need to make the socket writable for the host.
//
// Container view: Running as 1001, folder owned by 0, socket owned by 1001.
// => We need to make the folder writable for the container.
//
// To fix the host permissions, we set default permissions on the folder
// so any Unix sockets created in it are automatically writable.
//
// To fix the container permissions, we give it the DAC_OVERRIDE capability,
// which is normally on by default, and allows the container process to
// ignore file system permission restrictions. The only bit of the host file
// system it has access to though is the empty shared folder.
//
// Similar to mlock and the IPC_LOCK capability, runc requires rootlesskit
// (the container's parent process) to have the DAC_OVERRIDE capability
// itself in order to delegate it to the container. However, runsc has no
// such requirement because it reimplements the syscall in userspace.
//
// Note that the gVisor picture looks a little more complex in terms of how the
// process looks on the host as gVisor adds an extra layer between the container
// and the host, but the same file permission principles apply.
func configureDefaultACLsForRootless(hostSocketDir string) error {
	// The process's effective uid is used as the qualifier of the named-user
	// ACL entry added below.
	hostUser := strconv.Itoa(os.Geteuid())

	// Start from the ACL form of unix mode 0660, then add a named-user entry
	// for the host user and a mask entry, both carrying the 0o006 permission
	// bits (presumably read+write in go-acl's encoding; confirm against the
	// library documentation).
	defaults := append(
		acl.FromUnix(0o660),
		acl.Entry{Tag: acl.TagUser, Qualifier: hostUser, Perms: 0o006},
		acl.Entry{Tag: acl.TagMask, Perms: 0o006},
	)

	// Install the entries as the directory's default ACL.
	err := acl.SetDefault(hostSocketDir, defaults)
	if err != nil {
		return fmt.Errorf("failed to set default ACLs on rootless socket directory: %w", err)
	}
	return nil
}
9 changes: 5 additions & 4 deletions plugincontainer/container_runner_external_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,10 +177,11 @@ func testExamplePlugin_WithRuntime(t *testing.T, ociRuntime, id, sha256 string)

func exerciseExamplePlugin(t *testing.T, cfg *plugincontainer.Config) {
client := plugin.NewClient(&plugin.ClientConfig{
HandshakeConfig: shared.Handshake,
Plugins: shared.PluginMap,
SkipHostEnv: true,
AutoMTLS: true,
HandshakeConfig: shared.Handshake,
Plugins: shared.PluginMap,
SkipHostEnv: true,
AutoMTLS: true,
GRPCBrokerMultiplex: true,
AllowedProtocols: []plugin.Protocol{
plugin.ProtocolGRPC,
},
Expand Down
Loading

0 comments on commit 0268c17

Please sign in to comment.