Skip to content

Commit

Permalink
Open bind mount sources from the host userns
Browse files Browse the repository at this point in the history
The source of the bind mount might not be accessible in a different user
namespace because a component of the source path might not be traversed
under the users and groups mapped inside the user namespace. This caused
errors such as the following:

  # time="2020-06-22T13:48:26Z" level=error msg="container_linux.go:367:
  starting container process caused: process_linux.go:459:
  container init caused: rootfs_linux.go:58:
  mounting \"/tmp/busyboxtest/source-inaccessible/dir\"
  to rootfs at \"/tmp/inaccessible\" caused:
  stat /tmp/busyboxtest/source-inaccessible/dir: permission denied"

To solve this problem, this patch performs the following:

1. in nsexec.c, it opens the source path in the host userns (so we have
   the right permissions to open it) but in the container mntns (so the
   kernel cross mntns mount check let us mount it later:
   https://github.com/torvalds/linux/blob/v5.8/fs/namespace.c#L2312).

2. in nsexec.c, it passes the file descriptors of the source to the
   child process with SCM_RIGHTS.

3. In runc-init in Golang, it finishes the mounts while inside the
   userns even without access to the some components of the source
   paths.

Passing the fds with SCM_RIGHTS is necessary because once the child
process is in the container mntns, it is already in the container userns
so it cannot temporarily join the host mntns.

This patch uses the existing mechanism with _LIBCONTAINER_* environment
variables to pass the file descriptors from runc to runc init.

This patch uses the existing mechanism with the Netlink-style bootstrap
to pass information about the list of source mounts to nsexec.c.

Rootless containers don't use this bind mount sources fdpassing
mechanism because we can't setns() to the target mntns in a rootless
container (we don't have the privileges when we are in the host userns).

This patch takes care of using O_CLOEXEC on mount fds, and close them
early.

Fixes: opencontainers#2484.

Signed-off-by: Alban Crequy <alban@kinvolk.io>
Signed-off-by: Rodrigo Campos <rodrigo@kinvolk.io>
Co-authored-by: Rodrigo Campos <rodrigo@kinvolk.io>
  • Loading branch information
alban and rata committed Sep 28, 2021
1 parent e999e29 commit f8bf41e
Show file tree
Hide file tree
Showing 8 changed files with 439 additions and 25 deletions.
6 changes: 6 additions & 0 deletions libcontainer/configs/mount.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package configs

import "golang.org/x/sys/unix"

const (
// EXT_COPYUP is a directive to copy up the contents of a directory when
// a tmpfs is mounted over it.
Expand Down Expand Up @@ -37,3 +39,7 @@ type Mount struct {
// Optional Command to be run after Source is mounted.
PostmountCmds []Command `json:"postmount_cmds"`
}

func (m *Mount) IsBind() bool {
return m.Flags&unix.MS_BIND != 0
}
80 changes: 76 additions & 4 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,33 @@ func (c *linuxContainer) commandTemplate(p *Process, childInitPipe *os.File, chi
return cmd
}

// shouldSendMountSources says whether the child process must setup bind mounts with
// the source pre-opened (O_PATH) in the host user namespace.
// See https://github.com/opencontainers/runc/issues/2484
func (c *linuxContainer) shouldSendMountSources() bool {
// Passing the mount sources via SCM_RIGHTS is only necessary when
// both userns and mntns are active.
if !c.config.Namespaces.Contains(configs.NEWUSER) ||
!c.config.Namespaces.Contains(configs.NEWNS) {
return false
}

// nsexec.c send_mountsources() requires setns(mntns) capabilities
// CAP_SYS_CHROOT and CAP_SYS_ADMIN.
if c.config.RootlessEUID {
return false
}

// We need to send sources if there are bind-mounts.
for _, m := range c.config.Mounts {
if m.IsBind() {
return true
}
}

return false
}

func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPair, logFilePair filePair) (*initProcess, error) {
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard))
nsMaps := make(map[configs.NamespaceType]string)
Expand All @@ -529,10 +556,37 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPa
}
}
_, sharePidns := nsMaps[configs.NEWPID]
data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps)
data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps, initStandard)
if err != nil {
return nil, err
}

if c.shouldSendMountSources() {
mountFds := make([]string, len(c.config.Mounts))
for i, m := range c.config.Mounts {
if !m.IsBind() {
// StartInitialization() pairs elements on this slice with mounts.
// So we insert the empty string so the length of this slice and number of mounts
// match and they can be paired.
mountFds[i] = ""
continue
}

// The fd passed here will not be used: nsexec.c will overwrite it with dup3(). We just need
// to allocate a fd so that we know the number to pass in the environment variable. The fd
// must not be closed before cmd.Start(), so we reuse messageSockPair.child because the
// lifecycle of that fd is already taken care of.
cmd.ExtraFiles = append(cmd.ExtraFiles, messageSockPair.child)
mountFds[i] = strconv.Itoa(stdioFdCount + len(cmd.ExtraFiles) - 1)
}

// By using strings.Join() to create the str, we guarantee that we can parse it with strings.Split()
// in StartInitialization() and get a slice of the same length.
cmd.Env = append(cmd.Env,
"_LIBCONTAINER_MOUNT_FDS="+strings.Join(mountFds, ";"),
)
}

init := &initProcess{
cmd: cmd,
messageSockPair: messageSockPair,
Expand All @@ -557,7 +611,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockP
}
// for setns process, we don't have to set cloneflags as the process namespaces
// will only be set via setns syscall
data, err := c.bootstrapData(0, state.NamespacePaths)
data, err := c.bootstrapData(0, state.NamespacePaths, initSetns)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -1190,7 +1244,9 @@ func (c *linuxContainer) makeCriuRestoreMountpoints(m *configs.Mount) error {
case "bind":
// The prepareBindMount() function checks if source
// exists. So it cannot be used for other filesystem types.
if err := prepareBindMount(m, c.config.Rootfs); err != nil {
// TODO: pass something else than nil? Not sure if criu is
// impacted by issue #2484
if err := prepareBindMount(m, c.config.Rootfs, nil); err != nil {
return err
}
default:
Expand Down Expand Up @@ -2027,7 +2083,7 @@ func encodeIDMapping(idMap []configs.IDMap) ([]byte, error) {
// such as one that uses nsenter package to bootstrap the container's
// init process correctly, i.e. with correct namespaces, uid/gid
// mapping etc.
func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string) (io.Reader, error) {
func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string, it initType) (io.Reader, error) {
// create the netlink message
r := nl.NewNetlinkRequest(int(InitMsg), 0)

Expand Down Expand Up @@ -2109,6 +2165,22 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na
Value: c.config.RootlessEUID,
})

// Bind mount source to open.
if it == initStandard && c.shouldSendMountSources() {
var mounts []byte
for _, m := range c.config.Mounts {
if m.IsBind() {
mounts = append(mounts, []byte(m.Source)...)
}
mounts = append(mounts, byte(0))
}

r.AddData(&Bytemsg{
Type: MountSourcesAttr,
Value: mounts,
})
}

return bytes.NewReader(r.Serialize()), nil
}

Expand Down
38 changes: 37 additions & 1 deletion libcontainer/factory_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"regexp"
"runtime/debug"
"strconv"
"strings"

securejoin "github.com/cyphar/filepath-securejoin"
"github.com/moby/sys/mountinfo"
Expand Down Expand Up @@ -295,6 +296,12 @@ func (l *LinuxFactory) StartInitialization() (err error) {
return fmt.Errorf("unable to convert _LIBCONTAINER_LOGPIPE: %w", err)
}

// Get mount files (O_PATH).
mountFds, err := parseMountFds()
if err != nil {
return err
}

// clear the current process's environment to clean any libcontainer
// specific env vars.
os.Clearenv()
Expand All @@ -305,7 +312,7 @@ func (l *LinuxFactory) StartInitialization() (err error) {
}
}()

i, err := newContainerInit(it, pipe, consoleSocket, fifofd, logPipeFd)
i, err := newContainerInit(it, pipe, consoleSocket, fifofd, logPipeFd, mountFds)
if err != nil {
return err
}
Expand Down Expand Up @@ -359,3 +366,32 @@ func NewgidmapPath(newgidmapPath string) func(*LinuxFactory) error {
return nil
}
}

func parseMountFds() ([]*int, error) {
fdsStr := os.Getenv("_LIBCONTAINER_MOUNT_FDS")
if fdsStr == "" {
// Always return the nil slice if no fd is present.
return nil, nil
}

// This string was created so we can parse it just fine with strings.Split() and it has the proper len.
fds := strings.Split(fdsStr, ";")

// Indexes in the mountFds slice are used to assign/pair the fd (if any) to a mount in the container. Therefore,
// the mountFds slice MUST be of size len(fds), that was constructed with the same size as the container mounts.
mountFds := make([]*int, len(fds))
for i, fd := range fds {
if fd == "" {
continue
}

mountFd, err := strconv.Atoi(fd)
if err != nil {
return nil, fmt.Errorf("unable to convert _LIBCONTAINER_MOUNT_FDS(%q): %w", fdsStr, err)
}

mountFds[i] = &mountFd
}

return mountFds, nil
}
8 changes: 7 additions & 1 deletion libcontainer/init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ type initer interface {
Init() error
}

func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, logFd int) (initer, error) {
func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, logFd int, mountFds []*int) (initer, error) {
var config *initConfig
if err := json.NewDecoder(pipe).Decode(&config); err != nil {
return nil, err
Expand All @@ -86,6 +86,11 @@ func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd,
}
switch t {
case initSetns:
// mountFds must be nil in this case. We don't mount while doing runc exec.
if mountFds != nil {
return nil, errors.New("mountFds must be nil. Can't mount while doing runc exec.")
}

return &linuxSetnsInit{
pipe: pipe,
consoleSocket: consoleSocket,
Expand All @@ -100,6 +105,7 @@ func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd,
config: config,
fifoFd: fifoFd,
logFd: logFd,
mountFds: mountFds,
}, nil
}
return nil, fmt.Errorf("unknown init type %q", t)
Expand Down
1 change: 1 addition & 0 deletions libcontainer/message_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ const (
RootlessEUIDAttr uint16 = 27287
UidmapPathAttr uint16 = 27288
GidmapPathAttr uint16 = 27289
MountSourcesAttr uint16 = 27290
)

type Int32msg struct {
Expand Down
Loading

0 comments on commit f8bf41e

Please sign in to comment.