Skip to content

Commit

Permalink
Fix EXEC permission of the volume mount when calling mmap with PROT_EXEC
Browse files Browse the repository at this point in the history
## Background
This is Xuzhou (Joe) from `Snowflake Inc`. We are currently working on utilizing gVisor internally as our secured sandboxing mechanism. We have met in-person with gVisor team last October sharing our use case and experiences with the gVisor team. As part of the meeting, we (Snowflake team) committed to contribute our internal fixes/improvements back to the upstream.

As part of compatibility testing with our previous mechanism, we found some behavior discrepancies between gVisor-emulated kernel and native linux kernel when making `mmap` system calls. Thus wanted to raise a pull request and see if this makes sense.

## Issue we observed
On native linux kernel, when calling `mmap` syscall with PROT_EXEC on a file, linux kernel checks if the volume/file system backing the file is executable (ie. Has `NOEXEC` flag or not). If the volume has NOEXEC flag set, the syscall will fail with EPERM.

However, this behavior is not observed when running under gVisor. Instead, gvisor allows this system call to be made without any issue.

Thus this pull request aims to bring the same parity to gvisor implemented mmap syscall.

FUTURE_COPYBARA_INTEGRATE_REVIEW=#11358 from Snowflake-Labs:xuzhoyin-mmap-fix 191b53d
PiperOrigin-RevId: 720236462
  • Loading branch information
Yhinner authored and gvisor-bot committed Jan 27, 2025
1 parent ca0ba7c commit c6a669a
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 0 deletions.
9 changes: 9 additions & 0 deletions pkg/sentry/syscalls/linux/sys_mmap.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,15 @@ func Mmap(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *
opts.MaxPerms.Write = false
}

// mmap requires volume NO_EXEC be false if request has PROT_EXEC flag.
if file.Mount().MountFlags()&linux.ST_NOEXEC != 0 {
if opts.Perms.Execute {
return 0, nil, linuxerr.EPERM
}

opts.MaxPerms.Execute = false
}

if err := file.ConfigureMMap(t, &opts); err != nil {
return 0, nil, err
}
Expand Down
1 change: 1 addition & 0 deletions test/syscalls/linux/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -1432,6 +1432,7 @@ cc_binary(
"//test/util:file_descriptor",
"//test/util:fs_util",
"//test/util:logging",
"//test/util:memory_util",
"//test/util:mount_util",
"//test/util:multiprocess_util",
"//test/util:posix_error",
Expand Down
78 changes: 78 additions & 0 deletions test/syscalls/linux/mount.cc
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
#include "test/util/fs_util.h"
#include "test/util/linux_capability_util.h"
#include "test/util/logging.h"
#include "test/util/memory_util.h"
#include "test/util/mount_util.h"
#include "test/util/multiprocess_util.h"
#include "test/util/posix_error.h"
Expand Down Expand Up @@ -252,6 +253,83 @@ TEST(MountTest, UmountDetach) {
OpenAt(mounted_dir.get(), "..", O_DIRECTORY | O_RDONLY));
}

TEST(MountTest, MMapWithExecProtFailsOnNoExecFile) {
// Skips the test if test does not have needed capability to create the volume
// mount.
SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));

auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
auto ret = ASSERT_NO_ERRNO_AND_VALUE(
Mount("", dir.path(), kTmpfs, MS_NOEXEC, "", 0));
auto file = ASSERT_NO_ERRNO_AND_VALUE(
TempPath::CreateFileWith(dir.path(), "random1", 0777));

FileDescriptor fd =
ASSERT_NO_ERRNO_AND_VALUE(Open(file.path().c_str(), O_RDWR));
ASSERT_THAT(reinterpret_cast<uintptr_t>(
mmap(0, kPageSize, PROT_EXEC, MAP_PRIVATE, fd.get(), 0)),
SyscallFailsWithErrno(EPERM));
}

TEST(MountTest, MMapWithExecProtSucceedsOnExecutableVolumeFile) {
// Capability is needed to create tmpfs.
SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));

auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
auto ret = ASSERT_NO_ERRNO_AND_VALUE(Mount("", dir.path(), kTmpfs, 0, "", 0));
auto file = ASSERT_NO_ERRNO_AND_VALUE(
TempPath::CreateFileWith(dir.path(), "random1", 0777));

FileDescriptor fd =
ASSERT_NO_ERRNO_AND_VALUE(Open(file.path().c_str(), O_RDWR));

void* address = mmap(0, kPageSize, PROT_EXEC, MAP_PRIVATE, fd.get(), 0);
EXPECT_NE(address, MAP_FAILED);

MunmapSafe(address, kPageSize);
}

TEST(MountTest, MMapWithoutNoExecProtSucceedsOnNoExecFile) {
// Capability is needed to create tmpfs.
SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));

auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
auto ret = ASSERT_NO_ERRNO_AND_VALUE(
Mount("", dir.path(), kTmpfs, MS_NOEXEC, "", 0));
auto file = ASSERT_NO_ERRNO_AND_VALUE(
TempPath::CreateFileWith(dir.path(), "random1", 0777));
FileDescriptor fd =
ASSERT_NO_ERRNO_AND_VALUE(Open(file.path().c_str(), O_RDWR));

void* address =
mmap(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd.get(), 0);
EXPECT_NE(address, MAP_FAILED);

MunmapSafe(address, kPageSize);
}

TEST(MountTest, MProtectWithNoExecProtFailsOnNoExecFile) {
// Capability is needed to create tmpfs.
SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));

auto const dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
auto ret = ASSERT_NO_ERRNO_AND_VALUE(
Mount("", dir.path(), kTmpfs, MS_NOEXEC, "", 0));
auto file = ASSERT_NO_ERRNO_AND_VALUE(
TempPath::CreateFileWith(dir.path(), "random1", 0777));
FileDescriptor fd =
ASSERT_NO_ERRNO_AND_VALUE(Open(file.path().c_str(), O_RDWR));

void* address =
mmap(0, kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd.get(), 0);
EXPECT_NE(address, MAP_FAILED);

ASSERT_THAT(mprotect(address, kPageSize, PROT_EXEC),
SyscallFailsWithErrno(EACCES));

MunmapSafe(address, kPageSize);
}

TEST(MountTest, UmountMountsStackedOnDot) {
SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_ADMIN)));
// Verify that unmounting at "." properly unmounts the mount at the top of
Expand Down

0 comments on commit c6a669a

Please sign in to comment.