forked from torvalds/linux
-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
mm/memfd: add MFD_NOEXEC_SEAL and MFD_EXEC
The new MFD_NOEXEC_SEAL and MFD_EXEC flags allow an application to set the executable bit at creation time (memfd_create). When MFD_NOEXEC_SEAL is set, the memfd is created without the executable bit (mode: 0666) and sealed with F_SEAL_EXEC, so it cannot be chmod-ed to be executable (mode: 0777) after creation. When the MFD_EXEC flag is set, the memfd is created with the executable bit (mode: 0777); this is the same as the old behavior of memfd_create. The new pid-namespaced sysctl vm.memfd_noexec has 3 values: 0: memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL acts like MFD_EXEC was set. 1: memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL acts like MFD_NOEXEC_SEAL was set. 2: memfd_create() without MFD_NOEXEC_SEAL will be rejected. The sysctl allows finer control of memfd_create for old software that doesn't set the executable bit; for example, a container with vm.memfd_noexec=1 means the old software will create non-executable memfds by default. Also, the value of memfd_noexec is passed to a child namespace at creation time. For example, if the init namespace has vm.memfd_noexec=2, all its child namespaces will be created with 2. 
[akpm@linux-foundation.org: add stub functions to fix build] [akpm@linux-foundation.org: remove unneeded register_pid_ns_ctl_table_vm() stub, per Jeff] [akpm@linux-foundation.org: s/pr_warn_ratelimited/pr_warn_once/, per review] [akpm@linux-foundation.org: fix CONFIG_SYSCTL=n warning] Link: https://lkml.kernel.org/r/20221215001205.51969-4-jeffxu@google.com Signed-off-by: Jeff Xu <jeffxu@google.com> Co-developed-by: Daniel Verkamp <dverkamp@chromium.org> Signed-off-by: Daniel Verkamp <dverkamp@chromium.org> Reported-by: kernel test robot <lkp@intel.com> Reviewed-by: Kees Cook <keescook@chromium.org> Cc: David Herrmann <dh.herrmann@gmail.com> Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com> Cc: Hugh Dickins <hughd@google.com> Cc: Jann Horn <jannh@google.com> Cc: Jorge Lucangeli Obes <jorgelo@chromium.org> Cc: Shuah Khan <skhan@linuxfoundation.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
- Loading branch information
Showing
5 changed files
with
134 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
/* SPDX-License-Identifier: GPL-2.0 */ | ||
#ifndef LINUX_PID_SYSCTL_H | ||
#define LINUX_PID_SYSCTL_H | ||
|
||
#include <linux/pid_namespace.h> | ||
|
||
#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE) | ||
/*
 * Seed @ns's memfd_noexec_scope with the value currently held by the
 * calling task's active pid namespace, so that the setting carries over
 * to @ns.
 */
static inline void initialize_memfd_noexec_scope(struct pid_namespace *ns) | ||
{ | ||
ns->memfd_noexec_scope = | ||
task_active_pid_ns(current)->memfd_noexec_scope; | ||
} | ||
|
||
/*
 * proc handler for the "memfd_noexec" sysctl.
 *
 * Acts on the active pid namespace of the calling task. A write is refused
 * with -EPERM unless the caller has CAP_SYS_ADMIN in that namespace's user
 * namespace. Otherwise the result of proc_dointvec_minmax() is returned.
 */
static int pid_mfd_noexec_dointvec_minmax(struct ctl_table *table, | ||
int write, void *buf, size_t *lenp, loff_t *ppos) | ||
{ | ||
struct pid_namespace *ns = task_active_pid_ns(current); | ||
struct ctl_table table_copy; | ||
|
||
if (write && !ns_capable(ns->user_ns, CAP_SYS_ADMIN)) | ||
return -EPERM; | ||
|
||
/*
 * Work on a local copy: only the copy's data/extra1 are adjusted below.
 * Outside the init pid namespace, point data at this namespace's own
 * memfd_noexec_scope.
 */
table_copy = *table; | ||
if (ns != &init_pid_ns) | ||
table_copy.data = &ns->memfd_noexec_scope; | ||
|
||
/* | ||
* Raise the minimum to the current value, so that a value smaller | ||
* than the current one is not accepted. | ||
*/ | ||
if (*(int *)table_copy.data > *(int *)table_copy.extra1) | ||
table_copy.extra1 = table_copy.data; | ||
|
||
return proc_dointvec_minmax(&table_copy, write, buf, lenp, ppos); | ||
} | ||
|
||
/*
 * sysctl table holding the single "memfd_noexec" entry (mode 0644).
 * Its data pointer defaults to the init pid namespace's
 * memfd_noexec_scope; pid_mfd_noexec_dointvec_minmax() redirects it for
 * other namespaces. extra1/extra2 are the bounds handed on to
 * proc_dointvec_minmax() (presumably 0 and 2 - confirm against the
 * definitions of SYSCTL_ZERO and SYSCTL_TWO).
 */
static struct ctl_table pid_ns_ctl_table_vm[] = { | ||
{ | ||
.procname = "memfd_noexec", | ||
.data = &init_pid_ns.memfd_noexec_scope, | ||
.maxlen = sizeof(init_pid_ns.memfd_noexec_scope), | ||
.mode = 0644, | ||
.proc_handler = pid_mfd_noexec_dointvec_minmax, | ||
.extra1 = SYSCTL_ZERO, | ||
.extra2 = SYSCTL_TWO, | ||
}, | ||
{ } | ||
}; | ||
/* sysctl path ("vm") under which pid_ns_ctl_table_vm is registered. */
static struct ctl_path vm_path[] = { { .procname = "vm", }, { } }; | ||
/*
 * Register pid_ns_ctl_table_vm under vm_path.
 * The return value of register_sysctl_paths() is discarded here.
 */
static inline void register_pid_ns_sysctl_table_vm(void) | ||
{ | ||
register_sysctl_paths(vm_path, pid_ns_ctl_table_vm); | ||
} | ||
#else | ||
/*
 * No-op stubs used when CONFIG_SYSCTL or CONFIG_MEMFD_CREATE is not
 * enabled, so that callers build without their own #ifdefs.
 * NOTE(review): set_memfd_noexec_scope() has no counterpart in the enabled
 * branch of this header - confirm whether it is still needed.
 */
static inline void initialize_memfd_noexec_scope(struct pid_namespace *ns) {} | ||
static inline void set_memfd_noexec_scope(struct pid_namespace *ns) {} | ||
static inline void register_pid_ns_sysctl_table_vm(void) {} | ||
#endif | ||
|
||
#endif /* LINUX_PID_SYSCTL_H */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters