From 1e8e65487be63b16fb6b176379e131cabc5d5901 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 7 Jan 2025 19:15:33 -0800 Subject: [PATCH 1/6] libct: document initConfig and friends This is one of the dark corners of runc / libcontainer, so let's shed some light on it. initConfig is a structure which is filled in [mostly] by newInitConfig, and one of its hidden aspects is it contains a process config which is the result of merge between the container and the process configs. Let's document how all this happens, where the fields are coming from, which one has a preference, and how it all works. Signed-off-by: Kir Kolyshkin --- libcontainer/container_linux.go | 9 +++++ libcontainer/init_linux.go | 72 +++++++++++++++++++++++---------- libcontainer/process.go | 43 +++++++++++++------- 3 files changed, 87 insertions(+), 37 deletions(-) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 2de3382f6a0..3c419986e9e 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -682,6 +682,9 @@ func (c *Container) newSetnsProcess(p *Process, cmd *exec.Cmd, comm *processComm } func (c *Container) newInitConfig(process *Process) *initConfig { + // Set initial properties. For those properties that exist + // both in the container config and the process, use the ones + // from the container config first, and override them later. cfg := &initConfig{ Config: c.config, Args: process.Args, @@ -702,6 +705,9 @@ func (c *Container) newInitConfig(process *Process) *initConfig { ConsoleWidth: process.ConsoleWidth, ConsoleHeight: process.ConsoleHeight, } + + // Overwrite config properties with ones from process. + if process.NoNewPrivileges != nil { cfg.NoNewPrivileges = *process.NoNewPrivileges } @@ -714,6 +720,9 @@ func (c *Container) newInitConfig(process *Process) *initConfig { if len(process.Rlimits) > 0 { cfg.Rlimits = process.Rlimits } + + // Set misc properties. 
+ if cgroups.IsCgroup2UnifiedMode() { cfg.Cgroup2Path = c.cgroupManager.Path("") } diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index af62c54e5df..b9b8bb9c1c0 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -48,29 +48,57 @@ type network struct { TempVethPeerName string `json:"temp_veth_peer_name"` } -// initConfig is used for transferring parameters from Exec() to Init() +// initConfig is used for transferring parameters from Exec() to Init(). +// It contains: +// - original container config; +// - some [Process] properties; +// - set of properties merged from the container config ([configs.Config]) +// and the process ([Process]); +// - some properties that come from the container. +// +// When adding new fields, please make sure they go into the relevant section. type initConfig struct { - Args []string `json:"args"` - Env []string `json:"env"` - Cwd string `json:"cwd"` - Capabilities *configs.Capabilities `json:"capabilities"` - ProcessLabel string `json:"process_label"` - AppArmorProfile string `json:"apparmor_profile"` - NoNewPrivileges bool `json:"no_new_privileges"` - User string `json:"user"` - AdditionalGroups []string `json:"additional_groups"` - Config *configs.Config `json:"config"` - Networks []*network `json:"network"` - PassedFilesCount int `json:"passed_files_count"` - ContainerID string `json:"containerid"` - Rlimits []configs.Rlimit `json:"rlimits"` - CreateConsole bool `json:"create_console"` - ConsoleWidth uint16 `json:"console_width"` - ConsoleHeight uint16 `json:"console_height"` - RootlessEUID bool `json:"rootless_euid,omitempty"` - RootlessCgroups bool `json:"rootless_cgroups,omitempty"` - SpecState *specs.State `json:"spec_state,omitempty"` - Cgroup2Path string `json:"cgroup2_path,omitempty"` + // Config is the original container config. + Config *configs.Config `json:"config"` + + // Properties that are unique to and come from [Process]. 
+ + Args []string `json:"args"` + Env []string `json:"env"` + User string `json:"user"` + AdditionalGroups []string `json:"additional_groups"` + Cwd string `json:"cwd"` + CreateConsole bool `json:"create_console"` + ConsoleWidth uint16 `json:"console_width"` + ConsoleHeight uint16 `json:"console_height"` + PassedFilesCount int `json:"passed_files_count"` + + // Properties that exist both in the container config and the process, + // as merged by [Container.newInitConfig] (process properties have preference). + + AppArmorProfile string `json:"apparmor_profile"` + Capabilities *configs.Capabilities `json:"capabilities"` + NoNewPrivileges bool `json:"no_new_privileges"` + ProcessLabel string `json:"process_label"` + Rlimits []configs.Rlimit `json:"rlimits"` + + // Properties that only exist in container config. + // FIXME: they are also passed in Config above. + + RootlessEUID bool `json:"rootless_euid,omitempty"` + RootlessCgroups bool `json:"rootless_cgroups,omitempty"` + + // Miscellaneous properties, filled in by [Container.newInitConfig] + // unless documented otherwise. + + ContainerID string `json:"containerid"` + Cgroup2Path string `json:"cgroup2_path,omitempty"` + + // Networks is filled in from container config by [initProcess.createNetworkInterfaces]. + Networks []*network `json:"network"` + + // SpecState is filled in by [initProcess.Start]. + SpecState *specs.State `json:"spec_state,omitempty"` } // Init is part of "runc init" implementation. diff --git a/libcontainer/process.go b/libcontainer/process.go index 114b3f2b6cb..7353b376f82 100644 --- a/libcontainer/process.go +++ b/libcontainer/process.go @@ -17,8 +17,11 @@ type processOperations interface { pid() int } -// Process specifies the configuration and IO for a process inside -// a container. +// Process defines the configuration and IO for a process inside a container. +// +// Note that some Process properties are also present in container configuration +// ([configs.Config]). 
In all such cases, Process properties take precedence +// over container configuration ones. type Process struct { // The command to be run followed by any arguments. Args []string @@ -34,44 +37,54 @@ type Process struct { // in addition to those that the user belongs to. AdditionalGroups []string - // Cwd will change the processes current working directory inside the container's rootfs. + // Cwd will change the process's current working directory inside the container's rootfs. Cwd string - // Stdin is a pointer to a reader which provides the standard input stream. + // Stdin is a reader which provides the standard input stream. Stdin io.Reader - // Stdout is a pointer to a writer which receives the standard output stream. + // Stdout is a writer which receives the standard output stream. Stdout io.Writer - // Stderr is a pointer to a writer which receives the standard error stream. + // Stderr is a writer which receives the standard error stream. Stderr io.Writer - // ExtraFiles specifies additional open files to be inherited by the container + // ExtraFiles specifies additional open files to be inherited by the process. ExtraFiles []*os.File - // open handles to cloned binaries -- see dmz.CloneSelfExe for more details + // Open handles to cloned binaries -- see dmz.CloneSelfExe for more details. clonedExes []*os.File - // Initial sizings for the console + // Initial size for the console. ConsoleWidth uint16 ConsoleHeight uint16 - // Capabilities specify the capabilities to keep when executing the process inside the container - // All capabilities not specified will be dropped from the processes capability mask + // Capabilities specify the capabilities to keep when executing the process. + // All capabilities not specified will be dropped from the process's capability mask. + // + // If not nil, takes precedence over container's [configs.Config.Capabilities]. 
Capabilities *configs.Capabilities // AppArmorProfile specifies the profile to apply to the process and is - // changed at the time the process is execed + // changed at the time the process is executed. + // + // If not empty, takes precedence over container's [configs.Config.AppArmorProfile]. AppArmorProfile string - // Label specifies the label to apply to the process. It is commonly used by selinux + // Label specifies the label to apply to the process. It is commonly used by selinux. + // + // If not empty, takes precedence over container's [configs.Config.ProcessLabel]. Label string // NoNewPrivileges controls whether processes can gain additional privileges. + // + // If not nil, takes precedence over container's [configs.Config.NoNewPrivileges]. NoNewPrivileges *bool - // Rlimits specifies the resource limits, such as max open files, to set in the container - // If Rlimits are not set, the container will inherit rlimits from the parent process + // Rlimits specifies the resource limits, such as max open files, to set for the process. + // If unset, the process will inherit rlimits from the parent process. + // + // If not empty, takes precedence over container's [configs.Config.Rlimits]. Rlimits []configs.Rlimit // ConsoleSocket provides the masterfd console. From 6b55f22418240790009767eaf67aac186faa2025 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Wed, 15 Jan 2025 23:28:08 -0800 Subject: [PATCH 2/6] libct: rm Rootless* properties from initConfig They are passed in initConfig twice, so it does not make sense. NB: the alternative to that would be to remove Config field from initConfig, but it results in a much bigger patch and more maintenance down the road. 
Signed-off-by: Kir Kolyshkin --- libcontainer/container_linux.go | 2 -- libcontainer/init_linux.go | 10 ++-------- libcontainer/rootfs_linux.go | 2 +- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 3c419986e9e..763c6076632 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -696,8 +696,6 @@ func (c *Container) newInitConfig(process *Process) *initConfig { PassedFilesCount: len(process.ExtraFiles), ContainerID: c.ID(), NoNewPrivileges: c.config.NoNewPrivileges, - RootlessEUID: c.config.RootlessEUID, - RootlessCgroups: c.config.RootlessCgroups, AppArmorProfile: c.config.AppArmorProfile, ProcessLabel: c.config.ProcessLabel, Rlimits: c.config.Rlimits, diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index b9b8bb9c1c0..c848139f6b3 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -82,12 +82,6 @@ type initConfig struct { ProcessLabel string `json:"process_label"` Rlimits []configs.Rlimit `json:"rlimits"` - // Properties that only exist in container config. - // FIXME: they are also passed in Config above. - - RootlessEUID bool `json:"rootless_euid,omitempty"` - RootlessCgroups bool `json:"rootless_cgroups,omitempty"` - // Miscellaneous properties, filled in by [Container.newInitConfig] // unless documented otherwise. @@ -499,7 +493,7 @@ func setupUser(config *initConfig, addHome bool) error { } } - if config.RootlessEUID { + if config.Config.RootlessEUID { // We cannot set any additional groups in a rootless container and thus // we bail if the user asked us to do so. TODO: We currently can't do // this check earlier, but if libcontainer.Process.User was typesafe @@ -527,7 +521,7 @@ func setupUser(config *initConfig, addHome bool) error { // There's nothing we can do about /etc/group entries, so we silently // ignore setting groups here (since the user didn't explicitly ask us to // set the group). 
- allowSupGroups := !config.RootlessEUID && string(bytes.TrimSpace(setgroups)) != "deny" + allowSupGroups := !config.Config.RootlessEUID && string(bytes.TrimSpace(setgroups)) != "deny" if allowSupGroups { suppGroups := append(execUser.Sgids, addGroups...) diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go index 158e03c56d9..9824511ce99 100644 --- a/libcontainer/rootfs_linux.go +++ b/libcontainer/rootfs_linux.go @@ -106,7 +106,7 @@ func prepareRootfs(pipe *syncSocket, iConfig *initConfig) (err error) { root: config.Rootfs, label: config.MountLabel, cgroup2Path: iConfig.Cgroup2Path, - rootlessCgroups: iConfig.RootlessCgroups, + rootlessCgroups: config.RootlessCgroups, cgroupns: config.Namespaces.Contains(configs.NEWCGROUP), } for _, m := range config.Mounts { From 31f3dec47934a47e0ad7bb06a235cd7752f8f475 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Wed, 8 Jan 2025 12:25:42 -0800 Subject: [PATCH 3/6] libct/cap: allow New(nil) In runtime-spec, capabilities property is optional, but libcontainer/capabilities panics when New(nil) is called. Because of this, there's a kludge in finalizeNamespace to ensure capabilities.New is not called with nil argument, and there's a TestProcessEmptyCaps to ensure runc won't panic. Let's fix this at the source, allowing libct/cap to work with nil capabilities. (The caller is fixed by the next commit.) Signed-off-by: Kir Kolyshkin --- libcontainer/capabilities/capabilities.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/libcontainer/capabilities/capabilities.go b/libcontainer/capabilities/capabilities.go index 4e63d97a201..8c9453435db 100644 --- a/libcontainer/capabilities/capabilities.go +++ b/libcontainer/capabilities/capabilities.go @@ -45,6 +45,9 @@ func KnownCapabilities() []string { // printing a warning instead. 
func New(capConfig *configs.Capabilities) (*Caps, error) { var c Caps + if capConfig == nil { + return &c, nil + } _, err := capMap() if err != nil { @@ -101,6 +104,9 @@ type Caps struct { // ApplyBoundingSet sets the capability bounding set to those specified in the whitelist. func (c *Caps) ApplyBoundingSet() error { + if c.pid == nil { + return nil + } c.pid.Clear(capability.BOUNDING) c.pid.Set(capability.BOUNDING, c.caps[capability.BOUNDING]...) return c.pid.Apply(capability.BOUNDING) @@ -108,6 +114,9 @@ func (c *Caps) ApplyBoundingSet() error { // Apply sets all the capabilities for the current process in the config. func (c *Caps) ApplyCaps() error { + if c.pid == nil { + return nil + } c.pid.Clear(capability.CAPS | capability.BOUNDS) for _, g := range []capability.CapType{ capability.EFFECTIVE, From 275032399904cbf2381043cedcd06ae232b202e8 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 7 Jan 2025 19:32:27 -0800 Subject: [PATCH 4/6] libct: simplify Caps inheritance For all other properties that are available in both Config and Process, the merging is performed by newInitConfig. Let's do the same for Capabilities for the sake of code uniformity. Also, thanks to the previous commit, we no longer have to make sure we do not call capabilities.New(nil). 
Signed-off-by: Kir Kolyshkin --- libcontainer/container_linux.go | 5 ++++- libcontainer/init_linux.go | 8 +------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 763c6076632..1ab26cc2ccf 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -692,7 +692,7 @@ func (c *Container) newInitConfig(process *Process) *initConfig { User: process.User, AdditionalGroups: process.AdditionalGroups, Cwd: process.Cwd, - Capabilities: process.Capabilities, + Capabilities: c.config.Capabilities, PassedFilesCount: len(process.ExtraFiles), ContainerID: c.ID(), NoNewPrivileges: c.config.NoNewPrivileges, @@ -706,6 +706,9 @@ func (c *Container) newInitConfig(process *Process) *initConfig { // Overwrite config properties with ones from process. + if process.Capabilities != nil { + cfg.Capabilities = process.Capabilities + } if process.NoNewPrivileges != nil { cfg.NoNewPrivileges = *process.NoNewPrivileges } diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index c848139f6b3..042f7a28f8c 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -324,13 +324,7 @@ func finalizeNamespace(config *initConfig, addHome bool) error { } } - caps := &configs.Capabilities{} - if config.Capabilities != nil { - caps = config.Capabilities - } else if config.Config.Capabilities != nil { - caps = config.Config.Capabilities - } - w, err := capabilities.New(caps) + w, err := capabilities.New(config.Capabilities) if err != nil { return err } From a26f6f324648648f634303a06e16b8ccb198ae80 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 7 Jan 2025 19:48:58 -0800 Subject: [PATCH 5/6] runc exec: fix setting process.ioPriority Commit bfbd0305b added IOPriority field into both Config and Process, but forgot to add a mechanism to actually use Process.IOPriority. As a result, runc exec does not set Process.IOPriority ever. 
Fix it, and add a test case (which fails before the fix). Signed-off-by: Kir Kolyshkin --- CHANGELOG.md | 5 +++++ libcontainer/container_linux.go | 4 ++++ libcontainer/init_linux.go | 3 ++- libcontainer/process.go | 3 +++ libcontainer/setns_init_linux.go | 2 +- libcontainer/standard_init_linux.go | 2 +- tests/integration/ioprio.bats | 22 ++++++++++++++++++---- 7 files changed, 34 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 92e1b23cfde..1ae2b5c9843 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * `configs.CommandHook` struct has changed, Command is now a pointer. Also, `configs.NewCommandHook` now accepts a `*Command`. (#4325) +### Fixed + * `runc exec -p` no longer ignores specified `ioPriority` setting. + Similarly, libcontainer's `Container.Start` and `Container.Run` + methods no longer ignore `Process.IOPriority` setting. (#4585) + ## [1.2.0] - 2024-10-22 > できるときにできることをやるんだ。それが今だ。 diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 1ab26cc2ccf..a53e7dab0fc 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -699,6 +699,7 @@ func (c *Container) newInitConfig(process *Process) *initConfig { AppArmorProfile: c.config.AppArmorProfile, ProcessLabel: c.config.ProcessLabel, Rlimits: c.config.Rlimits, + IOPriority: c.config.IOPriority, CreateConsole: process.ConsoleSocket != nil, ConsoleWidth: process.ConsoleWidth, ConsoleHeight: process.ConsoleHeight, @@ -721,6 +722,9 @@ func (c *Container) newInitConfig(process *Process) *initConfig { if len(process.Rlimits) > 0 { cfg.Rlimits = process.Rlimits } + if process.IOPriority != nil { + cfg.IOPriority = process.IOPriority + } // Set misc properties. 
diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index 042f7a28f8c..8d83a63f799 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -81,6 +81,7 @@ type initConfig struct { NoNewPrivileges bool `json:"no_new_privileges"` ProcessLabel string `json:"process_label"` Rlimits []configs.Rlimit `json:"rlimits"` + IOPriority *configs.IOPriority `json:"io_priority,omitempty"` // Miscellaneous properties, filled in by [Container.newInitConfig] // unless documented otherwise. @@ -672,7 +673,7 @@ func setupScheduler(config *configs.Config) error { return nil } -func setupIOPriority(config *configs.Config) error { +func setupIOPriority(config *initConfig) error { const ioprioWhoPgrp = 1 ioprio := config.IOPriority diff --git a/libcontainer/process.go b/libcontainer/process.go index 7353b376f82..29ad4e80a85 100644 --- a/libcontainer/process.go +++ b/libcontainer/process.go @@ -114,6 +114,9 @@ type Process struct { Scheduler *configs.Scheduler + // IOPriority is a process I/O priority. + // + // If not nil, takes precedence over container's [configs.Config.IOPriority]. IOPriority *configs.IOPriority } diff --git a/libcontainer/setns_init_linux.go b/libcontainer/setns_init_linux.go index b42b3be1a89..708ffda8b94 100644 --- a/libcontainer/setns_init_linux.go +++ b/libcontainer/setns_init_linux.go @@ -76,7 +76,7 @@ func (l *linuxSetnsInit) Init() error { return err } - if err := setupIOPriority(l.config.Config); err != nil { + if err := setupIOPriority(l.config); err != nil { return err } // Tell our parent that we're ready to exec. 
This must be done before the diff --git a/libcontainer/standard_init_linux.go b/libcontainer/standard_init_linux.go index 510f9483baa..88ce7c0dd02 100644 --- a/libcontainer/standard_init_linux.go +++ b/libcontainer/standard_init_linux.go @@ -160,7 +160,7 @@ func (l *linuxStandardInit) Init() error { return err } - if err := setupIOPriority(l.config.Config); err != nil { + if err := setupIOPriority(l.config); err != nil { return err } diff --git a/tests/integration/ioprio.bats b/tests/integration/ioprio.bats index a907d782f01..9faa72d61ab 100644 --- a/tests/integration/ioprio.bats +++ b/tests/integration/ioprio.bats @@ -20,11 +20,25 @@ function teardown() { # Check the init process. runc exec test_ioprio ionice -p 1 [ "$status" -eq 0 ] - [[ "$output" = *'best-effort: prio 4'* ]] + [ "${lines[0]}" = 'best-effort: prio 4' ] - # Check the process made from the exec command. + # Check an exec process, which should derive ioprio from config.json. runc exec test_ioprio ionice [ "$status" -eq 0 ] - - [[ "$output" = *'best-effort: prio 4'* ]] + [ "${lines[0]}" = 'best-effort: prio 4' ] + + # Check an exec with a priority taken from process.json, + # which should override the ioprio in config.json. + proc=' +{ + "terminal": false, + "ioPriority": { + "class": "IOPRIO_CLASS_IDLE" + }, + "args": [ "/usr/bin/ionice" ], + "cwd": "/" +}' + runc exec --process <(echo "$proc") test_ioprio + [ "$status" -eq 0 ] + [ "${lines[0]}" = 'idle' ] } From bad011edfbf9d12ebda122b322419c1ff8f8d059 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Wed, 8 Jan 2025 14:11:02 -0800 Subject: [PATCH 6/6] runc exec: fix setting process.Scheduler Commit 770728e1 added Scheduler field into both Config and Process, but forgot to add a mechanism to actually use Process.Scheduler. As a result, runc exec does not set Process.Scheduler ever. Fix it, and add a test case (which fails before the fix). 
Signed-off-by: Kir Kolyshkin --- CHANGELOG.md | 7 +++--- libcontainer/container_linux.go | 4 +++ libcontainer/init_linux.go | 5 ++-- libcontainer/process.go | 3 +++ libcontainer/setns_init_linux.go | 2 +- libcontainer/standard_init_linux.go | 2 +- tests/integration/scheduler.bats | 38 ++++++++++++++++++++++++++--- 7 files changed, 51 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ae2b5c9843..3a621b6f9c9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,9 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 Also, `configs.NewCommandHook` now accepts a `*Command`. (#4325) ### Fixed - * `runc exec -p` no longer ignores specified `ioPriority` setting. - Similarly, libcontainer's `Container.Start` and `Container.Run` - methods no longer ignore `Process.IOPriority` setting. (#4585) + * `runc exec -p` no longer ignores specified `ioPriority` and `scheduler` + settings. Similarly, libcontainer's `Container.Start` and `Container.Run` + methods no longer ignore `Process.IOPriority` and `Process.Scheduler` + settings. (#4585) ## [1.2.0] - 2024-10-22 diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index a53e7dab0fc..bb7e5ccdcce 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -700,6 +700,7 @@ func (c *Container) newInitConfig(process *Process) *initConfig { ProcessLabel: c.config.ProcessLabel, Rlimits: c.config.Rlimits, IOPriority: c.config.IOPriority, + Scheduler: c.config.Scheduler, CreateConsole: process.ConsoleSocket != nil, ConsoleWidth: process.ConsoleWidth, ConsoleHeight: process.ConsoleHeight, @@ -725,6 +726,9 @@ func (c *Container) newInitConfig(process *Process) *initConfig { if process.IOPriority != nil { cfg.IOPriority = process.IOPriority } + if process.Scheduler != nil { + cfg.Scheduler = process.Scheduler + } // Set misc properties. 
diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index 8d83a63f799..a82fc6c8499 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -82,6 +82,7 @@ type initConfig struct { ProcessLabel string `json:"process_label"` Rlimits []configs.Rlimit `json:"rlimits"` IOPriority *configs.IOPriority `json:"io_priority,omitempty"` + Scheduler *configs.Scheduler `json:"scheduler,omitempty"` // Miscellaneous properties, filled in by [Container.newInitConfig] // unless documented otherwise. @@ -656,7 +657,7 @@ func setupRlimits(limits []configs.Rlimit, pid int) error { return nil } -func setupScheduler(config *configs.Config) error { +func setupScheduler(config *initConfig) error { if config.Scheduler == nil { return nil } @@ -665,7 +666,7 @@ func setupScheduler(config *configs.Config) error { return err } if err := unix.SchedSetAttr(0, attr, 0); err != nil { - if errors.Is(err, unix.EPERM) && config.Cgroups.CpusetCpus != "" { + if errors.Is(err, unix.EPERM) && config.Config.Cgroups.CpusetCpus != "" { return errors.New("process scheduler can't be used together with AllowedCPUs") } return fmt.Errorf("error setting scheduler: %w", err) diff --git a/libcontainer/process.go b/libcontainer/process.go index 29ad4e80a85..36a8f2859e7 100644 --- a/libcontainer/process.go +++ b/libcontainer/process.go @@ -112,6 +112,9 @@ type Process struct { // For cgroup v2, the only key allowed is "". SubCgroupPaths map[string]string + // Scheduler represents the scheduling attributes for a process. + // + // If not nil, takes precedence over container's [configs.Config.Scheduler]. Scheduler *configs.Scheduler // IOPriority is a process I/O priority. 
diff --git a/libcontainer/setns_init_linux.go b/libcontainer/setns_init_linux.go index 708ffda8b94..151210cefa8 100644 --- a/libcontainer/setns_init_linux.go +++ b/libcontainer/setns_init_linux.go @@ -72,7 +72,7 @@ func (l *linuxSetnsInit) Init() error { unix.Umask(int(*l.config.Config.Umask)) } - if err := setupScheduler(l.config.Config); err != nil { + if err := setupScheduler(l.config); err != nil { return err } diff --git a/libcontainer/standard_init_linux.go b/libcontainer/standard_init_linux.go index 88ce7c0dd02..d0f3249c5ed 100644 --- a/libcontainer/standard_init_linux.go +++ b/libcontainer/standard_init_linux.go @@ -156,7 +156,7 @@ func (l *linuxStandardInit) Init() error { } } - if err := setupScheduler(l.config.Config); err != nil { + if err := setupScheduler(l.config); err != nil { return err } diff --git a/tests/integration/scheduler.bats b/tests/integration/scheduler.bats index b7cd96f8890..6c80d86426b 100644 --- a/tests/integration/scheduler.bats +++ b/tests/integration/scheduler.bats @@ -12,17 +12,49 @@ function teardown() { } @test "scheduler is applied" { - update_config ' .process.scheduler = {"policy": "SCHED_DEADLINE", "nice": 19, "priority": 0, "runtime": 42000, "deadline": 1000000, "period": 1000000, }' + update_config ' .process.scheduler = { + "policy": "SCHED_BATCH", + "priority": 0, + "nice": 19 + }' runc run -d --console-socket "$CONSOLE_SOCKET" test_scheduler [ "$status" -eq 0 ] + # Check init settings. runc exec test_scheduler chrt -p 1 [ "$status" -eq 0 ] + [[ "${lines[0]}" == *"scheduling policy: SCHED_BATCH" ]] + [[ "${lines[1]}" == *"priority: 0" ]] + + # Check exec settings derived from config.json. + runc exec test_scheduler sh -c 'chrt -p $$' + [ "$status" -eq 0 ] + [[ "${lines[0]}" == *"scheduling policy: SCHED_BATCH" ]] + [[ "${lines[1]}" == *"priority: 0" ]] + + # Another exec, with different scheduler settings. 
+ proc=' +{ + "terminal": false, + "args": [ "/bin/sleep", "600" ], + "cwd": "/", + "scheduler": { + "policy": "SCHED_DEADLINE", + "flags": [ "SCHED_FLAG_RESET_ON_FORK" ], + "nice": 19, + "priority": 0, + "runtime": 42000, + "deadline": 100000, + "period": 1000000 + } +}' + __runc exec -d --pid-file pid.txt --process <(echo "$proc") test_scheduler - [[ "${lines[0]}" == *"scheduling policy: SCHED_DEADLINE" ]] + run chrt -p "$(cat pid.txt)" + [[ "${lines[0]}" == *"scheduling policy: SCHED_DEADLINE|SCHED_RESET_ON_FORK" ]] [[ "${lines[1]}" == *"priority: 0" ]] - [[ "${lines[2]}" == *"runtime/deadline/period parameters: 42000/1000000/1000000" ]] + [[ "${lines[2]}" == *"runtime/deadline/period parameters: 42000/100000/1000000" ]] } # Checks that runc emits a specific error when scheduling policy is used