Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RFC] Implement systemd-specific per-cgroup support (+ proof-of-concept "devices" and "memory") #1991

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 72 additions & 0 deletions libcontainer/cgroups/fs/devices.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,14 @@
package fs

import (
"fmt"

"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/system"

systemdDbus "github.com/coreos/go-systemd/dbus"
"github.com/godbus/dbus"
)

type DevicesGroup struct {
Expand Down Expand Up @@ -68,9 +73,76 @@ func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error {
}
}

return s.SetCgroupv1(path, cgroup)
}

// SetCgroupv1 is the cgroup v1-only part of Set for the devices controller;
// Set calls it as its final step. It currently writes nothing and always
// returns nil.
func (s *DevicesGroup) SetCgroupv1(path string, cgroup *configs.Cgroup) error {
return nil
}

// deviceAllow is one entry of the list that ToSystemdProperties wraps in a
// D-Bus variant and sends as the "DeviceAllow" unit property.
type deviceAllow struct {
// Path is the device specifier returned by Device.SystemdCgroupPath.
Path string
// Permissions is copied verbatim from the device rule's Permissions field.
Permissions string
}

// ToSystemdProperties converts the device rules in cgroup.Resources into the
// systemd "DevicePolicy" and "DeviceAllow" unit properties.
//
// Two configuration styles are recognised:
//   - Resources.Devices: the first rule must deny everything (Type 'a',
//     Allow false). Every later allow rule is added to the allow list and
//     later deny rules are ignored. Any other first rule yields an error.
//   - Resources.AllowAllDevices (consulted only when Devices is empty): true
//     selects the "auto" policy; false keeps "strict" and allow-lists
//     Resources.AllowedDevices.
//
// Rules whose SystemdCgroupPath is empty are skipped without an error. When
// neither style is configured the result is a "strict" policy with an empty
// allow list.
func (s *DevicesGroup) ToSystemdProperties(cgroup *configs.Cgroup) ([]systemdDbus.Property, error) {
	policy := "strict"
	var allowed []deviceAllow

	res := cgroup.Resources
	switch {
	case len(res.Devices) > 0:
		for idx, rule := range res.Devices {
			if idx == 0 {
				// Only a leading "deny all" rule can be expressed as a
				// systemd allow list; anything else is rejected.
				if rule.Type != 'a' || rule.Allow {
					return []systemdDbus.Property{}, fmt.Errorf("systemd only supports a whitelist on device cgroup, please use AllowedDevices instead.")
				}
				continue
			}
			// Everything is denied already, so only allow rules matter.
			if rule.Allow {
				if specifier := rule.SystemdCgroupPath(); specifier != "" {
					allowed = append(allowed, deviceAllow{Path: specifier, Permissions: rule.Permissions})
				}
			}
		}
	case res.AllowAllDevices == nil:
		// Nothing configured: keep the strict policy and empty allow list.
	case *res.AllowAllDevices:
		policy = "auto"
	default:
		for _, rule := range res.AllowedDevices {
			if specifier := rule.SystemdCgroupPath(); specifier != "" {
				allowed = append(allowed, deviceAllow{Path: specifier, Permissions: rule.Permissions})
			}
		}
	}

	policyProp := systemdDbus.Property{
		Name:  "DevicePolicy",
		Value: dbus.MakeVariant(policy),
	}
	allowProp := systemdDbus.Property{
		Name:  "DeviceAllow",
		Value: dbus.MakeVariant(allowed),
	}
	return []systemdDbus.Property{policyProp, allowProp}, nil
}

// Remove hands the result of d.path("devices") to removePath and returns
// whatever error removePath reports.
func (s *DevicesGroup) Remove(d *cgroupData) error {
return removePath(d.path("devices"))
}
Expand Down
39 changes: 34 additions & 5 deletions libcontainer/cgroups/fs/memory.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ import (

"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"

systemdDbus "github.com/coreos/go-systemd/dbus"
"github.com/godbus/dbus"
)

const (
Expand Down Expand Up @@ -119,18 +122,21 @@ func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
return err
}

if cgroup.Resources.KernelMemory != 0 {
if err := setKernelMemory(path, cgroup.Resources.KernelMemory); err != nil {
if cgroup.Resources.MemoryReservation != 0 {
if err := writeFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil {
return err
}
}

if cgroup.Resources.MemoryReservation != 0 {
if err := writeFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil {
return s.SetCgroupv1(path, cgroup)
}

func (s *MemoryGroup) SetCgroupv1(path string, cgroup *configs.Cgroup) error {
if cgroup.Resources.KernelMemory != 0 {
if err := setKernelMemory(path, cgroup.Resources.KernelMemory); err != nil {
return err
}
}

if cgroup.Resources.KernelMemoryTCP != 0 {
if err := writeFile(path, "memory.kmem.tcp.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemoryTCP, 10)); err != nil {
return err
Expand All @@ -154,6 +160,29 @@ func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
return nil
}

// ToSystemdProperties translates the memory limits in cgroup.Resources into
// systemd unit properties. A field that is zero produces no property.
//
//   - Resources.Memory            -> "MemoryMax"
//   - Resources.MemorySwap        -> "MemorySwapMax"
//   - Resources.MemoryReservation -> "MemoryLow"
//
// Resources.MemorySwap is the combined memory+swap limit, whereas systemd's
// MemorySwapMax limits swap alone. When a positive memory limit is set and the
// combined limit is at least as large, the memory limit is therefore
// subtracted before the value is sent. In every other case (no memory limit,
// a negative "unlimited" value, or a combined limit smaller than the memory
// limit) the value is passed through unchanged.
//
// Values are converted with uint64(), so -1 becomes the maximum uint64.
// The returned error is always nil.
func (s *MemoryGroup) ToSystemdProperties(cgroup *configs.Cgroup) ([]systemdDbus.Property, error) {
	properties := []systemdDbus.Property{}
	res := cgroup.Resources

	if res.Memory != 0 {
		properties = append(properties, systemdDbus.Property{
			Name:  "MemoryMax",
			Value: dbus.MakeVariant(uint64(res.Memory)),
		})
	}
	if res.MemorySwap != 0 {
		swap := res.MemorySwap
		// Convert memory+swap into a swap-only amount when both limits are
		// concrete and consistent with each other.
		if res.Memory > 0 && swap >= res.Memory {
			swap -= res.Memory
		}
		properties = append(properties, systemdDbus.Property{
			Name:  "MemorySwapMax",
			Value: dbus.MakeVariant(uint64(swap)),
		})
	}
	if res.MemoryReservation != 0 {
		properties = append(properties, systemdDbus.Property{
			Name:  "MemoryLow",
			Value: dbus.MakeVariant(uint64(res.MemoryReservation)),
		})
	}
	return properties, nil
}

// Remove hands the result of d.path("memory") to removePath and returns
// whatever error removePath reports.
func (s *MemoryGroup) Remove(d *cgroupData) error {
return removePath(d.path("memory"))
}
Expand Down
32 changes: 32 additions & 0 deletions libcontainer/cgroups/systemd/apply_systemd.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,14 @@ type subsystem interface {
Set(path string, cgroup *configs.Cgroup) error
}

// systemdSubsystem is an optional interface for subsystems whose limits can be
// expressed as systemd unit properties. Callers in this file detect it with a
// type assertion on each subsystem: Apply appends the returned properties to
// the transient unit, joinCgroups skips such subsystems, and Manager.Set calls
// SetCgroupv1 instead of the subsystem's regular Set.
type systemdSubsystem interface {
// Returns a list of systemd properties to manage the underlying cgroups.
ToSystemdProperties(cgroup *configs.Cgroup) ([]systemdDbus.Property, error)
// Set the cgroupv1 attributes only, which can be used when in hybrid or legacy mode
// to keep setting properties that are not (and will not be) managed by systemd.
SetCgroupv1(path string, cgroup *configs.Cgroup) error
}

// errSubsystemDoesNotExist is the sentinel error for a cgroup subsystem that
// does not exist.
var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist")

// subsystemSet is a list of subsystem implementations.
type subsystemSet []subsystem
Expand Down Expand Up @@ -282,6 +290,18 @@ func (m *Manager) Apply(pid int) error {
}
}

// Set the systemd properties coming from subsystems.
for _, sys := range subsystems {
if sdSys, ok := sys.(systemdSubsystem); ok {
sdProp, err := sdSys.ToSystemdProperties(c)
if err != nil {
return err
}
logrus.Debugf("Setting properties on unit %s from subsystem %s: %#v", unitName, sys.Name(), sdProp)
properties = append(properties, sdProp...)
}
}

statusChan := make(chan string, 1)
if _, err := theConn.StartTransientUnit(unitName, "replace", properties, statusChan); err == nil {
select {
Expand Down Expand Up @@ -351,6 +371,10 @@ func join(c *configs.Cgroup, subsystem string, pid int) (string, error) {
func joinCgroups(c *configs.Cgroup, pid int) error {
for _, sys := range subsystems {
name := sys.Name()
if _, ok := sys.(systemdSubsystem); ok {
// Skip it if it's a systemd subsystem, it's been done already.
continue
}
switch name {
case "name=systemd":
// let systemd handle this
Expand Down Expand Up @@ -509,6 +533,14 @@ func (m *Manager) Set(container *configs.Config) error {
return err
}

if sdSys, ok := sys.(systemdSubsystem); ok {
// Only set the cgroupv1 properties.
if err := sdSys.SetCgroupv1(path, container.Cgroups); err != nil {
return err
}
continue
}

if err := sys.Set(path, container.Cgroups); err != nil {
return err
}
Expand Down
16 changes: 16 additions & 0 deletions libcontainer/configs/device.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,22 @@ func (d *Device) CgroupString() string {
return fmt.Sprintf("%c %s:%s %s", d.Type, deviceNumberString(d.Major), deviceNumberString(d.Minor), d.Permissions)
}

// SystemdCgroupPath builds the device specifier used for this device in a
// systemd "DeviceAllow" entry.
//
// Block ('b') and character ('c') devices are supported; any other Type
// returns the empty string. A wildcard minor produces "<kind>-<major>", which
// covers a whole major; otherwise the result is "/dev/<kind>/<major>:<minor>".
//
// NOTE(review): systemd documents the "char-"/"block-" forms as taking a
// device group name from /proc/devices. Confirm that a numeric major is
// accepted there.
func (d *Device) SystemdCgroupPath() string {
	var kind string
	switch d.Type {
	case 'b':
		kind = "block"
	case 'c':
		kind = "char"
	default:
		// TODO: Invalid d.Type, do something about it.
		return ""
	}

	major := deviceNumberString(d.Major)
	if d.Minor == Wildcard {
		// Wildcard minor: address the entire major at once.
		return fmt.Sprintf("%s-%s", kind, major)
	}
	// Systemd uses /dev/char/x:y or /dev/block/x:y for devices by major/minor.
	return fmt.Sprintf("/dev/%s/%s:%s", kind, major, deviceNumberString(d.Minor))
}

// Mkdev packs the device's major and minor numbers into one integer: the
// major shifted left by 8 bits, the low 8 bits of the minor in place, and
// minor bits 8-19 shifted up by a further 12 bits.
func (d *Device) Mkdev() int {
	majorBits := d.Major << 8
	minorLow := d.Minor & 0xff
	minorHigh := (d.Minor & 0xfff00) << 12
	return int(majorBits | minorLow | minorHigh)
}
Expand Down