Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[1.0] cgroups: Set: fix freeze, avoid unnecessary freeze from systemd v1 #3093

Merged
merged 6 commits into from
Jul 16, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions libcontainer/cgroups/systemd/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,15 @@ func resetFailedUnit(cm *dbusConnManager, name string) {
}
}

func getUnitProperty(cm *dbusConnManager, unitName string, propertyName string) (*systemdDbus.Property, error) {
var prop *systemdDbus.Property
err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) (Err error) {
prop, Err = c.GetUnitPropertyContext(context.TODO(), unitName, propertyName)
return Err
})
return prop, err
}

func setUnitProperties(cm *dbusConnManager, name string, properties ...systemdDbus.Property) error {
return cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
return c.SetUnitPropertiesContext(context.TODO(), name, true, properties...)
Expand Down
107 changes: 104 additions & 3 deletions libcontainer/cgroups/systemd/systemd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,110 @@ func newManager(config *configs.Cgroup) cgroups.Manager {
return NewLegacyManager(config, nil)
}

// TestPodSkipDevicesUpdate checks that updating a pod having SkipDevices: true
// does not result in spurious "permission denied" errors in a container
// running under the pod. The test is somewhat similar in nature to the
// @test "update devices [minimal transition rules]" in tests/integration,
// but uses a pod.
func TestPodSkipDevicesUpdate(t *testing.T) {
if !IsRunningSystemd() {
t.Skip("Test requires systemd.")
}
if os.Geteuid() != 0 {
t.Skip("Test requires root.")
}

podName := "system-runc_test_pod" + t.Name() + ".slice"
podConfig := &configs.Cgroup{
Parent: "system.slice",
Name: podName,
Resources: &configs.Resources{
PidsLimit: 42,
Memory: 32 * 1024 * 1024,
SkipDevices: true,
},
}
// Create "pod" cgroup (a systemd slice to hold containers).
pm := newManager(podConfig)
defer pm.Destroy() //nolint:errcheck
if err := pm.Apply(-1); err != nil {
t.Fatal(err)
}
if err := pm.Set(podConfig.Resources); err != nil {
t.Fatal(err)
}

containerConfig := &configs.Cgroup{
Parent: podName,
ScopePrefix: "test",
Name: "PodSkipDevicesUpdate",
Resources: &configs.Resources{
Devices: []*devices.Rule{
// Allow access to /dev/null.
{
Type: devices.CharDevice,
Major: 1,
Minor: 3,
Permissions: "rwm",
Allow: true,
},
},
},
}

// Create a "container" within the "pod" cgroup.
// This is not a real container, just a process in the cgroup.
cmd := exec.Command("bash", "-c", "while true; do echo > /dev/null; done")
cmd.Env = append(os.Environ(), "LANG=C")
var stderr bytes.Buffer
cmd.Stderr = &stderr
if err := cmd.Start(); err != nil {
t.Fatal(err)
}
// Make sure to not leave a zombie.
defer func() {
// These may fail, we don't care.
_ = cmd.Process.Kill()
_ = cmd.Wait()
}()

// Put the process into a cgroup.
cm := newManager(containerConfig)
defer cm.Destroy() //nolint:errcheck

if err := cm.Apply(cmd.Process.Pid); err != nil {
t.Fatal(err)
}
// Check that we put the "container" into the "pod" cgroup.
if !strings.HasPrefix(cm.Path("devices"), pm.Path("devices")) {
t.Fatalf("expected container cgroup path %q to be under pod cgroup path %q",
cm.Path("devices"), pm.Path("devices"))
}
if err := cm.Set(containerConfig.Resources); err != nil {
t.Fatal(err)
}

// Now update the pod a few times.
for i := 0; i < 42; i++ {
podConfig.Resources.PidsLimit++
podConfig.Resources.Memory += 1024 * 1024
if err := pm.Set(podConfig.Resources); err != nil {
t.Fatal(err)
}
}
// Kill the "container".
if err := cmd.Process.Kill(); err != nil {
t.Fatal(err)
}

_ = cmd.Wait()

// "Container" stderr should be empty.
if stderr.Len() != 0 {
t.Fatalf("container stderr not empty: %s", stderr.String())
}
}

func testSkipDevices(t *testing.T, skipDevices bool, expected []string) {
if !IsRunningSystemd() {
t.Skip("Test requires systemd.")
Expand Down Expand Up @@ -219,9 +323,6 @@ func TestFreezePodCgroup(t *testing.T) {
t.Fatal(err)
}

if err := pm.Freeze(configs.Frozen); err != nil {
t.Fatal(err)
}
if err := pm.Set(podConfig.Resources); err != nil {
t.Fatal(err)
}
Expand Down
115 changes: 85 additions & 30 deletions libcontainer/cgroups/systemd/v1.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@ import (
"sync"

systemdDbus "github.com/coreos/go-systemd/v22/dbus"
"github.com/godbus/dbus/v5"
"github.com/sirupsen/logrus"

"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/sirupsen/logrus"
)

type legacyManager struct {
Expand Down Expand Up @@ -278,18 +280,23 @@ func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
}

func (m *legacyManager) Freeze(state configs.FreezerState) error {
err := m.doFreeze(state)
if err == nil {
m.cgroups.Resources.Freezer = state
}
return err
}

// doFreeze is the same as Freeze but without
// changing the m.cgroups.Resources.Frozen field.
func (m *legacyManager) doFreeze(state configs.FreezerState) error {
path, ok := m.paths["freezer"]
if !ok {
return errSubsystemDoesNotExist
}
prevState := m.cgroups.Resources.Freezer
m.cgroups.Resources.Freezer = state
freezer := &fs.FreezerGroup{}
if err := freezer.Set(path, m.cgroups.Resources); err != nil {
m.cgroups.Resources.Freezer = prevState
return err
}
return nil
resources := &configs.Resources{Freezer: state}
return freezer.Set(path, resources)
}

func (m *legacyManager) GetPids() ([]int, error) {
Expand Down Expand Up @@ -325,6 +332,61 @@ func (m *legacyManager) GetStats() (*cgroups.Stats, error) {
return stats, nil
}

// freezeBeforeSet answers whether there is a need to freeze the cgroup before
// applying its systemd unit properties, and thaw after, while avoiding
// unnecessary freezer state changes.
//
// The reason why we have to freeze is that systemd's application of device
// rules is done disruptively, resulting in spurious errors to common devices
// (unlike our fs driver, they will happily write deny-all rules to running
// containers). So we have to freeze the container to avoid the container get
// an occasional "permission denied" error.
func (m *legacyManager) freezeBeforeSet(unitName string, r *configs.Resources) (needsFreeze, needsThaw bool, err error) {
// Special case for SkipDevices, as used by Kubernetes to create pod
// cgroups with allow-all device policy).
if r.SkipDevices {
// No need to freeze if SkipDevices is set, and either
// (1) systemd unit does not (yet) exist, or
// (2) it has DevicePolicy=auto and empty DeviceAllow list.
//
// Interestingly, (1) and (2) are the same here because
// a non-existent unit returns default properties,
// and settings in (2) are the defaults.
//
// Do not return errors from getUnitProperty, as they alone
// should not prevent Set from working.
devPolicy, e := getUnitProperty(m.dbus, unitName, "DevicePolicy")
if e == nil && devPolicy.Value == dbus.MakeVariant("auto") {
devAllow, e := getUnitProperty(m.dbus, unitName, "DeviceAllow")
if e == nil && devAllow.Value == dbus.MakeVariant([]deviceAllowEntry{}) {
needsFreeze = false
needsThaw = false
return
}
}
}

needsFreeze = true
needsThaw = true

// Check the current freezer state.
freezerState, err := m.GetFreezerState()
if err != nil {
return
}
if freezerState == configs.Frozen {
// Already frozen, and should stay frozen.
needsFreeze = false
needsThaw = false
}

if r.Freezer == configs.Frozen {
// Will be frozen anyway -- no need to thaw.
needsThaw = false
}
return
}

func (m *legacyManager) Set(r *configs.Resources) error {
// If Paths are set, then we are just joining cgroups paths
// and there is no need to set any values.
Expand All @@ -339,34 +401,27 @@ func (m *legacyManager) Set(r *configs.Resources) error {
return err
}

// Figure out the current freezer state, so we can revert to it after we
// temporarily freeze the container.
targetFreezerState, err := m.GetFreezerState()
unitName := getUnitName(m.cgroups)
needsFreeze, needsThaw, err := m.freezeBeforeSet(unitName, r)
if err != nil {
return err
}
if targetFreezerState == configs.Undefined {
targetFreezerState = configs.Thawed
}

// We have to freeze the container while systemd sets the cgroup settings.
// The reason for this is that systemd's application of DeviceAllow rules
// is done disruptively, resulting in spurrious errors to common devices
// (unlike our fs driver, they will happily write deny-all rules to running
// containers). So we freeze the container to avoid them hitting the cgroup
// error. But if the freezer cgroup isn't supported, we just warn about it.
if err := m.Freeze(configs.Frozen); err != nil {
logrus.Infof("freeze container before SetUnitProperties failed: %v", err)
if needsFreeze {
if err := m.doFreeze(configs.Frozen); err != nil {
// If freezer cgroup isn't supported, we just warn about it.
logrus.Infof("freeze container before SetUnitProperties failed: %v", err)
}
}

if err := setUnitProperties(m.dbus, getUnitName(m.cgroups), properties...); err != nil {
_ = m.Freeze(targetFreezerState)
return err
setErr := setUnitProperties(m.dbus, unitName, properties...)
if needsThaw {
if err := m.doFreeze(configs.Thawed); err != nil {
logrus.Infof("thaw container after SetUnitProperties failed: %v", err)
}
}
if setErr != nil {
return setErr
}

// Reset freezer state before we apply the configuration, to avoid clashing
// with the freezer setting in the configuration.
_ = m.Freeze(targetFreezerState)

for _, sys := range legacySubsystems {
// Get the subsystem path, but don't error out for not found cgroups.
Expand Down
49 changes: 37 additions & 12 deletions libcontainer/integration/exec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -493,28 +493,41 @@ func TestAdditionalGroups(t *testing.T) {
}

func TestFreeze(t *testing.T) {
testFreeze(t, false)
}

func TestSystemdFreeze(t *testing.T) {
if !systemd.IsRunningSystemd() {
t.Skip("Test requires systemd.")
for _, systemd := range []bool{true, false} {
for _, set := range []bool{true, false} {
name := ""
if systemd {
name += "Systemd"
} else {
name += "FS"
}
if set {
name += "ViaSet"
} else {
name += "ViaPauseResume"
}
t.Run(name, func(t *testing.T) {
testFreeze(t, systemd, set)
})
}
}
testFreeze(t, true)
}

func testFreeze(t *testing.T, systemd bool) {
func testFreeze(t *testing.T, withSystemd bool, useSet bool) {
if testing.Short() {
return
}
if withSystemd && !systemd.IsRunningSystemd() {
t.Skip("Test requires systemd.")
}

rootfs, err := newRootfs()
ok(t, err)
defer remove(rootfs)

config := newTemplateConfig(t, &tParam{
rootfs: rootfs,
systemd: systemd,
systemd: withSystemd,
})
container, err := newContainer(t, config)
ok(t, err)
Expand All @@ -535,16 +548,28 @@ func testFreeze(t *testing.T, systemd bool) {
defer stdinW.Close() //nolint: errcheck
ok(t, err)

err = container.Pause()
if !useSet {
err = container.Pause()
} else {
config.Cgroups.Resources.Freezer = configs.Frozen
err = container.Set(*config)
}
ok(t, err)

state, err := container.Status()
ok(t, err)
err = container.Resume()
ok(t, err)
if state != libcontainer.Paused {
t.Fatal("Unexpected state: ", state)
}

if !useSet {
err = container.Resume()
} else {
config.Cgroups.Resources.Freezer = configs.Thawed
err = container.Set(*config)
}
ok(t, err)

_ = stdinW.Close()
waitProcess(pconfig, t)
}
Expand Down
3 changes: 2 additions & 1 deletion libcontainer/integration/template_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package integration

import (
"strconv"
"strings"
"testing"
"time"

Expand Down Expand Up @@ -215,7 +216,7 @@ func newTemplateConfig(t *testing.T, p *tParam) *configs.Config {

if p.systemd {
id := strconv.FormatInt(-int64(time.Now().Nanosecond()), 36)
config.Cgroups.Name = t.Name() + id
config.Cgroups.Name = strings.ReplaceAll(t.Name(), "/", "_") + id
// do not change Parent (see newContainer)
config.Cgroups.Parent = "system.slice"
config.Cgroups.ScopePrefix = "runc-test"
Expand Down
Loading