diff --git a/libcontainer/cgroups/systemd/common.go b/libcontainer/cgroups/systemd/common.go index 6edcaebf76f..125f6f5562e 100644 --- a/libcontainer/cgroups/systemd/common.go +++ b/libcontainer/cgroups/systemd/common.go @@ -389,6 +389,15 @@ func resetFailedUnit(cm *dbusConnManager, name string) { } } +func getUnitProperty(cm *dbusConnManager, unitName string, propertyName string) (*systemdDbus.Property, error) { + var prop *systemdDbus.Property + err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) (Err error) { + prop, Err = c.GetUnitPropertyContext(context.TODO(), unitName, propertyName) + return Err + }) + return prop, err +} + func setUnitProperties(cm *dbusConnManager, name string, properties ...systemdDbus.Property) error { return cm.retryOnDisconnect(func(c *systemdDbus.Conn) error { return c.SetUnitPropertiesContext(context.TODO(), name, true, properties...) diff --git a/libcontainer/cgroups/systemd/v1.go b/libcontainer/cgroups/systemd/v1.go index 936ec4bc57e..1a8e1e3c6c1 100644 --- a/libcontainer/cgroups/systemd/v1.go +++ b/libcontainer/cgroups/systemd/v1.go @@ -10,6 +10,7 @@ import ( "sync" systemdDbus "github.com/coreos/go-systemd/v22/dbus" + "github.com/godbus/dbus/v5" "github.com/sirupsen/logrus" "github.com/opencontainers/runc/libcontainer/cgroups" @@ -331,6 +332,61 @@ func (m *legacyManager) GetStats() (*cgroups.Stats, error) { return stats, nil } +// freezeBeforeSet answers whether there is a need to freeze the cgroup before +// applying its systemd unit properties, and thaw after, while avoiding +// unnecessary freezer state changes. +// +// The reason why we have to freeze is that systemd's application of device +// rules is done disruptively, resulting in spurious errors to common devices +// (unlike our fs driver, they will happily write deny-all rules to running +// containers). So we have to freeze the container to avoid the container get +// an occasional "permission denied" error. +func (m *legacyManager) freezeBeforeSet(unitName string, r *configs.Resources) (needsFreeze, needsThaw bool, err error) { + // Special case for SkipDevices, as used by Kubernetes to create pod + // cgroups with allow-all device policy). + if r.SkipDevices { + // No need to freeze if SkipDevices is set, and either + // (1) systemd unit does not (yet) exist, or + // (2) it has DevicePolicy=auto and empty DeviceAllow list. + // + // Interestingly, (1) and (2) are the same here because + // a non-existent unit returns default properties, + // and settings in (2) are the defaults. + // + // Do not return errors from getUnitProperty, as they alone + // should not prevent Set from working. + devPolicy, e := getUnitProperty(m.dbus, unitName, "DevicePolicy") + if e == nil && devPolicy.Value == dbus.MakeVariant("auto") { + devAllow, e := getUnitProperty(m.dbus, unitName, "DeviceAllow") + if e == nil && devAllow.Value == dbus.MakeVariant([]deviceAllowEntry{}) { + needsFreeze = false + needsThaw = false + return + } + } + } + + needsFreeze = true + needsThaw = true + + // Check the current freezer state. + freezerState, err := m.GetFreezerState() + if err != nil { + return + } + if freezerState == configs.Frozen { + // Already frozen, and should stay frozen. + needsFreeze = false + needsThaw = false + } + + if r.Freezer == configs.Frozen { + // Will be frozen anyway -- no need to thaw. + needsThaw = false + } + return +} + func (m *legacyManager) Set(r *configs.Resources) error { // If Paths are set, then we are just joining cgroups paths // and there is no need to set any values. @@ -345,29 +401,24 @@ func (m *legacyManager) Set(r *configs.Resources) error { return err } - // Figure out the current freezer state, so we can revert to it after we - // temporarily freeze the container. - targetFreezerState, err := m.GetFreezerState() + unitName := getUnitName(m.cgroups) + needsFreeze, needsThaw, err := m.freezeBeforeSet(unitName, r) if err != nil { return err } - if targetFreezerState == configs.Undefined { - targetFreezerState = configs.Thawed - } - // We have to freeze the container while systemd sets the cgroup settings. - // The reason for this is that systemd's application of DeviceAllow rules - // is done disruptively, resulting in spurrious errors to common devices - // (unlike our fs driver, they will happily write deny-all rules to running - // containers). So we freeze the container to avoid them hitting the cgroup - // error. But if the freezer cgroup isn't supported, we just warn about it. - if err := m.doFreeze(configs.Frozen); err != nil { - logrus.Infof("freeze container before SetUnitProperties failed: %v", err) + if needsFreeze { + if err := m.doFreeze(configs.Frozen); err != nil { + // If freezer cgroup isn't supported, we just warn about it. + logrus.Infof("freeze container before SetUnitProperties failed: %v", err) + } + } + setErr := setUnitProperties(m.dbus, unitName, properties...) + if needsThaw { + if err := m.doFreeze(configs.Thawed); err != nil { + logrus.Infof("thaw container after SetUnitProperties failed: %v", err) + } } - - setErr := setUnitProperties(m.dbus, getUnitName(m.cgroups), properties...) - - _ = m.doFreeze(targetFreezerState) if setErr != nil { return setErr }