Skip to content

Commit

Permalink
merge branch 'pr-3093' into release-1.0
Browse files Browse the repository at this point in the history
Kir Kolyshkin (6):
  libct/cg/sd: add TestPodSkipDevicesUpdate
  libct/cg/sd: TestFreezePodCgroup: rm explicit freeze
  libct/cg/sd/v1: Set: avoid unnecessary freeze/thaw
  libct/int/TestFreeze: test freeze/thaw via Set
  libct/int: allow subtests
  libct/cg/sd/v1: Set: don't overwrite r.Freezer

LGTMs: mrunalp cyphar
Closes #3093
  • Loading branch information
cyphar committed Jul 16, 2021
2 parents e14c134 + 4efb7a6 commit 40dcf1f
Show file tree
Hide file tree
Showing 6 changed files with 238 additions and 47 deletions.
9 changes: 9 additions & 0 deletions libcontainer/cgroups/systemd/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,15 @@ func resetFailedUnit(cm *dbusConnManager, name string) {
}
}

func getUnitProperty(cm *dbusConnManager, unitName string, propertyName string) (*systemdDbus.Property, error) {
var prop *systemdDbus.Property
err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) (Err error) {
prop, Err = c.GetUnitPropertyContext(context.TODO(), unitName, propertyName)
return Err
})
return prop, err
}

func setUnitProperties(cm *dbusConnManager, name string, properties ...systemdDbus.Property) error {
return cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
return c.SetUnitPropertiesContext(context.TODO(), name, true, properties...)
Expand Down
107 changes: 104 additions & 3 deletions libcontainer/cgroups/systemd/systemd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,110 @@ func newManager(config *configs.Cgroup) cgroups.Manager {
return NewLegacyManager(config, nil)
}

// TestPodSkipDevicesUpdate checks that updating a pod having SkipDevices: true
// does not result in spurious "permission denied" errors in a container
// running under the pod. The test is somewhat similar in nature to the
// @test "update devices [minimal transition rules]" in tests/integration,
// but uses a pod.
func TestPodSkipDevicesUpdate(t *testing.T) {
if !IsRunningSystemd() {
t.Skip("Test requires systemd.")
}
if os.Geteuid() != 0 {
t.Skip("Test requires root.")
}

podName := "system-runc_test_pod" + t.Name() + ".slice"
podConfig := &configs.Cgroup{
Parent: "system.slice",
Name: podName,
Resources: &configs.Resources{
PidsLimit: 42,
Memory: 32 * 1024 * 1024,
SkipDevices: true,
},
}
// Create "pod" cgroup (a systemd slice to hold containers).
pm := newManager(podConfig)
defer pm.Destroy() //nolint:errcheck
if err := pm.Apply(-1); err != nil {
t.Fatal(err)
}
if err := pm.Set(podConfig.Resources); err != nil {
t.Fatal(err)
}

containerConfig := &configs.Cgroup{
Parent: podName,
ScopePrefix: "test",
Name: "PodSkipDevicesUpdate",
Resources: &configs.Resources{
Devices: []*devices.Rule{
// Allow access to /dev/null.
{
Type: devices.CharDevice,
Major: 1,
Minor: 3,
Permissions: "rwm",
Allow: true,
},
},
},
}

// Create a "container" within the "pod" cgroup.
// This is not a real container, just a process in the cgroup.
cmd := exec.Command("bash", "-c", "while true; do echo > /dev/null; done")
cmd.Env = append(os.Environ(), "LANG=C")
var stderr bytes.Buffer
cmd.Stderr = &stderr
if err := cmd.Start(); err != nil {
t.Fatal(err)
}
// Make sure to not leave a zombie.
defer func() {
// These may fail, we don't care.
_ = cmd.Process.Kill()
_ = cmd.Wait()
}()

// Put the process into a cgroup.
cm := newManager(containerConfig)
defer cm.Destroy() //nolint:errcheck

if err := cm.Apply(cmd.Process.Pid); err != nil {
t.Fatal(err)
}
// Check that we put the "container" into the "pod" cgroup.
if !strings.HasPrefix(cm.Path("devices"), pm.Path("devices")) {
t.Fatalf("expected container cgroup path %q to be under pod cgroup path %q",
cm.Path("devices"), pm.Path("devices"))
}
if err := cm.Set(containerConfig.Resources); err != nil {
t.Fatal(err)
}

// Now update the pod a few times.
for i := 0; i < 42; i++ {
podConfig.Resources.PidsLimit++
podConfig.Resources.Memory += 1024 * 1024
if err := pm.Set(podConfig.Resources); err != nil {
t.Fatal(err)
}
}
// Kill the "container".
if err := cmd.Process.Kill(); err != nil {
t.Fatal(err)
}

_ = cmd.Wait()

// "Container" stderr should be empty.
if stderr.Len() != 0 {
t.Fatalf("container stderr not empty: %s", stderr.String())
}
}

func testSkipDevices(t *testing.T, skipDevices bool, expected []string) {
if !IsRunningSystemd() {
t.Skip("Test requires systemd.")
Expand Down Expand Up @@ -219,9 +323,6 @@ func TestFreezePodCgroup(t *testing.T) {
t.Fatal(err)
}

if err := pm.Freeze(configs.Frozen); err != nil {
t.Fatal(err)
}
if err := pm.Set(podConfig.Resources); err != nil {
t.Fatal(err)
}
Expand Down
115 changes: 85 additions & 30 deletions libcontainer/cgroups/systemd/v1.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@ import (
"sync"

systemdDbus "github.com/coreos/go-systemd/v22/dbus"
"github.com/godbus/dbus/v5"
"github.com/sirupsen/logrus"

"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/sirupsen/logrus"
)

type legacyManager struct {
Expand Down Expand Up @@ -278,18 +280,23 @@ func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
}

func (m *legacyManager) Freeze(state configs.FreezerState) error {
err := m.doFreeze(state)
if err == nil {
m.cgroups.Resources.Freezer = state
}
return err
}

// doFreeze is the same as Freeze but without
// changing the m.cgroups.Resources.Frozen field.
func (m *legacyManager) doFreeze(state configs.FreezerState) error {
path, ok := m.paths["freezer"]
if !ok {
return errSubsystemDoesNotExist
}
prevState := m.cgroups.Resources.Freezer
m.cgroups.Resources.Freezer = state
freezer := &fs.FreezerGroup{}
if err := freezer.Set(path, m.cgroups.Resources); err != nil {
m.cgroups.Resources.Freezer = prevState
return err
}
return nil
resources := &configs.Resources{Freezer: state}
return freezer.Set(path, resources)
}

func (m *legacyManager) GetPids() ([]int, error) {
Expand Down Expand Up @@ -325,6 +332,61 @@ func (m *legacyManager) GetStats() (*cgroups.Stats, error) {
return stats, nil
}

// freezeBeforeSet answers whether there is a need to freeze the cgroup before
// applying its systemd unit properties, and thaw after, while avoiding
// unnecessary freezer state changes.
//
// The reason why we have to freeze is that systemd's application of device
// rules is done disruptively, resulting in spurious errors to common devices
// (unlike our fs driver, they will happily write deny-all rules to running
// containers). So we have to freeze the container to avoid the container get
// an occasional "permission denied" error.
func (m *legacyManager) freezeBeforeSet(unitName string, r *configs.Resources) (needsFreeze, needsThaw bool, err error) {
// Special case for SkipDevices, as used by Kubernetes to create pod
// cgroups with allow-all device policy).
if r.SkipDevices {
// No need to freeze if SkipDevices is set, and either
// (1) systemd unit does not (yet) exist, or
// (2) it has DevicePolicy=auto and empty DeviceAllow list.
//
// Interestingly, (1) and (2) are the same here because
// a non-existent unit returns default properties,
// and settings in (2) are the defaults.
//
// Do not return errors from getUnitProperty, as they alone
// should not prevent Set from working.
devPolicy, e := getUnitProperty(m.dbus, unitName, "DevicePolicy")
if e == nil && devPolicy.Value == dbus.MakeVariant("auto") {
devAllow, e := getUnitProperty(m.dbus, unitName, "DeviceAllow")
if e == nil && devAllow.Value == dbus.MakeVariant([]deviceAllowEntry{}) {
needsFreeze = false
needsThaw = false
return
}
}
}

needsFreeze = true
needsThaw = true

// Check the current freezer state.
freezerState, err := m.GetFreezerState()
if err != nil {
return
}
if freezerState == configs.Frozen {
// Already frozen, and should stay frozen.
needsFreeze = false
needsThaw = false
}

if r.Freezer == configs.Frozen {
// Will be frozen anyway -- no need to thaw.
needsThaw = false
}
return
}

func (m *legacyManager) Set(r *configs.Resources) error {
// If Paths are set, then we are just joining cgroups paths
// and there is no need to set any values.
Expand All @@ -339,34 +401,27 @@ func (m *legacyManager) Set(r *configs.Resources) error {
return err
}

// Figure out the current freezer state, so we can revert to it after we
// temporarily freeze the container.
targetFreezerState, err := m.GetFreezerState()
unitName := getUnitName(m.cgroups)
needsFreeze, needsThaw, err := m.freezeBeforeSet(unitName, r)
if err != nil {
return err
}
if targetFreezerState == configs.Undefined {
targetFreezerState = configs.Thawed
}

// We have to freeze the container while systemd sets the cgroup settings.
// The reason for this is that systemd's application of DeviceAllow rules
// is done disruptively, resulting in spurrious errors to common devices
// (unlike our fs driver, they will happily write deny-all rules to running
// containers). So we freeze the container to avoid them hitting the cgroup
// error. But if the freezer cgroup isn't supported, we just warn about it.
if err := m.Freeze(configs.Frozen); err != nil {
logrus.Infof("freeze container before SetUnitProperties failed: %v", err)
if needsFreeze {
if err := m.doFreeze(configs.Frozen); err != nil {
// If freezer cgroup isn't supported, we just warn about it.
logrus.Infof("freeze container before SetUnitProperties failed: %v", err)
}
}

if err := setUnitProperties(m.dbus, getUnitName(m.cgroups), properties...); err != nil {
_ = m.Freeze(targetFreezerState)
return err
setErr := setUnitProperties(m.dbus, unitName, properties...)
if needsThaw {
if err := m.doFreeze(configs.Thawed); err != nil {
logrus.Infof("thaw container after SetUnitProperties failed: %v", err)
}
}
if setErr != nil {
return setErr
}

// Reset freezer state before we apply the configuration, to avoid clashing
// with the freezer setting in the configuration.
_ = m.Freeze(targetFreezerState)

for _, sys := range legacySubsystems {
// Get the subsystem path, but don't error out for not found cgroups.
Expand Down
49 changes: 37 additions & 12 deletions libcontainer/integration/exec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -493,28 +493,41 @@ func TestAdditionalGroups(t *testing.T) {
}

func TestFreeze(t *testing.T) {
testFreeze(t, false)
}

func TestSystemdFreeze(t *testing.T) {
if !systemd.IsRunningSystemd() {
t.Skip("Test requires systemd.")
for _, systemd := range []bool{true, false} {
for _, set := range []bool{true, false} {
name := ""
if systemd {
name += "Systemd"
} else {
name += "FS"
}
if set {
name += "ViaSet"
} else {
name += "ViaPauseResume"
}
t.Run(name, func(t *testing.T) {
testFreeze(t, systemd, set)
})
}
}
testFreeze(t, true)
}

func testFreeze(t *testing.T, systemd bool) {
func testFreeze(t *testing.T, withSystemd bool, useSet bool) {
if testing.Short() {
return
}
if withSystemd && !systemd.IsRunningSystemd() {
t.Skip("Test requires systemd.")
}

rootfs, err := newRootfs()
ok(t, err)
defer remove(rootfs)

config := newTemplateConfig(t, &tParam{
rootfs: rootfs,
systemd: systemd,
systemd: withSystemd,
})
container, err := newContainer(t, config)
ok(t, err)
Expand All @@ -535,16 +548,28 @@ func testFreeze(t *testing.T, systemd bool) {
defer stdinW.Close() //nolint: errcheck
ok(t, err)

err = container.Pause()
if !useSet {
err = container.Pause()
} else {
config.Cgroups.Resources.Freezer = configs.Frozen
err = container.Set(*config)
}
ok(t, err)

state, err := container.Status()
ok(t, err)
err = container.Resume()
ok(t, err)
if state != libcontainer.Paused {
t.Fatal("Unexpected state: ", state)
}

if !useSet {
err = container.Resume()
} else {
config.Cgroups.Resources.Freezer = configs.Thawed
err = container.Set(*config)
}
ok(t, err)

_ = stdinW.Close()
waitProcess(pconfig, t)
}
Expand Down
3 changes: 2 additions & 1 deletion libcontainer/integration/template_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package integration

import (
"strconv"
"strings"
"testing"
"time"

Expand Down Expand Up @@ -215,7 +216,7 @@ func newTemplateConfig(t *testing.T, p *tParam) *configs.Config {

if p.systemd {
id := strconv.FormatInt(-int64(time.Now().Nanosecond()), 36)
config.Cgroups.Name = t.Name() + id
config.Cgroups.Name = strings.ReplaceAll(t.Name(), "/", "_") + id
// do not change Parent (see newContainer)
config.Cgroups.Parent = "system.slice"
config.Cgroups.ScopePrefix = "runc-test"
Expand Down
Loading

0 comments on commit 40dcf1f

Please sign in to comment.