Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

libct/cg/sd: fix dbus connection leak #2936

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 19 additions & 11 deletions libcontainer/cgroups/systemd/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -384,15 +384,23 @@ func setUnitProperties(cm *dbusConnManager, name string, properties ...systemdDb
})
}

func getManagerProperty(cm *dbusConnManager, name string) (string, error) {
str := ""
err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
var err error
str, err = c.GetManagerProperty(name)
return err
})
if err != nil {
return "", err
}
return strconv.Unquote(str)
}

func systemdVersion(cm *dbusConnManager) int {
versionOnce.Do(func() {
version = -1
var verStr string
err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
var err error
verStr, err = c.GetManagerProperty("Version")
return err
})
verStr, err := getManagerProperty(cm, "Version")
if err == nil {
version, err = systemdVersionAtoi(verStr)
}
Expand All @@ -407,11 +415,11 @@ func systemdVersion(cm *dbusConnManager) int {

func systemdVersionAtoi(verStr string) (int, error) {
// verStr should be of the form:
// "v245.4-1.fc32", "245", "v245-1.fc32", "245-1.fc32"
// all the input strings include quotes, and the output int should be 245
// thus, we unconditionally remove the `"v`
// and then match on the first integer we can grab
re := regexp.MustCompile(`"?v?([0-9]+)`)
// "v245.4-1.fc32", "245", "v245-1.fc32", "245-1.fc32" (without quotes).
// The result for all of the above should be 245.
// Thus, we unconditionally remove the "v" prefix
// and then match on the first integer we can grab.
re := regexp.MustCompile(`v?([0-9]+)`)
matches := re.FindStringSubmatch(verStr)
if len(matches) < 2 {
return 0, errors.Errorf("can't parse version %s: incorrect number of matches %v", verStr, matches)
Expand Down
18 changes: 17 additions & 1 deletion libcontainer/cgroups/systemd/dbus.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ package systemd

import (
"context"
"runtime"
"sync"

systemdDbus "github.com/coreos/go-systemd/v22/dbus"
Expand All @@ -18,9 +19,15 @@ type dbusConnManager struct {

// newDbusConnManager initializes systemd dbus connection manager.
func newDbusConnManager(rootless bool) *dbusConnManager {
return &dbusConnManager{
m := &dbusConnManager{
rootless: rootless,
}

runtime.SetFinalizer(m, func(d *dbusConnManager) {
d.closeConnection()
})
Comment on lines +26 to +28
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is a better solution than switching back to a single-connection setup.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only issue with this PR I can think of is, if a runtime creates a lot of containers and keeps references to all of them (and thus to cgroup manager for every container), there will be N*2 opened sockets rather than just 2.

The issue with the other PR is it mixes root and rootless connection into the same variable.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

kubelet could be operating on up to ~200 pods (typical per node limit) on a single node so there is the overhead of having to create/destroy connections to dbus frequently. We'll have to measure the impact of that.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Takes about 1 ms to connect on my laptop.

Here is a a benchmark:

package systemd

import (
	"os"
	"testing"

	systemdDbus "github.com/coreos/go-systemd/v22/dbus"
)

func benchmarkDbusConn(b *testing.B, closeConn bool) {
	cm := newDbusConnManager(os.Getuid() != 0)

	var c *systemdDbus.Conn
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		var err error
		if closeConn {
			cm.closeConnection()
		}
		c, err = cm.getConnection()
		if err != nil {
			b.Fatal(err)
		}
	}
	b.Log(c.GetManagerProperty("Version"))
	cm.closeConnection()
}

func BenchmarkDbusConnReuse(b *testing.B) {
	benchmarkDbusConn(b, false)
}

func BenchmarkDbusConnReopen(b *testing.B) {
	benchmarkDbusConn(b, true)
}

and the results:

kir@kir-rhat systemd]$ go test -v -run 345 -bench .
goos: linux
goarch: amd64
pkg: github.com/opencontainers/runc/libcontainer/cgroups/systemd
cpu: Intel(R) Core(TM) i7-8565U CPU @ 1.80GHz
BenchmarkDbusConnReuse
    dbus_test.go:25: "v246.10-1.fc33" <nil>
    dbus_test.go:25: "v246.10-1.fc33" <nil>
    dbus_test.go:25: "v246.10-1.fc33" <nil>
    dbus_test.go:25: "v246.10-1.fc33" <nil>
    dbus_test.go:25: "v246.10-1.fc33" <nil>
BenchmarkDbusConnReuse-4    	78536199	        13.88 ns/op
BenchmarkDbusConnReopen
    dbus_test.go:25: "v246.10-1.fc33" <nil>
    dbus_test.go:25: "v246.10-1.fc33" <nil>
    dbus_test.go:25: "v246.10-1.fc33" <nil>
    dbus_test.go:25: "v246.10-1.fc33" <nil>
BenchmarkDbusConnReopen-4   	    1520	    804216 ns/op
PASS
ok  	github.com/opencontainers/runc/libcontainer/cgroups/systemd	3.421s

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Surely, there's also some other overhead, most notably opened fds.


return m
}

// getConnection lazily initializes and returns systemd dbus connection.
Expand Down Expand Up @@ -59,6 +66,15 @@ func (d *dbusConnManager) newConnection() (*systemdDbus.Conn, error) {
return systemdDbus.NewWithContext(context.TODO())
}

func (d *dbusConnManager) closeConnection() {
d.Lock()
defer d.Unlock()
if d.conn != nil {
d.conn.Close()
d.conn = nil
}
}

// resetConnection resets the connection to its initial state
// (so it can be reconnected if necessary).
func (d *dbusConnManager) resetConnection(conn *systemdDbus.Conn) {
Expand Down
1 change: 1 addition & 0 deletions libcontainer/cgroups/systemd/v1.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ func (m *legacyManager) Destroy() error {
m.mu.Lock()
defer m.mu.Unlock()

defer m.dbus.closeConnection()
stopErr := stopUnit(m.dbus, getUnitName(m.cgroups))

// Both on success and on error, cleanup all the cgroups we are aware of.
Expand Down
13 changes: 3 additions & 10 deletions libcontainer/cgroups/systemd/v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,7 @@ func (m *unifiedManager) Destroy() error {
m.mu.Lock()
defer m.mu.Unlock()

defer m.dbus.closeConnection()
unitName := getUnitName(m.cgroups)
if err := stopUnit(m.dbus, unitName); err != nil {
return err
Expand Down Expand Up @@ -337,16 +338,8 @@ func (m *unifiedManager) getSliceFull() (string, error) {
}

if m.rootless {
dbusConnection, err := m.dbus.getConnection()
if err != nil {
return "", err
}
// managerCGQuoted is typically "/user.slice/user-${uid}.slice/user@${uid}.service" including the quote symbols
kolyshkin marked this conversation as resolved.
Show resolved Hide resolved
managerCGQuoted, err := dbusConnection.GetManagerProperty("ControlGroup")
if err != nil {
return "", err
}
managerCG, err := strconv.Unquote(managerCGQuoted)
// managerCG is typically "/user.slice/user-${uid}.slice/user@${uid}.service".
managerCG, err := getManagerProperty(m.dbus, "ControlGroup")
if err != nil {
return "", err
}
Expand Down
13 changes: 12 additions & 1 deletion libcontainer/integration/exec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1917,6 +1917,14 @@ func TestCGROUPHost(t *testing.T) {
}

func TestFdLeaks(t *testing.T) {
testFdLeaks(t, false)
}

func TestFdLeaksSystemd(t *testing.T) {
testFdLeaks(t, true)
}

func testFdLeaks(t *testing.T, systemd bool) {
if testing.Short() {
return
}
Expand All @@ -1933,7 +1941,10 @@ func TestFdLeaks(t *testing.T) {
_, err = pfd.Seek(0, 0)
ok(t, err)

config := newTemplateConfig(t, &tParam{rootfs: rootfs})
config := newTemplateConfig(t, &tParam{
rootfs: rootfs,
systemd: systemd,
})
buffers, exitCode, err := runContainer(t, config, "", "true")
ok(t, err)

Expand Down