From 7867ef36237fb9e9b7f47710199909887e632589 Mon Sep 17 00:00:00 2001 From: Sven Rebhan <36194019+srebhan@users.noreply.github.com> Date: Wed, 5 Jan 2022 16:45:03 +0100 Subject: [PATCH] fix: fix missing storage in container with disk plugin (#10318) --- plugins/inputs/disk/disk.go | 16 +- plugins/inputs/disk/disk_test.go | 170 ++++++++++++++++++ .../disk/testdata/issue_10297/1/mountinfo | 2 + .../inputs/disk/testdata/success/1/mountinfo | 2 + plugins/inputs/system/ps.go | 46 ++++- 5 files changed, 224 insertions(+), 12 deletions(-) create mode 100644 plugins/inputs/disk/testdata/issue_10297/1/mountinfo create mode 100644 plugins/inputs/disk/testdata/success/1/mountinfo diff --git a/plugins/inputs/disk/disk.go b/plugins/inputs/disk/disk.go index fc552a232b799..a995d72ccf22b 100644 --- a/plugins/inputs/disk/disk.go +++ b/plugins/inputs/disk/disk.go @@ -17,6 +17,8 @@ type DiskStats struct { MountPoints []string `toml:"mount_points"` IgnoreFS []string `toml:"ignore_fs"` + + Log telegraf.Logger `toml:"-"` } func (ds *DiskStats) Description() string { @@ -36,17 +38,24 @@ func (ds *DiskStats) SampleConfig() string { return diskSampleConfig } -func (ds *DiskStats) Gather(acc telegraf.Accumulator) error { +func (ds *DiskStats) Init() error { // Legacy support: if len(ds.LegacyMountPoints) != 0 { ds.MountPoints = ds.LegacyMountPoints } + ps := system.NewSystemPS() + ps.Log = ds.Log + ds.ps = ps + + return nil +} + +func (ds *DiskStats) Gather(acc telegraf.Accumulator) error { disks, partitions, err := ds.ps.DiskUsage(ds.MountPoints, ds.IgnoreFS) if err != nil { return fmt.Errorf("error getting disk usage info: %s", err) } - for i, du := range disks { if du.Total == 0 { // Skip dummy filesystem (procfs, cgroupfs, ...) @@ -102,8 +111,7 @@ func (opts MountOptions) exists(opt string) bool { } func init() { - ps := system.NewSystemPS() inputs.Add("disk", func() telegraf.Input { - return &DiskStats{ps: ps} + return &DiskStats{} }) } diff --git a/plugins/inputs/disk/disk_test.go b/plugins/inputs/disk/disk_test.go index 22dd947406ff5..5905293433260 100644 --- a/plugins/inputs/disk/disk_test.go +++ b/plugins/inputs/disk/disk_test.go @@ -3,12 +3,17 @@ package disk import ( "fmt" "os" + "path/filepath" + "runtime" + "strings" "testing" + "time" diskUtil "github.com/shirou/gopsutil/v3/disk" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" + "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/plugins/inputs/system" "github.com/influxdata/telegraf/testutil" ) @@ -377,3 +382,168 @@ func TestDiskStats(t *testing.T) { require.NoError(t, err) require.Equal(t, 2*expectedAllDiskMetrics+7, acc.NFields()) } + +func TestDiskUsageIssues(t *testing.T) { + if runtime.GOOS != "linux" { + t.Skip("Skipping due to Linux-only test-cases...") + } + + tests := []struct { + name string + prefix string + du diskUtil.UsageStat + expected []telegraf.Metric + }{ + { + name: "success", + prefix: "", + du: diskUtil.UsageStat{ + Total: 256, + Free: 46, + Used: 200, + InodesTotal: 2468, + InodesFree: 468, + InodesUsed: 2000, + }, + expected: []telegraf.Metric{ + testutil.MustMetric( + "disk", + map[string]string{ + "device": "tmpfs", + "fstype": "tmpfs", + "mode": "rw", + "path": "/tmp", + }, + map[string]interface{}{ + "total": uint64(256), + "used": uint64(200), + "free": uint64(46), + "inodes_total": uint64(2468), + "inodes_free": uint64(468), + "inodes_used": uint64(2000), + "used_percent": float64(81.30081300813008), + }, + time.Unix(0, 0), + telegraf.Gauge, + ), + testutil.MustMetric( + "disk", + map[string]string{ + "device": "nvme0n1p4", + "fstype": "ext4", + "mode": "rw", + "path": "/", + }, + map[string]interface{}{ + "total": uint64(256), + "used": uint64(200), + "free": uint64(46), + "inodes_total": uint64(2468), + "inodes_free": uint64(468), + "inodes_used": uint64(2000), + "used_percent": float64(81.30081300813008), + }, + time.Unix(0, 0), + telegraf.Gauge, + ), + }, + }, + { + name: "issue 10297", + prefix: "/host", + du: diskUtil.UsageStat{ + Total: 256, + Free: 46, + Used: 200, + InodesTotal: 2468, + InodesFree: 468, + InodesUsed: 2000, + }, + expected: []telegraf.Metric{ + testutil.MustMetric( + "disk", + map[string]string{ + "device": "sda1", + "fstype": "ext4", + "mode": "rw", + "path": "/", + }, + map[string]interface{}{ + "total": uint64(256), + "used": uint64(200), + "free": uint64(46), + "inodes_total": uint64(2468), + "inodes_free": uint64(468), + "inodes_used": uint64(2000), + "used_percent": float64(81.30081300813008), + }, + time.Unix(0, 0), + telegraf.Gauge, + ), + testutil.MustMetric( + "disk", + map[string]string{ + "device": "sdb", + "fstype": "ext4", + "mode": "rw", + "path": "/mnt/storage", + }, + map[string]interface{}{ + "total": uint64(256), + "used": uint64(200), + "free": uint64(46), + "inodes_total": uint64(2468), + "inodes_free": uint64(468), + "inodes_used": uint64(2000), + "used_percent": float64(81.30081300813008), + }, + time.Unix(0, 0), + telegraf.Gauge, + ), + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Setup the environment + hostMountPrefix := tt.prefix + hostProcPrefix, err := filepath.Abs(filepath.Join("testdata", strings.ReplaceAll(tt.name, " ", "_"))) + require.NoError(t, err) + + // Get the partitions in the test-case + os.Clearenv() + require.NoError(t, os.Setenv("HOST_PROC", hostProcPrefix)) + partitions, err := diskUtil.Partitions(true) + require.NoError(t, err) + + // Mock the disk usage + mck := &mock.Mock{} + mps := system.MockPSDisk{SystemPS: &system.SystemPS{PSDiskDeps: &system.MockDiskUsage{Mock: mck}}, Mock: mck} + defer mps.AssertExpectations(t) + + mps.On("Partitions", true).Return(partitions, nil) + + for _, partition := range partitions { + mountpoint := partition.Mountpoint + if hostMountPrefix != "" { + mountpoint = filepath.Join(hostMountPrefix, partition.Mountpoint) + } + diskUsage := tt.du + diskUsage.Path = mountpoint + diskUsage.Fstype = partition.Fstype + mps.On("PSDiskUsage", mountpoint).Return(&diskUsage, nil) + } + mps.On("OSGetenv", "HOST_MOUNT_PREFIX").Return(hostMountPrefix) + + // Setup the plugin and run the test + var acc testutil.Accumulator + plugin := &DiskStats{ps: &mps} + require.NoError(t, plugin.Gather(&acc)) + + actual := acc.GetTelegrafMetrics() + testutil.RequireMetricsEqual(t, tt.expected, actual, testutil.IgnoreTime(), testutil.SortMetrics()) + }) + } + os.Clearenv() +} diff --git a/plugins/inputs/disk/testdata/issue_10297/1/mountinfo b/plugins/inputs/disk/testdata/issue_10297/1/mountinfo new file mode 100644 index 0000000000000..012aae7ce2f66 --- /dev/null +++ b/plugins/inputs/disk/testdata/issue_10297/1/mountinfo @@ -0,0 +1,2 @@ +31 1 8:1 / / rw,relatime shared:1 - ext4 /dev/sda1 rw,discard,errors=remount-ro +126 31 8:16 / /mnt/storage rw,relatime shared:67 - ext4 /dev/sdb rw,discard diff --git a/plugins/inputs/disk/testdata/success/1/mountinfo b/plugins/inputs/disk/testdata/success/1/mountinfo new file mode 100644 index 0000000000000..70c532242dcf8 --- /dev/null +++ b/plugins/inputs/disk/testdata/success/1/mountinfo @@ -0,0 +1,2 @@ +26 1 259:4 / / rw,relatime shared:1 - ext4 /dev/nvme0n1p4 rw +39 26 0:32 / /tmp rw,nosuid,nodev shared:17 - tmpfs tmpfs rw,size=16427752k,nr_inodes=409600,inode64 diff --git a/plugins/inputs/system/ps.go b/plugins/inputs/system/ps.go index 187fec3d7a794..712871f1a35c9 100644 --- a/plugins/inputs/system/ps.go +++ b/plugins/inputs/system/ps.go @@ -5,6 +5,7 @@ import ( "path/filepath" "strings" + "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/internal" "github.com/shirou/gopsutil/v3/cpu" @@ -34,11 +35,12 @@ type PSDiskDeps interface { } func NewSystemPS() *SystemPS { - return &SystemPS{&SystemPSDisk{}} + return &SystemPS{PSDiskDeps: &SystemPSDisk{}} } type SystemPS struct { PSDiskDeps + Log telegraf.Logger `toml:"-"` } type SystemPSDisk struct{} @@ -97,10 +99,17 @@ func (s *SystemPS) DiskUsage( for i := range parts { p := parts[i] + if s.Log != nil { + s.Log.Debugf("[SystemPS] partition %d: %v", i, p) + } + if len(mountPointFilter) > 0 { // If the mount point is not a member of the filter set, // don't gather info on it. if _, ok := mountPointFilterSet[p.Mountpoint]; !ok { + if s.Log != nil { + s.Log.Debug("[SystemPS] => dropped by mount-point filter") + } continue } } @@ -108,22 +117,43 @@ func (s *SystemPS) DiskUsage( // If the mount point is a member of the exclude set, // don't gather info on it. if _, ok := fstypeExcludeSet[p.Fstype]; ok { + if s.Log != nil { + s.Log.Debug("[SystemPS] => dropped by filesystem-type filter") + } continue } - // If there's a host mount prefix, exclude any paths which conflict - // with the prefix. - if len(hostMountPrefix) > 0 && - !strings.HasPrefix(p.Mountpoint, hostMountPrefix) && - paths[hostMountPrefix+p.Mountpoint] { - continue + // If there's a host mount prefix use it as newer gopsutil version check for + // the init's mountpoints usually pointing to the host-mountpoint but in the + // container. This won't work for checking the disk-usage as the disks are + // mounted at HOST_MOUNT_PREFIX... + mountpoint := p.Mountpoint + if hostMountPrefix != "" && !strings.HasPrefix(p.Mountpoint, hostMountPrefix) { + mountpoint = filepath.Join(hostMountPrefix, p.Mountpoint) + // Exclude conflicting paths + if paths[mountpoint] { + if s.Log != nil { + s.Log.Debug("[SystemPS] => dropped by mount prefix") + } + continue + } + } + if s.Log != nil { + s.Log.Debugf("[SystemPS] -> using mountpoint %q...", mountpoint) } - du, err := s.PSDiskUsage(p.Mountpoint) + du, err := s.PSDiskUsage(mountpoint) if err != nil { + if s.Log != nil { + s.Log.Debugf("[SystemPS] => dropped by disk usage (%q): %v", mountpoint, err) + } continue } + if s.Log != nil { + s.Log.Debug("[SystemPS] => kept...") + } + du.Path = filepath.Join("/", strings.TrimPrefix(p.Mountpoint, hostMountPrefix)) du.Fstype = p.Fstype usage = append(usage, du)