From 5ca9df2e3c1c66c241dc62f1cf319773c790f085 Mon Sep 17 00:00:00 2001 From: Dani Louca <59848726+dloucasfx@users.noreply.github.com> Date: Tue, 16 Aug 2022 15:18:39 -0400 Subject: [PATCH] Make VM metrics host dimension configurable (#2397) Signed-off-by: Dani Louca --- docs/monitors/vsphere.md | 2 ++ pkg/monitors/vsphere/metadata.yaml | 3 ++ pkg/monitors/vsphere/model/model.go | 27 ++++++++++++++++ .../vsphere/service/fake_gateway_test.go | 7 +++-- pkg/monitors/vsphere/service/inventory.go | 13 ++++++-- .../vsphere/service/inventory_test.go | 31 ++++++++++++++----- pkg/monitors/vsphere/service/metrics_test.go | 4 ++- pkg/monitors/vsphere/service/points.go | 8 +++-- pkg/monitors/vsphere/service/points_test.go | 4 ++- pkg/monitors/vsphere/vsphere_monitor.go | 2 +- selfdescribe.json | 11 +++++++ 11 files changed, 93 insertions(+), 19 deletions(-) diff --git a/docs/monitors/vsphere.md b/docs/monitors/vsphere.md index 13162a8506..a3b80bd58b 100644 --- a/docs/monitors/vsphere.md +++ b/docs/monitors/vsphere.md @@ -66,6 +66,7 @@ Configuration](../monitor-config.md#common-configuration).** | `inventoryRefreshInterval` | no | `int64` | How often to reload the inventory and inventory metrics (**default:** `60s`) | | `perfBatchSize` | no | `integer` | Maximum number of inventory objects to be queried for performance data per request. Set this value to zero (0) to request performance data for all inventory objects at a time. (**default:** `10`) | | `filter` | no | `string` | An 'expr' expression to limit the inventory traversed by the monitor. Leave blank or omit to traverse and get metrics for the entire vSphere inventory. Otherwise, this expression is evaluated per cluster. If the expression evaluates to true, metrics are collected for the objects in the cluster, otherwise it is skipped. Made available to the expr expression environment are the variables: `Datacenter` and `Cluster`. For example: filter: "Datacenter == 'MyDatacenter' && Cluster == 'MyCluster'" The above expr value will cause metrics collection for only the given datacenter + cluster. See https://github.com/antonmedv/expr for more advanced syntax. | +| `vmHostDimension` | no | `string` | The host dimension value set for monitored VMs. The options are: `ip`, `hostname` and `disable`. Default is `ip`. `ip` : the VM IP if available `hostname` : the VM Hostname if available `disable` : the vsphere monitor does not set the host dimension on the VM metrics (**default:** `ip`) | | `tlsCACertPath` | no | `string` | Path to the ca file | | `tlsClientCertificatePath` | no | `string` | Configure client certs. Both tlsClientKeyPath and tlsClientCertificatePath must be present. The files must contain PEM encoded data. Path to the client certificate | | `tlsClientKeyPath` | no | `string` | Path to the keyfile | @@ -360,6 +361,7 @@ dimensions may be specific to certain metrics. | `guest_family` | For virtual machine metrics, the guest operating system family for the virtual machine to which this metric pertains. For example 'linuxGuest'. | | `guest_fullname` | For virtual machine metrics, the full name of the guest operating system for the virtual machine to which the metric pertains. For example 'Windows 2000 Professional'. | | `guest_id` | For virtual machine metrics, the guest identifier of the virtual machine to which the metric pertains. | +| `host` | For virtual machine metrics, the host dimension will be set to the VM IP instead of the running agent hostname. This behavior is configurable; check the `vmHostDimension` option for details. | | `object` | For some metrics, the source of the metric. For example, the identifier of a NIC for a network metric, or the processor number for a CPU metric. | | `object_type` | The type of the resource to which the metric pertains. Values can be 'VirtualMachine' for a VM, or 'HostSystem' for an ESX host. | | `ref_id` | The unique vCenter identifier for the resource to which the metric pertains. | diff --git a/pkg/monitors/vsphere/metadata.yaml b/pkg/monitors/vsphere/metadata.yaml index 31b6f07d5d..c10710ea13 100644 --- a/pkg/monitors/vsphere/metadata.yaml +++ b/pkg/monitors/vsphere/metadata.yaml @@ -56,6 +56,9 @@ monitors: guest_fullname: description: For virtual machine metrics, the full name of the guest operating system for the virtual machine to which the metric pertains. For example 'Windows 2000 Professional'. + host: + description: For virtual machine metrics, the host dimension will be set to the VM IP instead of the running agent hostname. + This behavior is configurable; check the `vmHostDimension` option for details. object: description: For some metrics, the source of the metric. For example, the identifier of a NIC for a network metric, or the processor number for a CPU metric. diff --git a/pkg/monitors/vsphere/model/model.go b/pkg/monitors/vsphere/model/model.go index 2f4d0f0b03..d2d2aec855 100644 --- a/pkg/monitors/vsphere/model/model.go +++ b/pkg/monitors/vsphere/model/model.go @@ -1,6 +1,8 @@ package model import ( + "fmt" + "github.com/signalfx/signalfx-agent/pkg/core/config" "github.com/signalfx/signalfx-agent/pkg/utils/timeutil" "github.com/vmware/govmomi/vim25/types" @@ -18,6 +20,14 @@ const ( FolderType = "Folder" ) +type VMHostDim string + +const ( + GuestIP VMHostDim = "ip" + GuestHostName VMHostDim = "hostname" + Disable VMHostDim = "disable" +) + // Config for the vSphere monitor type Config struct { config.MonitorConfig `yaml:",inline" acceptsEndpoints:"true"` @@ -46,6 +56,13 @@ type Config struct { // See https://github.com/antonmedv/expr for more advanced syntax. Filter string `yaml:"filter"` + // The host dimension value set for monitored VMs. + // The options are: `ip`, `hostname` and `disable`. Default is `ip`. + // `ip` : the VM IP if available + // `hostname` : the VM Hostname if available + // `disable` : the vsphere monitor does not set the host dimension on the VM metrics + VMHostDimension VMHostDim `yaml:"vmHostDimension" default:"ip"` + // Path to the ca file TLSCACertPath string `yaml:"tlsCACertPath"` @@ -70,6 +87,16 @@ type Inventory struct { DimensionMap map[string]Dimensions } +// Validate that VMHostDimension is one of the supported options +func (c *Config) Validate() error { + switch c.VMHostDimension { + case GuestIP, GuestHostName, Disable: + return nil + default: + return fmt.Errorf("hostDimensionValue '%s' is invalid, it can only be '%s', '%s' or '%s'", c.VMHostDimension, GuestIP, GuestHostName, Disable) + } +} + func NewInventoryObject(ref types.ManagedObjectReference, extraDimensions map[string]string) *InventoryObject { dimensions := map[string]string{ "ref_id": ref.Value, diff --git a/pkg/monitors/vsphere/service/fake_gateway_test.go b/pkg/monitors/vsphere/service/fake_gateway_test.go index f41f3ae02f..5349a006da 100644 --- a/pkg/monitors/vsphere/service/fake_gateway_test.go +++ b/pkg/monitors/vsphere/service/fake_gateway_test.go @@ -88,6 +88,7 @@ func (g *fakeGateway) retrieveRefProperties(mor types.ManagedObjectReference, ds } t.Guest = &types.GuestInfo{ IpAddress: "1.2.3.4", + HostName: "foo.host.name", GuestFamily: "fooFam", GuestFullName: "fooFullName", } @@ -101,7 +102,7 @@ func (g *fakeGateway) retrieveRefProperties(mor types.ManagedObjectReference, ds return nil } -//noinspection GoUnusedParameter +// noinspection GoUnusedParameter func (g *fakeGateway) queryAvailablePerfMetric(ref types.ManagedObjectReference) (*types.QueryAvailablePerfMetricResponse, error) { counterID := g.metricIDCounter g.metricIDCounter++ @@ -112,12 +113,12 @@ func (g *fakeGateway) queryAvailablePerfMetric(ref types.ManagedObjectReference) }, nil } -//noinspection GoUnusedParameter +// noinspection GoUnusedParameter func (g *fakeGateway) queryPerfProviderSummary(mor types.ManagedObjectReference) (*types.QueryPerfProviderSummaryResponse, error) { panic("implement me") } -//noinspection GoUnusedParameter +// noinspection GoUnusedParameter func (g *fakeGateway) queryPerf(inv []*model.InventoryObject, maxSample int32) (*types.QueryPerfResponse, error) { var ret []types.BasePerfEntityMetricBase counter := 0 diff --git a/pkg/monitors/vsphere/service/inventory.go b/pkg/monitors/vsphere/service/inventory.go index f01df4a995..090a502845 100644 --- a/pkg/monitors/vsphere/service/inventory.go +++ b/pkg/monitors/vsphere/service/inventory.go @@ -13,10 +13,11 @@ type InventorySvc struct { log logrus.FieldLogger gateway IGateway f InvFilter + hostDim model.VMHostDim } -func NewInventorySvc(gateway IGateway, log logrus.FieldLogger, f InvFilter) *InventorySvc { - return &InventorySvc{gateway: gateway, f: f, log: log} +func NewInventorySvc(gateway IGateway, log logrus.FieldLogger, f InvFilter, hostDim model.VMHostDim) *InventorySvc { + return &InventorySvc{gateway: gateway, f: f, log: log, hostDim: hostDim} } // use slice semantics to build parent dimensions while traversing the inv tree @@ -205,10 +206,16 @@ func (svc *InventorySvc) followVM( dimVM: vm.Name, // e.g. "MyDebian10Host" dimGuestID: vm.Config.GuestId, // e.g. "debian10_64Guest" dimVMip: vm.Guest.IpAddress, - dimHost: vm.Guest.IpAddress, dimGuestFamily: vm.Guest.GuestFamily, // e.g. "linuxGuest" dimGuestFullname: vm.Guest.GuestFullName, // e.g. "Other 4.x or later Linux (64-bit)" } + + if svc.hostDim == model.GuestIP { + vmDims[dimHost] = vm.Guest.IpAddress + } else if svc.hostDim == model.GuestHostName { + vmDims[dimHost] = vm.Guest.HostName + } + amendDims(vmDims, dims) vmInvObj := model.NewInventoryObject(vm.Self, vmDims) inv.AddObject(vmInvObj) diff --git a/pkg/monitors/vsphere/service/inventory_test.go b/pkg/monitors/vsphere/service/inventory_test.go index ff560593b7..4be04fcca3 100644 --- a/pkg/monitors/vsphere/service/inventory_test.go +++ b/pkg/monitors/vsphere/service/inventory_test.go @@ -12,7 +12,7 @@ import ( func TestNopFilter(t *testing.T) { gateway := newFakeGateway(1) f := nopFilter{} - svc := NewInventorySvc(gateway, testLog, f) + svc := NewInventorySvc(gateway, testLog, f, model.GuestIP) inv, err := svc.RetrieveInventory() require.NoError(t, err) require.Equal(t, 4, len(inv.Objects)) @@ -22,7 +22,7 @@ func TestExprFilterOutAllInventory(t *testing.T) { gateway := newFakeGateway(1) f, err := NewFilter("Datacenter == 'X'") require.NoError(t, err) - svc := NewInventorySvc(gateway, testLog, f) + svc := NewInventorySvc(gateway, testLog, f, model.GuestIP) inv, err := svc.RetrieveInventory() assert.NoError(t, err) assert.Equal(t, 0, len(inv.Objects)) @@ -32,7 +32,7 @@ func TestExprFilterInAllInventoryInClusters(t *testing.T) { gateway := newFakeGateway(1) f, err := NewFilter("Datacenter == 'foo dc' && Cluster == 'foo cluster'") require.NoError(t, err) - svc := NewInventorySvc(gateway, testLog, f) + svc := NewInventorySvc(gateway, testLog, f, model.GuestIP) inv, err := svc.RetrieveInventory() assert.NoError(t, err) assert.Equal(t, 2, len(inv.Objects)) @@ -43,7 +43,7 @@ func TestExprFilterInAllInventory(t *testing.T) { // there are two hosts in this DC that aren't in clusters for a total of four f, err := NewFilter("Datacenter == 'foo dc'") require.NoError(t, err) - svc := NewInventorySvc(gateway, testLog, f) + svc := NewInventorySvc(gateway, testLog, f, model.GuestIP) inv, err := svc.RetrieveInventory() assert.NoError(t, err) assert.Equal(t, 4, len(inv.Objects)) @@ -53,7 +53,7 @@ func TestFilterInInventory(t *testing.T) { gateway := newFakeGateway(1) f, err := NewFilter("") require.NoError(t, err) - svc := NewInventorySvc(gateway, testLog, f) + svc := NewInventorySvc(gateway, testLog, f, model.GuestIP) inv, err := svc.RetrieveInventory() assert.NoError(t, err) assert.True(t, len(inv.Objects) > 0) @@ -63,7 +63,7 @@ func TestFilterEmptyDC(t *testing.T) { gateway := newFakeGateway(1) f, err := NewFilter("") require.NoError(t, err) - svc := NewInventorySvc(gateway, testLog, f) + svc := NewInventorySvc(gateway, testLog, f, model.GuestIP) inv, err := svc.RetrieveInventory() assert.NoError(t, err) assert.True(t, len(inv.Objects) > 0) @@ -71,7 +71,7 @@ func TestFilterEmptyDC(t *testing.T) { func TestRetrieveInventory(t *testing.T) { gateway := newFakeGateway(1) - svc := NewInventorySvc(gateway, testLog, nopFilter{}) + svc := NewInventorySvc(gateway, testLog, nopFilter{}, model.GuestIP) inv, _ := svc.RetrieveInventory() requireClusterHost(t, inv, 0) requireClusterVM(t, inv, 1) @@ -79,6 +79,22 @@ func TestRetrieveInventory(t *testing.T) { requireFreeVM(t, inv, 3) } +func TestVMHostnameHostDim(t *testing.T) { + gateway := newFakeGateway(1) + svc := NewInventorySvc(gateway, testLog, nopFilter{}, model.GuestHostName) + inv, _ := svc.RetrieveInventory() + dims := getDims(inv, 1) + require.Equal(t, "foo.host.name", dims["host"]) +} + +func TestVMDisableHostDim(t *testing.T) { + gateway := newFakeGateway(1) + svc := NewInventorySvc(gateway, testLog, nopFilter{}, model.Disable) + inv, _ := svc.RetrieveInventory() + dims := getDims(inv, 1) + require.Empty(t, dims["host"]) +} + func requireClusterHost(t *testing.T, inv *model.Inventory, i int) { dims := getDims(inv, i) require.Equal(t, "foo cluster", dims["cluster"]) @@ -97,6 +113,7 @@ func requireClusterVM(t *testing.T, inv *model.Inventory, i int) { require.Equal(t, model.VMType, dims["object_type"]) require.Equal(t, "foo vm", dims["vm_name"]) require.Equal(t, "foo guest id", dims["guest_id"]) + require.Equal(t, "1.2.3.4", dims["host"]) } func requireFreeHost(t *testing.T, inv *model.Inventory, i int) { diff --git a/pkg/monitors/vsphere/service/metrics_test.go b/pkg/monitors/vsphere/service/metrics_test.go index 6c927463f2..da9837f506 100644 --- a/pkg/monitors/vsphere/service/metrics_test.go +++ b/pkg/monitors/vsphere/service/metrics_test.go @@ -4,12 +4,14 @@ import ( "testing" "github.com/stretchr/testify/require" + + "github.com/signalfx/signalfx-agent/pkg/monitors/vsphere/model" ) func TestPopulateInvMetrics(t *testing.T) { gateway := newFakeGateway(1) metricsSvc := NewMetricsService(gateway, testLog) - inventorySvc := NewInventorySvc(gateway, testLog, nopFilter{}) + inventorySvc := NewInventorySvc(gateway, testLog, nopFilter{}, model.GuestIP) inv, _ := inventorySvc.RetrieveInventory() metricsSvc.PopulateInvMetrics(inv) invObj := inv.Objects[0] diff --git a/pkg/monitors/vsphere/service/points.go b/pkg/monitors/vsphere/service/points.go index 73665a5df2..0bf6fafe15 100644 --- a/pkg/monitors/vsphere/service/points.go +++ b/pkg/monitors/vsphere/service/points.go @@ -116,9 +116,11 @@ func (svc *PointsSvc) FetchPoints(vsInfo *model.VsphereInfo, numSamplesReqd int3 sfxMetricType, perfEntityMetric.SampleInfo[i].Timestamp, ) - // make sure the agent doesn't overwrite the `host` dimension with the hostname - // the agent is running on - dp.Meta[dpmeta.NotHostSpecificMeta] = true + // If the host dimension is set, the make sure the agent doesn't overwrite + // the `host` dimension with the hostname the agent is running on + if _, ok := dims["host"]; ok { + dp.Meta[dpmeta.NotHostSpecificMeta] = true + } svc.ptConsumer(dp) } } diff --git a/pkg/monitors/vsphere/service/points_test.go b/pkg/monitors/vsphere/service/points_test.go index 7d71f7cb03..74bdda9dda 100644 --- a/pkg/monitors/vsphere/service/points_test.go +++ b/pkg/monitors/vsphere/service/points_test.go @@ -6,13 +6,15 @@ import ( "github.com/signalfx/golib/v3/datapoint" "github.com/sirupsen/logrus" "github.com/stretchr/testify/require" + + "github.com/signalfx/signalfx-agent/pkg/monitors/vsphere/model" ) var testLog = logrus.WithField("monitorType", "vsphere-test") func TestRetrievePoints(t *testing.T) { gateway := newFakeGateway(1) - inventorySvc := NewInventorySvc(gateway, testLog, nopFilter{}) + inventorySvc := NewInventorySvc(gateway, testLog, nopFilter{}, model.GuestIP) metricsSvc := NewMetricsService(gateway, testLog) infoSvc := NewVSphereInfoService(inventorySvc, metricsSvc) vsphereInfo, _ := infoSvc.RetrieveVSphereInfo() diff --git a/pkg/monitors/vsphere/vsphere_monitor.go b/pkg/monitors/vsphere/vsphere_monitor.go index e8b21409bc..617b612840 100644 --- a/pkg/monitors/vsphere/vsphere_monitor.go +++ b/pkg/monitors/vsphere/vsphere_monitor.go @@ -75,7 +75,7 @@ func (vsm *vSphereMonitor) wireUpServices(ctx context.Context, client *govmomi.C vsm.log.WithError(err).Error("Failed to create filter") // can keep going, f == nopFilter } - vsm.invSvc = service.NewInventorySvc(gateway, vsm.log, f) + vsm.invSvc = service.NewInventorySvc(gateway, vsm.log, f, vsm.conf.VMHostDimension) vsm.metricSvc = service.NewMetricsService(gateway, vsm.log) vsm.timeSvc = service.NewTimeSvc(gateway) vsm.vsInfoSvc = service.NewVSphereInfoService(vsm.invSvc, vsm.metricSvc) diff --git a/selfdescribe.json b/selfdescribe.json index f482ac3e85..17e30d2fb5 100644 --- a/selfdescribe.json +++ b/selfdescribe.json @@ -55033,6 +55033,9 @@ "guest_id": { "description": "For virtual machine metrics, the guest identifier of the virtual machine to which the metric pertains." }, + "host": { + "description": "For virtual machine metrics, the host dimension will be set to the VM IP instead of the running agent hostname. This behavior is configurable; check the `vmHostDimension` option for details." + }, "object": { "description": "For some metrics, the source of the metric. For example, the identifier of a NIC for a network metric, or the processor number for a CPU metric." }, @@ -56573,6 +56576,14 @@ "type": "string", "elementKind": "" }, + { + "yamlName": "vmHostDimension", + "doc": "The host dimension value set for monitored VMs. The options are: `ip`, `hostname` and `disable`. Default is `ip`. `ip` : the VM IP if available `hostname` : the VM Hostname if available `disable` : the vsphere monitor does not set the host dimension on the VM metrics", + "default": "ip", + "required": false, + "type": "string", + "elementKind": "" + }, { "yamlName": "tlsCACertPath", "doc": "Path to the ca file",