Skip to content

Commit

Permalink
Use HostInfo to populate instanceId, InstanceType (#229)
Browse files Browse the repository at this point in the history
* Use HostInfo to populate instanceId, InstanceType

* Fix lint for scraper test
  • Loading branch information
sam6134 authored Aug 30, 2024
1 parent 13f78ca commit 76facad
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,14 @@ func GetNeuronMetricRelabelConfigs(hostinfo prometheusscraper.HostInfoProvider)
SourceLabels: model.LabelNames{"instance_id"},
TargetLabel: ci.InstanceID,
Regex: relabel.MustNewRegexp("(.*)"),
Replacement: "${1}",
Replacement: hostinfo.GetInstanceID(),
Action: relabel.Replace,
},
{
SourceLabels: model.LabelNames{"instance_type"},
TargetLabel: ci.InstanceType,
Regex: relabel.MustNewRegexp("(.*)"),
Replacement: "${1}",
Replacement: hostinfo.GetInstanceType(),
Action: relabel.Replace,
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ hardware_ecc_events_total{availability_zone="us-east-1c",event_type="sram_ecc_un
// Placeholder host identity values returned by the mock host info provider
// and expected in the scraped metric labels.
const (
	dummyClusterName  = "cluster-name"
	dummyHostName     = "i-000000000"
	dummyNodeName     = "dummy-nodeName"
	dummyInstanceType = "instance-type"
)

// mockHostInfoProvider is a stateless test stub whose methods return fixed
// dummy host information.
type mockHostInfoProvider struct{}
Expand All @@ -51,13 +52,17 @@ func (m mockHostInfoProvider) GetInstanceID() string {
return dummyHostName
}

// GetInstanceType returns dummyInstanceType, the fixed placeholder instance
// type this mock reports.
func (m mockHostInfoProvider) GetInstanceType() string {
return dummyInstanceType
}

func TestNewNeuronScraperEndToEnd(t *testing.T) {
t.Setenv("HOST_NAME", dummyNodeName)
expectedMetrics := make(map[string]prometheusscraper.ExpectedMetricStruct)
expectedMetrics["neuroncore_utilization_ratio"] = prometheusscraper.ExpectedMetricStruct{
MetricValue: 0.1,
MetricLabels: []prometheusscraper.MetricLabel{
{LabelName: "InstanceId", LabelValue: "i-09db9b55e0095612f"},
{LabelName: "InstanceId", LabelValue: dummyHostName},
{LabelName: "ClusterName", LabelValue: dummyClusterName},
{LabelName: "NeuronCore", LabelValue: "0"},
{LabelName: "NodeName", LabelValue: dummyNodeName},
Expand All @@ -66,7 +71,7 @@ func TestNewNeuronScraperEndToEnd(t *testing.T) {
expectedMetrics["neurondevice_hw_ecc_events_total_mem_ecc_corrected"] = prometheusscraper.ExpectedMetricStruct{
MetricValue: 3,
MetricLabels: []prometheusscraper.MetricLabel{
{LabelName: "InstanceId", LabelValue: "i-09db9b55e0095612f"},
{LabelName: "InstanceId", LabelValue: dummyHostName},
{LabelName: "ClusterName", LabelValue: dummyClusterName},
{LabelName: "NeuronDevice", LabelValue: "5"},
{LabelName: "NodeName", LabelValue: dummyNodeName},
Expand All @@ -75,7 +80,7 @@ func TestNewNeuronScraperEndToEnd(t *testing.T) {
expectedMetrics["neuron_runtime_memory_used_bytes"] = prometheusscraper.ExpectedMetricStruct{
MetricValue: 9.043968e+06,
MetricLabels: []prometheusscraper.MetricLabel{
{LabelName: "InstanceId", LabelValue: "i-09db9b55e0095612f"},
{LabelName: "InstanceId", LabelValue: dummyHostName},
{LabelName: "ClusterName", LabelValue: dummyClusterName},
{LabelName: "NodeName", LabelValue: dummyNodeName},
},
Expand All @@ -84,7 +89,7 @@ func TestNewNeuronScraperEndToEnd(t *testing.T) {
expectedMetrics["execution_errors_created"] = prometheusscraper.ExpectedMetricStruct{
MetricValue: 1.7083389404380567e+09,
MetricLabels: []prometheusscraper.MetricLabel{
{LabelName: "InstanceId", LabelValue: "i-09db9b55e0095612f"},
{LabelName: "InstanceId", LabelValue: dummyHostName},
{LabelName: "ClusterName", LabelValue: dummyClusterName},
{LabelName: "NodeName", LabelValue: dummyNodeName},
},
Expand All @@ -93,7 +98,7 @@ func TestNewNeuronScraperEndToEnd(t *testing.T) {
expectedMetrics["system_memory_total_bytes"] = prometheusscraper.ExpectedMetricStruct{
MetricValue: 5.32523487232e+011,
MetricLabels: []prometheusscraper.MetricLabel{
{LabelName: "InstanceId", LabelValue: "i-09db9b55e0095612f"},
{LabelName: "InstanceId", LabelValue: dummyHostName},
{LabelName: "ClusterName", LabelValue: dummyClusterName},
{LabelName: "NodeName", LabelValue: dummyNodeName},
},
Expand All @@ -102,7 +107,7 @@ func TestNewNeuronScraperEndToEnd(t *testing.T) {
expectedMetrics["hardware_ecc_events_total"] = prometheusscraper.ExpectedMetricStruct{
MetricValue: 864.0,
MetricLabels: []prometheusscraper.MetricLabel{
{LabelName: "InstanceId", LabelValue: "i-09db9b55e0095612f"},
{LabelName: "InstanceId", LabelValue: dummyHostName},
{LabelName: "ClusterName", LabelValue: dummyClusterName},
{LabelName: "NodeName", LabelValue: dummyNodeName},
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ type SimplePrometheusScraperOpts struct {
// HostInfoProvider supplies identifying information about the host as plain
// strings: the cluster name, the instance ID and the instance type.
type HostInfoProvider interface {
// GetClusterName returns the cluster name as a string.
GetClusterName() string
// GetInstanceID returns the instance ID as a string.
GetInstanceID() string
// GetInstanceType returns the instance type as a string.
GetInstanceType() string
}

func NewSimplePrometheusScraper(opts SimplePrometheusScraperOpts) (*SimplePrometheusScraper, error) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ func (m mockHostInfoProvider) GetInstanceID() string {
return "i-000000000"
}

// GetInstanceType returns the fixed placeholder value "instance-type".
func (m mockHostInfoProvider) GetInstanceType() string {
return "instance-type"
}

func TestSimplePrometheusScraperBadInputs(t *testing.T) {
settings := componenttest.NewNopTelemetrySettings()
settings.Logger, _ = zap.NewDevelopment()
Expand Down

0 comments on commit 76facad

Please sign in to comment.