Skip to content

Commit

Permalink
[receiver/awscontainerinsightreceiver] Add option to override cluster…
Browse files Browse the repository at this point in the history
… name (open-telemetry#3)

* Add option to override cluster name
  • Loading branch information
sky333999 authored Apr 17, 2023
1 parent ee1e4f1 commit 1f07035
Show file tree
Hide file tree
Showing 12 changed files with 76 additions and 22 deletions.
6 changes: 5 additions & 1 deletion receiver/awscontainerinsightreceiver/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ AWS Container Insights Receiver (`awscontainerinsightreceiver`) is an AWS specif
and summarize metrics and logs from your containerized applications and microservices. Data are collected as as performance log events
using [embedded metric format](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch_Embedded_Metric_Format.html). From the EMF data, Amazon CloudWatch can create the aggregated CloudWatch metrics at the cluster, node, pod, task, and service level.

CloudWatch Container Insights has been supported by [ECS Agent](https://github.com/aws/amazon-ecs-agent) and [CloudWatch Agent](https://github.com/aws/amazon-cloudwatch-agent) to collect infrastructure metrics for many resources such as such as CPU, memory, disk, and network. To migrate existing customers to use OpenTelemetry, AWS Container Insights Receiver (together with CloudWatch EMF Exporter) aims to support the same CloudWatch Container Insights experience for the following platforms:
CloudWatch Container Insights has been supported by [ECS Agent](https://github.com/aws/amazon-ecs-agent) and [CloudWatch Agent](https://github.com/aws/amazon-cloudwatch-agent) to collect infrastructure metrics for many resources such as CPU, memory, disk, and network. To migrate existing customers to use OpenTelemetry, AWS Container Insights Receiver (together with CloudWatch EMF Exporter) aims to support the same CloudWatch Container Insights experience for the following platforms:
* Amazon ECS
* Amazon EKS
* Kubernetes platforms on Amazon EC2
Expand Down Expand Up @@ -55,6 +55,10 @@ The "PodName" attribute is set based on the name of the relevant controllers lik

The "FullPodName" attribute is the pod name including suffix. If false FullPodName label is not added. The default value is false

**cluster_name (optional)**

"ClusterName" can be used to explicitly provide the cluster's name for EKS/ECS NOT on EC2 since it's not possible to auto-detect it using EC2 tags.

## Sample configuration for Container Insights
This is a sample configuration for AWS Container Insights using the `awscontainerinsightreceiver` and `awsemfexporter` for an EKS cluster:
```
Expand Down
4 changes: 4 additions & 0 deletions receiver/awscontainerinsightreceiver/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,8 @@ type Config struct {
// If false FullPodName label is not added
// The default value is false
AddFullPodNameMetricLabel bool `mapstructure:"add_full_pod_name_metric_label"`

// ClusterName can be used to explicitly provide the Cluster's Name for scenarios where it's not
// possible to auto-detect it using EC2 tags.
ClusterName string `mapstructure:"cluster_name"`
}
10 changes: 10 additions & 0 deletions receiver/awscontainerinsightreceiver/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,16 @@ func TestLoadConfig(t *testing.T) {
PrefFullPodName: false,
},
},
{
id: component.NewIDWithName(typeStr, "cluster_name"),
expected: &Config{
CollectionInterval: 60 * time.Second,
ContainerOrchestrator: "eks",
TagService: true,
PrefFullPodName: false,
ClusterName: "override_cluster",
},
},
}

for _, tt := range tests {
Expand Down
4 changes: 4 additions & 0 deletions receiver/awscontainerinsightreceiver/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ const (

// Don't tag pod full name by default
defaultAddFullPodNameMetricLabel = false

// Rely on EC2 tags to auto-detect cluster name by default
defaultClusterName = ""
)

// NewFactory creates a factory for AWS container insight receiver
Expand All @@ -63,6 +66,7 @@ func createDefaultConfig() component.Config {
TagService: defaultTagService,
PrefFullPodName: defaultPrefFullPodName,
AddFullPodNameMetricLabel: defaultAddFullPodNameMetricLabel,
ClusterName: defaultClusterName,
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,13 @@ type ContainerInstance struct {
ContainerInstanceArn string
}

func newECSInstanceInfo(ctx context.Context, ecsAgentEndpointProvider hostIPProvider,
func newECSInstanceInfo(ctx context.Context, clusterName string, ecsAgentEndpointProvider hostIPProvider,
refreshInterval time.Duration, logger *zap.Logger, httpClient doer, readyC chan bool) containerInstanceInfoProvider {
cii := &containerInstanceInfo{
logger: logger,
httpClient: httpClient,
refreshInterval: refreshInterval,
clusterName: clusterName,
ecsAgentEndpointProvider: ecsAgentEndpointProvider,
readyC: readyC,
}
Expand Down Expand Up @@ -93,7 +94,9 @@ func (cii *containerInstanceInfo) refresh(ctx context.Context) {
}

cii.Lock()
cii.clusterName = cluster
if cii.clusterName == "" { // Update cluster name only when it wasn't already provided via config
cii.clusterName = cluster
}
cii.containerInstanceID = instanceID
defer cii.Unlock()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ func TestECSInstanceInfo(t *testing.T) {
}

// normal case
ecsinstanceinfo := newECSInstanceInfo(ctx, hostIPProvider, time.Minute, zap.NewNop(), mockHTTP, instanceReadyC)
ecsinstanceinfo := newECSInstanceInfo(ctx, "", hostIPProvider, time.Minute, zap.NewNop(), mockHTTP, instanceReadyC)

assert.NotNil(t, ecsinstanceinfo)

Expand All @@ -86,11 +86,10 @@ func TestECSInstanceInfo(t *testing.T) {
response: httpResponse,
err: err,
}
ecsinstanceinfo = newECSInstanceInfo(ctx, hostIPProvider, time.Minute, zap.NewNop(), mockHTTP, instanceReadyC)
ecsinstanceinfo = newECSInstanceInfo(ctx, "override-cluster", hostIPProvider, time.Minute, zap.NewNop(), mockHTTP, instanceReadyC)

assert.NotNil(t, ecsinstanceinfo)

assert.Equal(t, "", ecsinstanceinfo.GetClusterName())
assert.Equal(t, "override-cluster", ecsinstanceinfo.GetClusterName())
assert.Equal(t, "", ecsinstanceinfo.GetContainerInstanceID())

}
18 changes: 14 additions & 4 deletions receiver/awscontainerinsightreceiver/internal/ecsInfo/ecsinfo.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ type hostIPProvider interface {
type EcsInfo struct {
logger *zap.Logger
refreshInterval time.Duration
clusterName string
cancel context.CancelFunc
hostIPProvider hostIPProvider
isTaskInfoReadyC chan bool
Expand All @@ -47,7 +48,7 @@ type EcsInfo struct {
ecsTaskInfo ecsTaskInfoProvider
cgroup cgroupScannerProvider

containerInstanceInfoCreator func(context.Context, hostIPProvider, time.Duration, *zap.Logger, doer, chan bool) containerInstanceInfoProvider
containerInstanceInfoCreator func(context.Context, string, hostIPProvider, time.Duration, *zap.Logger, doer, chan bool) containerInstanceInfoProvider
ecsTaskInfoCreator func(context.Context, hostIPProvider, time.Duration, *zap.Logger, doer, chan bool) ecsTaskInfoProvider
cgroupScannerCreator func(context.Context, *zap.Logger, ecsTaskInfoProvider, containerInstanceInfoProvider, time.Duration) cgroupScannerProvider
}
Expand Down Expand Up @@ -81,16 +82,25 @@ func (e *EcsInfo) GetContainerInstanceID() string {
}

func (e *EcsInfo) GetClusterName() string {
if e.clusterName != "" {
return e.clusterName
}
if e.containerInstanceInfo != nil {
return e.containerInstanceInfo.GetClusterName()
}
return ""
}

type ecsInfoOption func(*EcsInfo)
func WithClusterName(name string) Option {
return func(info *EcsInfo) {
info.clusterName = name
}
}

type Option func(*EcsInfo)

// New creates a k8sApiServer which can generate cluster-level metrics
func NewECSInfo(refreshInterval time.Duration, hostIPProvider hostIPProvider, host component.Host, settings component.TelemetrySettings, options ...ecsInfoOption) (*EcsInfo, error) {
func NewECSInfo(refreshInterval time.Duration, hostIPProvider hostIPProvider, host component.Host, settings component.TelemetrySettings, options ...Option) (*EcsInfo, error) {
setting := confighttp.HTTPClientSettings{
Timeout: defaultTimeout,
}
Expand Down Expand Up @@ -139,7 +149,7 @@ func (e *EcsInfo) initContainerInfo(ctx context.Context) {

e.logger.Info("instance ip is ready and begin initializing ecs container info")

e.containerInstanceInfo = e.containerInstanceInfoCreator(ctx, e.hostIPProvider, e.refreshInterval, e.logger, e.httpClient, e.isContainerInfoReadyC)
e.containerInstanceInfo = e.containerInstanceInfoCreator(ctx, e.clusterName, e.hostIPProvider, e.refreshInterval, e.logger, e.httpClient, e.isContainerInfoReadyC)
close(e.containerInfoTestReadyC)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ func TestNewECSInfo(t *testing.T) {
// test the case when containerInstanceInfor fails to initialize
containerInstanceInfoCreatorOpt := func(ei *EcsInfo) {

ei.containerInstanceInfoCreator = func(context.Context, hostIPProvider, time.Duration, *zap.Logger, doer, chan bool) containerInstanceInfoProvider {
ei.containerInstanceInfoCreator = func(context.Context, string, hostIPProvider, time.Duration, *zap.Logger, doer, chan bool) containerInstanceInfoProvider {
return &MockInstanceInfo{
clusterName: "Cluster-name",
instanceID: "instance-id",
Expand Down
14 changes: 12 additions & 2 deletions receiver/awscontainerinsightreceiver/internal/host/hostinfo.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ type Info struct {
awsSession *session.Session
refreshInterval time.Duration
containerOrchestrator string
clusterName string
instanceIDReadyC chan bool // close of this channel indicates instance ID is ready
instanceIPReadyC chan bool // close of this channel indicates instance Ip is ready

Expand All @@ -51,10 +52,16 @@ type Info struct {
ec2TagsCreator func(context.Context, *session.Session, string, string, string, time.Duration, *zap.Logger, ...ec2TagsOption) ec2TagsProvider
}

type machineInfoOption func(*Info)
type Option func(*Info)

func WithClusterName(name string) Option {
return func(info *Info) {
info.clusterName = name
}
}

// NewInfo creates a new Info struct
func NewInfo(containerOrchestrator string, refreshInterval time.Duration, logger *zap.Logger, options ...machineInfoOption) (*Info, error) {
func NewInfo(containerOrchestrator string, refreshInterval time.Duration, logger *zap.Logger, options ...Option) (*Info, error) {
ctx, cancel := context.WithCancel(context.Background())
mInfo := &Info{
cancel: cancel,
Expand Down Expand Up @@ -156,6 +163,9 @@ func (m *Info) GetEBSVolumeID(devName string) string {

// GetClusterName returns the cluster name associated with the host
func (m *Info) GetClusterName() string {
if m.clusterName != "" {
return m.clusterName
}
if m.ec2Tags != nil {
return m.ec2Tags.getClusterName()
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ func TestInfo(t *testing.T) {
}
m, err := NewInfo(ci.EKS, time.Minute, zap.NewNop(), nodeCapacityCreatorOpt)
assert.Nil(t, m)
assert.NotNil(t, err)
assert.Error(t, err)

// test the case when aws session fails to initialize
nodeCapacityCreatorOpt = func(m *Info) {
Expand All @@ -105,7 +105,7 @@ func TestInfo(t *testing.T) {
}
m, err = NewInfo(ci.EKS, time.Minute, zap.NewNop(), nodeCapacityCreatorOpt, awsSessionCreatorOpt)
assert.Nil(t, m)
assert.NotNil(t, err)
assert.Error(t, err)

// test normal case where everything is working
awsSessionCreatorOpt = func(m *Info) {
Expand Down Expand Up @@ -133,7 +133,7 @@ func TestInfo(t *testing.T) {
}
m, err = NewInfo(ci.EKS, time.Minute, zap.NewNop(), awsSessionCreatorOpt,
nodeCapacityCreatorOpt, ec2MetadataCreatorOpt, ebsVolumeCreatorOpt, ec2TagsCreatorOpt)
assert.Nil(t, err)
assert.NoError(t, err)
assert.NotNil(t, m)

// befoe ebsVolume and ec2Tags are initialized
Expand All @@ -153,6 +153,14 @@ func TestInfo(t *testing.T) {
assert.Equal(t, "ebs-volume-id", m.GetEBSVolumeID("dev"))
assert.Equal(t, "cluster-name", m.GetClusterName())
assert.Equal(t, "asg", m.GetAutoScalingGroupName())

// Test with cluster name override
m, err = NewInfo(ci.EKS, time.Minute, zap.NewNop(), awsSessionCreatorOpt,
nodeCapacityCreatorOpt, ec2MetadataCreatorOpt, ebsVolumeCreatorOpt, ec2TagsCreatorOpt, WithClusterName("override-cluster"))
assert.NoError(t, err)
assert.NotNil(t, m)

assert.Equal(t, "override-cluster", m.GetClusterName())
}

func TestInfoForECS(t *testing.T) {
Expand All @@ -164,7 +172,7 @@ func TestInfoForECS(t *testing.T) {
}
m, err := NewInfo(ci.ECS, time.Minute, zap.NewNop(), nodeCapacityCreatorOpt)
assert.Nil(t, m)
assert.NotNil(t, err)
assert.Error(t, err)

// test the case when aws session fails to initialize
nodeCapacityCreatorOpt = func(m *Info) {
Expand All @@ -179,7 +187,7 @@ func TestInfoForECS(t *testing.T) {
}
m, err = NewInfo(ci.ECS, time.Minute, zap.NewNop(), nodeCapacityCreatorOpt, awsSessionCreatorOpt)
assert.Nil(t, m)
assert.NotNil(t, err)
assert.Error(t, err)

// test normal case where everything is working
awsSessionCreatorOpt = func(m *Info) {
Expand Down Expand Up @@ -207,7 +215,7 @@ func TestInfoForECS(t *testing.T) {
}
m, err = NewInfo(ci.ECS, time.Minute, zap.NewNop(), awsSessionCreatorOpt,
nodeCapacityCreatorOpt, ec2MetadataCreatorOpt, ebsVolumeCreatorOpt, ec2TagsCreatorOpt)
assert.Nil(t, err)
assert.NoError(t, err)
assert.NotNil(t, m)

// befoe ebsVolume and ec2Tags are initialized
Expand Down
4 changes: 2 additions & 2 deletions receiver/awscontainerinsightreceiver/receiver.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ func newAWSContainerInsightReceiver(
func (acir *awsContainerInsightReceiver) Start(ctx context.Context, host component.Host) error {
ctx, acir.cancel = context.WithCancel(ctx)

hostinfo, err := hostInfo.NewInfo(acir.config.ContainerOrchestrator, acir.config.CollectionInterval, acir.settings.Logger)
hostinfo, err := hostInfo.NewInfo(acir.config.ContainerOrchestrator, acir.config.CollectionInterval, acir.settings.Logger, hostInfo.WithClusterName(acir.config.ClusterName))
if err != nil {
return err
}
Expand All @@ -93,7 +93,7 @@ func (acir *awsContainerInsightReceiver) Start(ctx context.Context, host compone
}
if acir.config.ContainerOrchestrator == ci.ECS {

ecsInfo, err := ecsinfo.NewECSInfo(acir.config.CollectionInterval, hostinfo, host, acir.settings)
ecsInfo, err := ecsinfo.NewECSInfo(acir.config.CollectionInterval, hostinfo, host, acir.settings, ecsinfo.WithClusterName(acir.config.ClusterName))
if err != nil {
return err
}
Expand Down
2 changes: 2 additions & 0 deletions receiver/awscontainerinsightreceiver/testdata/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@ awscontainerinsightreceiver:
container_orchestrator: eks
awscontainerinsightreceiver/collection_interval_settings:
collection_interval: 60s
awscontainerinsightreceiver/cluster_name:
cluster_name: override_cluster

0 comments on commit 1f07035

Please sign in to comment.