From 005cb1f589f5c7ffb7cae929273bc03c25d5d87e Mon Sep 17 00:00:00 2001
From: Gavin Mealy
Date: Mon, 20 Jun 2022 17:30:45 +0000
Subject: [PATCH] Performance Tracking Integration Test and Data Collection (#497)

* Initial commit for GetMetricData query utilities
* Added configuration file for CWA test
* Initial commit for data collection/GetMetricData portion of test
* Initial commit for main test driver that starts the agent and then calls data collection
* Changed test error to fatal if an error is returned by data collection
* Added comments and removed unnecessary code that prints output
* Copy CWA config file to its destination
* Changed log print to error return in utilities; a few small formatting changes
* Implemented feedback from PR: dynamically grab metric names from the config file instead of declaring them in the code
* Fixed creation of metric names for the query and added comments
* Fixed config spacing
* Fixed agent runtime
* Addressed more pull request comments; moved config parsing to a new function
* Added statistics calculation and log monitoring to the CWA config; the test now writes to log files while CWA monitors them
* Fixed merge conflicts
* Changed period in GetMetrics call from 30 to 10
---
 .github/workflows/build-test-linux.yml | 2 +-
 .github/workflows/build-test-macos.yml | 2 +-
 .github/workflows/build-test-windows.yml | 2 +-
 .github/workflows/integrationTest.yml | 80 +++-
 RELEASE_NOTES | 28 ++
 .../source/Dockerfile | 1 +
 .../cwagent-daemonset.yaml | 2 +-
 .../start-amazon-cloudwatch-agent.go | 2 +-
 go.mod | 17 +-
 go.sum | 30 +-
 integration/terraform/ec2/README.md | 1 +
 integration/terraform/ec2/linux/main.tf | 6 +-
 integration/terraform/ec2/linux/variables.tf | 8 +
 .../performance_query_utils.go | 157 ++++++++
 .../test/performancetest/performance_test.go | 107 ++++++
 .../performancetest/resources/config.json | 114 ++++++
 .../test/performancetest/transmitter.go | 266 ++++++++++++++
 packaging/darwin/amazon-cloudwatch-agent-ctl | 12 +-
 .../dependencies/amazon-cloudwatch-agent-ctl | 20 +-
 plugins/processors/ec2tagger/constants.go | 79 ++++
 plugins/processors/ec2tagger/ec2tagger.go | 252 ++++++-------
 .../processors/ec2tagger/ec2tagger_test.go | 346 ++++++++----------
 translator/jsonconfig/mergeJsonConfig.go | 6 +-
 translator/util/ec2util/ec2util.go | 67 ++--
 translator/util/sdkutil.go | 2 +-
 25 files changed, 1190 insertions(+), 419 deletions(-)
 create mode 100644 integration/test/performancetest/performance_query_utils.go
 create mode 100644 integration/test/performancetest/performance_test.go
 create mode 100644 integration/test/performancetest/resources/config.json
 create mode 100644 integration/test/performancetest/transmitter.go
 create mode 100644 plugins/processors/ec2tagger/constants.go

diff --git a/.github/workflows/build-test-linux.yml b/.github/workflows/build-test-linux.yml
index 8e0c316c17..73cfe3d05d 100644
--- a/.github/workflows/build-test-linux.yml
+++ b/.github/workflows/build-test-linux.yml
@@ -49,7 +49,7 @@ jobs:
           path: |
             ~/.cache/go-build
             ~/go/pkg/mod
-          key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
+          key: v1-go-pkg-mod-${{ runner.os }}-${{ hashFiles('**/go.sum') }}
 
       - name: Test
         run: make test
diff --git a/.github/workflows/build-test-macos.yml b/.github/workflows/build-test-macos.yml
index 384b1a4758..7c03007635 100644
--- a/.github/workflows/build-test-macos.yml
+++ b/.github/workflows/build-test-macos.yml
@@ -41,7 +41,7 @@ jobs:
           path: |
             ~/Library/Caches/go-build
             ~/go/pkg/mod
-          key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
+          key: v1-go-pkg-mod-${{ runner.os }}-${{ hashFiles('**/go.sum') }}
 
       - name: Test
         run: make test
diff --git a/.github/workflows/build-test-windows.yml b/.github/workflows/build-test-windows.yml
index 6fae156ec8..ff209a1ff9 100644
--- a/.github/workflows/build-test-windows.yml
+++ b/.github/workflows/build-test-windows.yml
@@ -39,7 +39,7 @@ jobs:
           path: |
             %LocalAppData%\go-build
             ~/go/pkg/mod
-          key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
+          key: v1-go-pkg-mod-${{ runner.os }}-${{ hashFiles('**/go.sum') }}
 
       - name: Install make
         run: choco install make
diff --git
a/.github/workflows/integrationTest.yml b/.github/workflows/integrationTest.yml index 3c65c63fd9..5d69fcbf26 100644 --- a/.github/workflows/integrationTest.yml +++ b/.github/workflows/integrationTest.yml @@ -116,7 +116,7 @@ jobs: path: | ~/go/pkg/mod ~/.cache/go-build - key: v1-go-pkg-mod-${{ hashFiles('**/go.sum') }} + key: v1-go-pkg-mod-${{ runner.os }}-${{ hashFiles('**/go.sum') }} - name: Cache binaries id: cached_binaries @@ -615,4 +615,80 @@ jobs: max_attempts: 3 timeout_minutes: 8 retry_wait_seconds: 5 - command: cd integration/terraform/ecs/linux && terraform destroy --auto-approve \ No newline at end of file + command: cd integration/terraform/ecs/linux && terraform destroy --auto-approve + + PerformanceTrackingTest: + name: "PerformanceTrackingTest" + needs: [MakeBinary, StartLocalStack, GenerateTestMatrix] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v1 + with: + aws-access-key-id: ${{ secrets.TERRAFORM_AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.TERRAFORM_AWS_SECRET_ACCESS_KEY }} + aws-region: us-west-2 + + - name: Cache if success + id: performance-tracking + uses: actions/cache@v2 + with: + path: go.mod + key: performance-tracking-test-${{ github.sha }} + + - name: Echo Test Info + run: echo run performance-tracking + - name: Verify Terraform version + run: terraform --version + - name: Get SHA + id: sha + run: echo "::set-output name=sha_short::$(git rev-parse --short HEAD)" + - name: Get git date + id: sha_date + run: echo "::set-output name=sha_date::$(git show -s --format=%ct ${{ steps.sha.outputs.sha_short }} )" + - name: Check env + run: echo "SHA ${{ steps.sha.outputs.sha_short }} | Date ${{ steps.sha_date.outputs.sha_date }} " + # nick-invision/retry@v2 starts at base dir + - name: Terraform apply + if: steps.performance-tracking.outputs.cache-hit != 'true' + uses: nick-invision/retry@v2 + with: + max_attempts: 1 + timeout_minutes: 50 + retry_wait_seconds: 5 + command: | + cd integration/terraform/ec2/linux + terraform init + if terraform apply --auto-approve \ + -var="ssh_key=${PRIVATE_KEY}" -var="github_repo=${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}.git" \ + -var="github_sha=${GITHUB_SHA}" -var="install_agent=rpm -U ./amazon-cloudwatch-agent.rpm" \ + -var="user=ec2-user" \ + -var="ami=cloudwatch-agent-integration-test-al2*" \ + -var="ca_cert_path=/etc/ssl/certs/ca-bundle.crt" \ + -var="arc=amd64" \ + -var="binary_name=amazon-cloudwatch-agent.rpm" \ + -var="local_stack_host_name=${{ needs.StartLocalStack.outputs.local_stack_host_name }}" \ + -var="s3_bucket=${S3_INTEGRATION_BUCKET}" \ + -var="vpc_security_group_ids=${VPC_SECURITY_GROUPS_IDS}" \ + -var="key_name=${KEY_NAME}" \ + -var="test_name=cw-integ-test-al2" \ + -var="iam_instance_profile=${IAM_ROLE}" \ + -var="sha=${{ steps.sha.outputs.sha_short }}" \ + -var="sha_date=${{ steps.sha_date.outputs.sha_date }}" \ + -var="test_dir=./integration/test/performancetest" ; then terraform destroy -auto-approve + + else + terraform destroy -auto-approve && exit 1 + fi + + #This is here just in case workflow cancel + - name: Terraform destroy + if: ${{ cancelled() && steps.performance-tracking.outputs.cache-hit != 'true' }} + uses: nick-invision/retry@v2 + with: + max_attempts: 3 + timeout_minutes: 8 + retry_wait_seconds: 5 + command: cd integration/terraform/ec2/linux && terraform destroy --auto-approve diff --git a/RELEASE_NOTES b/RELEASE_NOTES index a14bd4b4c0..e95ee484b8 100644 --- a/RELEASE_NOTES +++ 
b/RELEASE_NOTES @@ -1,3 +1,31 @@ +======================================================================== +Amazon CloudWatch Agent 1.247352.0 (2022-05-26) +======================================================================== + +New features: +* Support log group retention in CloudWatch Logs for 6,7,8,9 years (#469) +* Add scraping ECS_CONTAINER_METADATA_URI_V4 for ECS (#453) +* Allow account ID as placeholder value for log configuration (#400) +* Restart agent on RPM upgrade to fix SSM feature to auto update CWAgent (#387) +* Increase dimension's maximum to 30 for each metrics (#361) +* Creates a system user, "aoc", for the AWS Distro for OpenTelemetry collector + without a shell, or updates an existing "aoc" user on the host so that the existing + "aoc" user has no shell. + +Bug fixes: +* Avoid Windows Server 2022 and PowerShell ISE exit on stderr (#473) +* Enhance config validation for bad regex in CWAgent (#459) +* Delete log's state file when tailer terminates due to an error (#457) +* Auto_removal sends all remain logs before deleting the files (#452) +* Include metric name in warning message when value is negative (#445) +* Add writing to CWAgent's log before a panic (#421) +* Move conflicting log retention check to translator (#418) +* Fix race condition when creating log groups and log streams (ResourceAlreadyExists) (#416) +* Retry on network failure for detecting EC2 (#397) +* ECS Service Discovery: Fix implicit network mode (#385) +* Fix Windows event log messages truncated on Windows Server 2022 (#379) +* Make CloudWatch Logs's pusher to wait for the final flush to complete before returning (#350) + ======================================================================== Amazon CloudWatch Agent 1.247350.0 (2022-01-19) ======================================================================== diff --git a/amazon-cloudwatch-container-insights/cloudwatch-agent-dockerfile/source/Dockerfile b/amazon-cloudwatch-container-insights/cloudwatch-agent-dockerfile/source/Dockerfile index 13950256c5..5dc97c22ab 100644 --- a/amazon-cloudwatch-container-insights/cloudwatch-agent-dockerfile/source/Dockerfile +++ b/amazon-cloudwatch-container-insights/cloudwatch-agent-dockerfile/source/Dockerfile @@ -16,6 +16,7 @@ ENV GO111MODULE=${GO111MODULE} COPY go.mod /go/src/github.com/aws/amazon-cloudwatch-agent/ COPY go.sum /go/src/github.com/aws/amazon-cloudwatch-agent/ RUN go mod download -x + COPY . 
/go/src/github.com/aws/amazon-cloudwatch-agent/ RUN make build-for-docker-${TARGETARCH} diff --git a/amazon-cloudwatch-container-insights/k8s-yaml-templates/cwagent-kubernetes-monitoring/cwagent-daemonset.yaml b/amazon-cloudwatch-container-insights/k8s-yaml-templates/cwagent-kubernetes-monitoring/cwagent-daemonset.yaml index e5be38c884..68e5601f34 100644 --- a/amazon-cloudwatch-container-insights/k8s-yaml-templates/cwagent-kubernetes-monitoring/cwagent-daemonset.yaml +++ b/amazon-cloudwatch-container-insights/k8s-yaml-templates/cwagent-kubernetes-monitoring/cwagent-daemonset.yaml @@ -89,4 +89,4 @@ spec: hostPath: path: /dev/disk/ terminationGracePeriodSeconds: 60 - serviceAccountName: cloudwatch-agent + serviceAccountName: cloudwatch-agent \ No newline at end of file diff --git a/cmd/start-amazon-cloudwatch-agent/start-amazon-cloudwatch-agent.go b/cmd/start-amazon-cloudwatch-agent/start-amazon-cloudwatch-agent.go index 504f6a5cf4..3b4edb16ca 100644 --- a/cmd/start-amazon-cloudwatch-agent/start-amazon-cloudwatch-agent.go +++ b/cmd/start-amazon-cloudwatch-agent/start-amazon-cloudwatch-agent.go @@ -62,7 +62,7 @@ func translateConfig() error { log.Printf("I! Return exit error: exit code=%d\n", status.ExitStatus()) if status.ExitStatus() == config.ERR_CODE_NOJSONFILE { - log.Printf("I! there is no json configuration when running translator\n") + log.Printf("I! No json config files found, please provide config, exit now\n") os.Exit(0) } } diff --git a/go.mod b/go.mod index cb8cbb148f..52a5097775 100644 --- a/go.mod +++ b/go.mod @@ -54,18 +54,20 @@ require ( github.com/Jeffail/gabs v1.4.0 github.com/Rican7/retry v0.1.1-0.20160712041035-272ad122d6e5 github.com/aws/aws-sdk-go v1.44.16 - github.com/aws/aws-sdk-go-v2 v1.16.3 + github.com/aws/aws-sdk-go-v2 v1.16.5 github.com/aws/aws-sdk-go-v2/config v1.15.3 + github.com/aws/aws-sdk-go-v2/feature/dynamodb/attributevalue v1.9.4 github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.3 github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.18.1 github.com/aws/aws-sdk-go-v2/service/cloudwatchlogs v1.15.4 + github.com/aws/aws-sdk-go-v2/service/dynamodb v1.15.7 github.com/aws/aws-sdk-go-v2/service/ec2 v1.29.0 - github.com/aws/smithy-go v1.11.2 + github.com/aws/smithy-go v1.11.3 github.com/bigkevmcd/go-configparser v0.0.0-20200217161103-d137835d2579 github.com/go-kit/kit v0.11.0 github.com/gobwas/glob v0.2.3 github.com/google/cadvisor v0.44.0 - github.com/google/go-cmp v0.5.7 + github.com/google/go-cmp v0.5.8 github.com/hashicorp/golang-lru v0.5.4 github.com/influxdata/telegraf v0.0.0-00010101000000-000000000000 github.com/influxdata/toml v0.0.0-20190415235208-270119a8ce65 @@ -121,11 +123,12 @@ require ( github.com/armon/go-metrics v0.3.10 // indirect github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.1 // indirect github.com/aws/aws-sdk-go-v2/credentials v1.11.2 // indirect - github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.9 // indirect - github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.3 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.12 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.6 // indirect github.com/aws/aws-sdk-go-v2/internal/ini v1.3.10 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.1 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/endpoint-discovery v1.7.3 // indirect + github.com/aws/aws-sdk-go-v2/service/dynamodbstreams v1.13.7 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.2 // indirect + 
github.com/aws/aws-sdk-go-v2/service/internal/endpoint-discovery v1.7.6 // indirect github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.3 // indirect github.com/aws/aws-sdk-go-v2/service/s3 v1.26.5 // indirect github.com/aws/aws-sdk-go-v2/service/sso v1.11.3 // indirect diff --git a/go.sum b/go.sum index 417458747d..a83633fd1c 100644 --- a/go.sum +++ b/go.sum @@ -226,8 +226,8 @@ github.com/aws/aws-sdk-go-v2 v1.7.0/go.mod h1:tb9wi5s61kTDA5qCkcDbt3KRVV74GGslQk github.com/aws/aws-sdk-go-v2 v1.9.2/go.mod h1:cK/D0BBs0b/oWPIcX/Z/obahJK1TT7IPVjy53i/mX/4= github.com/aws/aws-sdk-go-v2 v1.13.0/go.mod h1:L6+ZpqHaLbAaxsqV0L4cvxZY7QupWJB4fhkf8LXvC7w= github.com/aws/aws-sdk-go-v2 v1.16.2/go.mod h1:ytwTPBG6fXTZLxxeeCCWj2/EMYp/xDUgX+OET6TLNNU= -github.com/aws/aws-sdk-go-v2 v1.16.3 h1:0W1TSJ7O6OzwuEvIXAtJGvOeQ0SGAhcpxPN2/NK5EhM= -github.com/aws/aws-sdk-go-v2 v1.16.3/go.mod h1:ytwTPBG6fXTZLxxeeCCWj2/EMYp/xDUgX+OET6TLNNU= +github.com/aws/aws-sdk-go-v2 v1.16.5 h1:Ah9h1TZD9E2S1LzHpViBO3Jz9FPL5+rmflmb8hXirtI= +github.com/aws/aws-sdk-go-v2 v1.16.5/go.mod h1:Wh7MEsmEApyL5hrWzpDkba4gwAPc5/piwLVLFnCxp48= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.1 h1:SdK4Ppk5IzLs64ZMvr6MrSficMtjY2oS0WOORXTlxwU= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.1/go.mod h1:n8Bs1ElDD2wJ9kCRTczA83gYbBmjSwZp3umc6zF4EeM= github.com/aws/aws-sdk-go-v2/config v1.8.3/go.mod h1:4AEiLtAb8kLs7vgw2ZV3p2VZ1+hBavOc84hqxVNpCyw= @@ -236,16 +236,20 @@ github.com/aws/aws-sdk-go-v2/config v1.15.3/go.mod h1:9YL3v07Xc/ohTsxFXzan9ZpFpd github.com/aws/aws-sdk-go-v2/credentials v1.4.3/go.mod h1:FNNC6nQZQUuyhq5aE5c7ata8o9e4ECGmS4lAXC7o1mQ= github.com/aws/aws-sdk-go-v2/credentials v1.11.2 h1:RQQ5fzclAKJyY5TvF+fkjJEwzK4hnxQCLOu5JXzDmQo= github.com/aws/aws-sdk-go-v2/credentials v1.11.2/go.mod h1:j8YsY9TXTm31k4eFhspiQicfXPLZ0gYXA50i4gxPE8g= +github.com/aws/aws-sdk-go-v2/feature/dynamodb/attributevalue v1.9.4 h1:EoyeSOfbSuKh+bQIDoZaVJjON6PF+dsSn5w1RhIpMD0= +github.com/aws/aws-sdk-go-v2/feature/dynamodb/attributevalue v1.9.4/go.mod h1:bfCL7OwZS6owS06pahfGxhcgpLWj2W1sQASoYRuenag= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.6.0/go.mod h1:gqlclDEZp4aqJOancXK6TN24aKhT0W0Ae9MHk3wzTMM= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.3 h1:LWPg5zjHV9oz/myQr4wMs0gi4CjnDN/ILmyZUFYXZsU= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.3/go.mod h1:uk1vhHHERfSVCUnqSqz8O48LBYDSC+k6brng09jcMOk= github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.5.3 h1:0O72494cCsazjpsGfo+LXezru6PMSp0HUB1m5UfpaRU= github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.4/go.mod h1:XHgQ7Hz2WY2GAn//UXHofLfPXWh+s62MbMOijrg12Lw= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.9 h1:onz/VaaxZ7Z4V+WIN9Txly9XLTmoOh1oJ8XcAC3pako= github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.9/go.mod h1:AnVH5pvai0pAF4lXRq0bmhbes1u9R8wTE+g+183bZNM= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.12 h1:Zt7DDk5V7SyQULUUwIKzsROtVzp/kVvcz15uQx/Tkow= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.12/go.mod h1:Afj/U8svX6sJ77Q+FPWMzabJ9QjbwP32YlopgKALUpg= github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.2.0/go.mod h1:BsCSJHx5DnDXIrOcqB8KN1/B+hXLG/bi4Y6Vjcx/x9E= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.3 h1:9stUQR/u2KXU6HkFJYlqnZEjBnbgrVbG6I5HN09xZh0= github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.3/go.mod h1:ssOhaLpRlh88H3UmEcsBoVKq309quMvm3Ds8e9d4eJM= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.6 h1:eeXdGVtXEe+2Jc49+/vAzna3FAQnUD4AagAw8tzbmfc= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 
v2.4.6/go.mod h1:FwpAKI+FBPIELJIdmQzlLtRe8LQSOreMcM2wBsPMvvc= github.com/aws/aws-sdk-go-v2/internal/ini v1.2.4/go.mod h1:ZcBrrI3zBKlhGFNYWvju0I3TR93I7YIgAfy82Fh4lcQ= github.com/aws/aws-sdk-go-v2/internal/ini v1.3.10 h1:by9P+oy3P/CwggN4ClnW2D4oL91QV7pBzBICi1chZvQ= github.com/aws/aws-sdk-go-v2/internal/ini v1.3.10/go.mod h1:8DcYQcz0+ZJaSxANlHIsbbi6S+zMwjwdDqwW3r9AzaE= @@ -257,15 +261,19 @@ github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.18.1 h1:8PHGmLw1QbTdXfgEpXclO github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.18.1/go.mod h1:Z+8JhhltQDM1vIHvEtQLr1wVVAqQVLpvCDMVqYBrwr8= github.com/aws/aws-sdk-go-v2/service/cloudwatchlogs v1.15.4 h1:mBqjBKtZzvAc9j7gU+FEHbhTKSr02iqMOdQIL/7GZ78= github.com/aws/aws-sdk-go-v2/service/cloudwatchlogs v1.15.4/go.mod h1:R49Py2lGoKH7bCpwhjN9l7MfR/PU6zHXn1tCRR8cwOs= -github.com/aws/aws-sdk-go-v2/service/dynamodb v1.14.0 h1:P+eF8PKkeaiTfN/VBe5GI3uNdhwCPVYCQxchRewJcWk= +github.com/aws/aws-sdk-go-v2/service/dynamodb v1.15.7 h1:Ls6kDGWNr3wxE8JypXgTTonHpQ1eRVCGNqaFHY2UASw= +github.com/aws/aws-sdk-go-v2/service/dynamodb v1.15.7/go.mod h1:+v2jeT4/39fCXUQ0ZfHQHMMiJljnmiuj16F03uAd9DY= +github.com/aws/aws-sdk-go-v2/service/dynamodbstreams v1.13.7 h1:o2HKntJx3vr3y11NK58RA6tYKZKQo5PWWt/bs0rWR0U= +github.com/aws/aws-sdk-go-v2/service/dynamodbstreams v1.13.7/go.mod h1:FAVtDKEl/8WxRDQ33e2fz16RO1t4zeEwWIU5kR29xXs= github.com/aws/aws-sdk-go-v2/service/ec2 v1.29.0 h1:7jk4NfzDnnSbaR9E4mOBWRZXQThq5rsqjlDC+uu9dsI= github.com/aws/aws-sdk-go-v2/service/ec2 v1.29.0/go.mod h1:HoTu0hnXGafTpKIZQ60jw0ybhhCH1QYf20oL7GEJFdg= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.1 h1:T4pFel53bkHjL2mMo+4DKE6r6AuoZnM0fg7k1/ratr4= github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.1/go.mod h1:GeUru+8VzrTXV/83XyMJ80KpH8xO89VPoUileyNQ+tc= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.2 h1:T/ywkX1ed+TsZVQccu/8rRJGxKZF/t0Ivgrb4MHTSeo= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.2/go.mod h1:RnloUnyZ4KN9JStGY1LuQ7Wzqh7V0f8FinmRdHYtuaA= github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.3 h1:I0dcwWitE752hVSMrsLCxqNQ+UdEp3nACx2bYNMQq+k= github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.3/go.mod h1:Seb8KNmD6kVTjwRjVEgOT5hPin6sq+v4C2ycJQDwuH8= -github.com/aws/aws-sdk-go-v2/service/internal/endpoint-discovery v1.7.3 h1:JUbFrnq5mEeM2anIJ2PUkaHpKPW/D+RYAQVv5HXYQg4= -github.com/aws/aws-sdk-go-v2/service/internal/endpoint-discovery v1.7.3/go.mod h1:lgGDXBzoot238KmAAn6zf9lkoxcYtJECnYURSbvNlfc= +github.com/aws/aws-sdk-go-v2/service/internal/endpoint-discovery v1.7.6 h1:JGrc3+kkyr848/wpG2+kWuzHK3H4Fyxj2jnXj8ijQ/Y= +github.com/aws/aws-sdk-go-v2/service/internal/endpoint-discovery v1.7.6/go.mod h1:zwvTysbXES8GDwFcwCPB8NkC+bCdio1abH+E+BRe/xg= github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.3.2/go.mod h1:72HRZDLMtmVQiLG2tLfQcaWLCssELvGl+Zf2WVxMmR8= github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.7.0/go.mod h1:K/qPe6AP2TGYv4l6n7c88zh9jWBDf6nHhvg1fx/EWfU= github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.3 h1:Gh1Gpyh01Yvn7ilO/b/hr01WgNpaszfbKMUgqM186xQ= @@ -289,8 +297,9 @@ github.com/aws/aws-sdk-go-v2/service/timestreamwrite v1.3.2 h1:1s/RRA5Owuz4/G/eW github.com/aws/smithy-go v1.5.0/go.mod h1:SObp3lf9smib00L/v3U2eAKG8FyQ7iLrJnQiAmR5n+E= github.com/aws/smithy-go v1.8.0/go.mod h1:SObp3lf9smib00L/v3U2eAKG8FyQ7iLrJnQiAmR5n+E= github.com/aws/smithy-go v1.10.0/go.mod h1:SObp3lf9smib00L/v3U2eAKG8FyQ7iLrJnQiAmR5n+E= -github.com/aws/smithy-go v1.11.2 
h1:eG/N+CcUMAvsdffgMvjMKwfyDzIkjM6pfxMJ8Mzc6mE= github.com/aws/smithy-go v1.11.2/go.mod h1:3xHYmszWVx2c0kIwQeEVf9uSm4fYZt67FBJnwub1bgM= +github.com/aws/smithy-go v1.11.3 h1:DQixirEFM9IaKxX1olZ3ke3nvxRS2xMDteKIDWxozW8= +github.com/aws/smithy-go v1.11.3/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA= github.com/aws/telegraf v0.10.2-0.20220502160831-c20ebe67c5ef h1:O53nKbZm2XpdudUywNdqbohwUxje9k4vE0xRXWeIVbE= github.com/aws/telegraf v0.10.2-0.20220502160831-c20ebe67c5ef/go.mod h1:6maU8S0L0iMSa0ZvH5b2W7dBX1xjK0D5ONAqe7WTqXc= github.com/aws/telegraf/patches/gopsutil/v3 v3.0.0-20220502160831-c20ebe67c5ef h1:iiO0qNErnQgaU6mJY+PRlwnoHp+s9VTk2Ax1A8KRoG4= @@ -881,8 +890,9 @@ github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.7 h1:81/ik6ipDQS2aGcBfIN5dHDB36BwrStyeAQquSYCV4o= github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE= +github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= +github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-containerregistry v0.5.1/go.mod h1:Ct15B4yir3PLOP5jsy0GNeYVaIZs/MK/Jz5any1wFW0= github.com/google/go-github/v32 v32.1.0 h1:GWkQOdXqviCPx7Q7Fj+KyPoGm4SwHRh8rheoPhd27II= github.com/google/go-querystring v1.0.0 h1:Xkwi/a1rcvNg1PPYe5vI8GbeBY/jrVuDX5ASuANWTrk= diff --git a/integration/terraform/ec2/README.md b/integration/terraform/ec2/README.md index 5856575488..cf9e9d6f90 100644 --- a/integration/terraform/ec2/README.md +++ b/integration/terraform/ec2/README.md @@ -35,6 +35,7 @@ for how to easily generate a new policy. 
"ec2:DescribeVpcs", "ec2:GetPasswordData", "ec2:ModifyInstanceAttribute", + "dynamodb:*", "ec2:RunInstances", "ec2:TerminateInstances", "s3:ListBucket", diff --git a/integration/terraform/ec2/linux/main.tf b/integration/terraform/ec2/linux/main.tf index 6229c1a9da..14638147df 100644 --- a/integration/terraform/ec2/linux/main.tf +++ b/integration/terraform/ec2/linux/main.tf @@ -7,7 +7,9 @@ resource "aws_instance" "integration-test" { provisioner "remote-exec" { inline = [ "cloud-init status --wait", + "echo sha ${var.sha}", "echo clone and install agent", + "export PATH=$PATH:/usr/local/go/bin", "git clone ${var.github_repo}", "cd amazon-cloudwatch-agent", "git reset --hard ${var.github_sha}", @@ -25,7 +27,9 @@ resource "aws_instance" "integration-test" { "echo run tests with the tag integration, one at a time, and verbose", "cd ~/amazon-cloudwatch-agent", "echo run sanity test && go test ./integration/test/sanity -p 1 -v --tags=integration", - "go test ${var.test_dir} -p 1 -v --tags=integration" + "export SHA=${var.sha}", + "export SHA_DATE=${var.sha_date}", + "go test ${var.test_dir} -p 1 -timeout 30m -v --tags=integration " ] connection { type = "ssh" diff --git a/integration/terraform/ec2/linux/variables.tf b/integration/terraform/ec2/linux/variables.tf index fdbf017df9..1d4083fc9d 100644 --- a/integration/terraform/ec2/linux/variables.tf +++ b/integration/terraform/ec2/linux/variables.tf @@ -87,4 +87,12 @@ variable "test_name" { variable "test_dir" { type = string default = "" +} +variable "sha" { + type = string + default = "" +} +variable "sha_date"{ + type = string + default = "" } \ No newline at end of file diff --git a/integration/test/performancetest/performance_query_utils.go b/integration/test/performancetest/performance_query_utils.go new file mode 100644 index 0000000000..56cef68e8c --- /dev/null +++ b/integration/test/performancetest/performance_query_utils.go @@ -0,0 +1,157 @@ +package performancetest + +import ( + "time" + "errors" + "context" + "encoding/json" + "os" + "fmt" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/service/cloudwatch" + "github.com/aws/aws-sdk-go-v2/service/cloudwatch/types" +) + +const ( + Namespace = "CWAgent" + DimensionName = "InstanceId" + Stat = "Average" + Period = 10 +) + +/* + * GetConfigMetrics parses the cloudwatch agent config and returns the associated + * metrics that the cloudwatch agent is measuring on itself +*/ +func GetConfigMetrics() ([]string, []string, error) { + //get metric measurements from config file + file, err := os.ReadFile(configPath) + if err != nil { + return nil, nil, err + } + + var cfgFileData map[string]interface{} + err = json.Unmarshal(file, &cfgFileData) + if err != nil { + return nil, nil, err + } + + //go through the config json to get to the procstat metrics + procstatList := cfgFileData["metrics"].(map[string]interface{})["metrics_collected"].(map[string]interface{})["procstat"].([]interface{}) + + //within procstat metrics, find cloudwatch-agent process + cloudwatchIndex := -1 + for i, process := range procstatList { + if process.(map[string]interface{})["exe"].(string) == "cloudwatch-agent" { + cloudwatchIndex = i + } + } + + //check to see if the process was not found + if cloudwatchIndex == -1 { + return nil, nil, errors.New("cloudwatch-agent process not found in cloudwatch agent config") + } + + //use the index to get the rest of the path + metricList := 
procstatList[cloudwatchIndex].(map[string]interface{})["measurement"].([]interface{}) + + //convert the resulting []interface{} to []string and create matching metric ids for each one + metricNames := make([]string, len(metricList)) + ids := make([]string, len(metricList)) + for i, metricName := range metricList { + metricNames[i] = "procstat_" + metricName.(string) + ids[i] = fmt.Sprint("m", i + 1) + } + + return metricNames, ids, nil +} + +// GenerateGetMetricInputStruct generates the struct required to make a query request to cloudwatch's GetMetrics +func GenerateGetMetricInputStruct(ids, metricNames []string, instanceId string, timeDiff int) (*cloudwatch.GetMetricDataInput, error) { + if len(ids) != len(metricNames) { + return nil, errors.New("Mismatching lengths of metric ids and metricNames") + } + + if len(ids) == 0 || len(metricNames) == 0 || instanceId == "" || timeDiff == 0 { + return nil, errors.New("Must supply metric ids, metric names, instance id, and time to collect metrics") + } + + dimensionValue := instanceId + metricDataQueries := []types.MetricDataQuery{} + + //generate list of individual metric requests + for i, id := range ids { + metricDataQueries = append(metricDataQueries, ConstructMetricDataQuery(id, Namespace, DimensionName, dimensionValue, metricNames[i], timeDiff)) + } + + timeNow := time.Now() + input := &cloudwatch.GetMetricDataInput{ + EndTime: aws.Time(time.Unix(timeNow.Unix(), 0)), + StartTime: aws.Time(time.Unix(timeNow.Add(time.Duration(-timeDiff)*time.Minute).Unix(), 0)), + MetricDataQueries: metricDataQueries, + } + + return input, nil +} + +// ConstructMetricDataQuery is a helper function for GenerateGetMetricInputStruct and constructs individual metric requests +func ConstructMetricDataQuery(id, namespace, dimensionName, dimensionValue, metricName string, timeDiff int) (types.MetricDataQuery) { + query := types.MetricDataQuery{ + Id: aws.String(id), + MetricStat: &types.MetricStat{ + Metric: &types.Metric{ + Namespace: aws.String(namespace), + MetricName: aws.String(metricName), + Dimensions: []types.Dimension{ + types.Dimension{ + Name: aws.String(dimensionName), + Value: aws.String(dimensionValue), + }, + }, + }, + Period: aws.Int32(int32(Period)), + Stat: aws.String(Stat), + }, + } + + return query +} + +func GetPerformanceMetrics(instanceId string, agentRuntime int, agentContext context.Context) ([]byte, error) { + + //load default configuration + cfg, err := config.LoadDefaultConfig(agentContext) + if err != nil { + return nil, err + } + + client := cloudwatch.NewFromConfig(cfg) + + //fetch names of metrics to request and generate corresponding ids + metricNames, ids, err := GetConfigMetrics() + if err != nil { + return nil, err + } + + //make input struct + input, err := GenerateGetMetricInputStruct(ids, metricNames, instanceId, agentRuntime) + if err != nil { + return nil, err + } + + //call to CloudWatch API + metrics, err := client.GetMetricData(agentContext, input) + if err != nil { + return nil, err + } + + //format data to json before passing output + outputData, err := json.MarshalIndent(metrics.MetricDataResults, "", " ") + if err != nil { + return nil, err + } + + return outputData, nil +} \ No newline at end of file diff --git a/integration/test/performancetest/performance_test.go b/integration/test/performancetest/performance_test.go new file mode 100644 index 0000000000..bdab852be6 --- /dev/null +++ b/integration/test/performancetest/performance_test.go @@ -0,0 +1,107 @@ +//go:build linux && integration +// +build linux,integration 
+ +package performancetest + +import ( + "context" + "fmt" + "log" + "os" + "sync" + "testing" + "time" + + "github.com/aws/amazon-cloudwatch-agent/integration/test" +) + +const ( + configPath = "resources/config.json" + configOutputPath = "/opt/aws/amazon-cloudwatch-agent/bin/config.json" + agentRuntimeMinutes = 5 //20 mins desired but 5 mins for testing purposes + DynamoDBDataBase = "CWAPerformanceMetrics" + logOutputPath1 = "/tmp/test1.log" + logOutputPath2 = "/tmp/test2.log" + transactionRatePerSecond = 10 +) + +func TestPerformance(t *testing.T) { + agentContext := context.TODO() + instanceId := test.GetInstanceId() + log.Printf("Instance ID used for performance metrics : %s\n", instanceId) + + test.CopyFile(configPath, configOutputPath) + + test.StartAgent(configOutputPath, true) + + agentRunDuration := agentRuntimeMinutes * time.Minute + + //create wait group so main test thread waits for log writing to finish before stopping agent and collecting data + var logWaitGroup sync.WaitGroup + logWaitGroup.Add(2) + + //start goroutines to write to log files concurrently + go func() { + defer logWaitGroup.Done() + writeToLogs(t, logOutputPath1, agentRunDuration) + }() + go func() { + defer logWaitGroup.Done() + writeToLogs(t, logOutputPath2, agentRunDuration) + }() + + //wait until writing to logs finishes + logWaitGroup.Wait() + + log.Printf("Agent has been running for : %s\n", (agentRunDuration).String()) + test.StopAgent() + + //collect data + data, err := GetPerformanceMetrics(instanceId, agentRuntimeMinutes, agentContext) + //@TODO check if metrics are zero remove them and make sure there are non-zero metrics existing + if err != nil { + log.Println("Error: ", err) + t.Fatalf("Error: %v", err) + } + + if data == nil { + t.Fatalf("No data") + } + + //data base + dynamoDB := InitializeTransmitterAPI(DynamoDBDataBase) //add cwa version here + if dynamoDB == nil { + t.Fatalf("Error: generating dynamo table") + } + _, err = dynamoDB.SendItem(data) + if err != nil { + t.Fatalf("Error: couldnt upload metric data to table") + } +} + +func writeToLogs(t *testing.T, filePath string, durationMinutes time.Duration) { + f, err := os.Create(filePath) + if err != nil { + t.Fatalf("Error occurred creating log file for writing: %v", err) + } + defer f.Close() + defer os.Remove(filePath) + + log.Printf("Writing lines to %s with %d transactions per second", filePath, transactionRatePerSecond) + + startTime := time.Now() + + //loop until the test duration is reached + for currTime := startTime; currTime.Sub(startTime) < durationMinutes; currTime = time.Now() { + + //assume this for loop runs instantly for purposes of simple throughput calculation + for i := 0; i < transactionRatePerSecond; i++ { + _, err = f.WriteString(fmt.Sprintf("%s - #%d This is a log line.\n", currTime.Format(time.StampMilli), i)) + if err != nil { + t.Logf("Error occurred writing log line: %v", err) + } + } + + time.Sleep(1 * time.Second) + } +} diff --git a/integration/test/performancetest/resources/config.json b/integration/test/performancetest/resources/config.json new file mode 100644 index 0000000000..cb1ded5532 --- /dev/null +++ b/integration/test/performancetest/resources/config.json @@ -0,0 +1,114 @@ +{ + "agent": { + "metrics_collection_interval": 1, + "run_as_user": "root" + }, + "metrics": { + "aggregation_dimensions": [ + [ + "InstanceId" + ] + ], + "append_dimensions": { + "AutoScalingGroupName": "${aws:AutoScalingGroupName}", + "ImageId": "${aws:ImageId}", + "InstanceId": "${aws:InstanceId}", + "InstanceType": 
"${aws:InstanceType}" + }, + "metrics_collected": { + "collectd": { + "metrics_aggregation_interval": 10 + }, + "cpu": { + "measurement": [ + "cpu_usage_idle", + "cpu_usage_iowait", + "cpu_usage_user", + "cpu_usage_system" + ], + "metrics_collection_interval": 1, + "resources": [ + "*" + ], + "totalcpu": false + }, + "disk": { + "measurement": [ + "used_percent", + "inodes_free" + ], + "metrics_collection_interval": 1, + "resources": [ + "*" + ] + }, + "diskio": { + "measurement": [ + "io_time", + "write_bytes", + "read_bytes", + "writes", + "reads" + ], + "metrics_collection_interval": 1, + "resources": [ + "*" + ] + }, + "mem": { + "measurement": [ + "mem_used_percent" + ], + "metrics_collection_interval": 1 + }, + "netstat": { + "measurement": [ + "tcp_established", + "tcp_time_wait" + ], + "metrics_collection_interval": 1 + }, + "procstat": [ + { + "exe": "cloudwatch-agent", + "measurement": [ + "cpu_usage", + "memory_rss" + ], + "metrics_collection_interval": 10 + } + ], + "statsd": { + "metrics_aggregation_interval": 10, + "metrics_collection_interval": 10, + "service_address": ":8125" + }, + "swap": { + "measurement": [ + "swap_used_percent" + ], + "metrics_collection_interval": 1 + } + } + }, + "logs": { + "logs_collected": { + "files": { + "collect_list": [ + { + "file_path": "/tmp/test1.log", + "log_group_name": "{instance_id}", + "log_stream_name": "{instance_id}", + "timezone": "UTC" + }, + { + "file_path": "/tmp/test2.log", + "log_group_name": "{instance_id}", + "log_stream_name": "{instance_id}", + "timezone": "UTC" + } + ] + } + } + } +} diff --git a/integration/test/performancetest/transmitter.go b/integration/test/performancetest/transmitter.go new file mode 100644 index 0000000000..ddb5769ea9 --- /dev/null +++ b/integration/test/performancetest/transmitter.go @@ -0,0 +1,266 @@ +package performancetest + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "os" + "time" + "strconv" + "log" + "math" + "sort" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/feature/dynamodb/attributevalue" + "github.com/aws/aws-sdk-go-v2/service/dynamodb" + "github.com/aws/aws-sdk-go-v2/service/dynamodb/types" +) + +const ( + METRIC_PERIOD = 5 * 60 // this const is in seconds , 5 mins + PARTITION_KEY ="Year" + HASH = "Hash" + COMMIT_DATE= "CommitDate" + SHA_ENV = "SHA" + SHA_DATE_ENV = "SHA_DATE" +) +type TransmitterAPI struct { + dynamoDbClient *dynamodb.Client + DataBaseName string // this is the name of the table when test is run +} + +// this is the packet that will be sent converted to DynamoItem +type Metric struct { + Average float64 + P99 float64 //99% percent process + Max float64 + Min float64 + Period int //in seconds + Std float64 + Data []float64 +} + +type collectorData []struct { // this is the struct data collector passes in + Id string `json:"Id"` + Label string `json:Label` + Messages string `json:Messages` + StatusCode string `json:StatusCode` + Timestamps []string `json:Timestamps` + Values []float64 `json:Values` +} + +/* +InitializeTransmitterAPI +Desc: Initializes the transmitter class +Side effects: Creates a dynamodb table if it doesn't already exist +*/ +func InitializeTransmitterAPI(DataBaseName string) *TransmitterAPI { + //setup aws session + cfg, err := config.LoadDefaultConfig(context.TODO(),config.WithRegion("us-west-2")) + if err != nil { + fmt.Printf("Error: Loading in config %s\n", err) + } + transmitter := TransmitterAPI{ + dynamoDbClient: dynamodb.NewFromConfig(cfg), + DataBaseName: 
DataBaseName, + } + // check if the dynamo table exist if not create it + tableExist, err := transmitter.TableExist() + if err != nil { + return nil + } + if !tableExist { + fmt.Println("Table doesn't exist") + err := transmitter.CreateTable() + if err != nil { + return nil + } + } + fmt.Println("API ready") + return &transmitter + +} + +/* +CreateTable() +Desc: Will create a DynamoDB Table with given param. and config +*/ + //add secondary index space vs time +func (transmitter *TransmitterAPI) CreateTable() error { + _, err := transmitter.dynamoDbClient.CreateTable( + context.TODO(), &dynamodb.CreateTableInput{ + AttributeDefinitions: []types.AttributeDefinition{ + { + AttributeName: aws.String(PARTITION_KEY), + AttributeType: types.ScalarAttributeTypeN, + }, + { + AttributeName: aws.String("CommitDate"), + AttributeType: types.ScalarAttributeTypeN, + }, + }, + KeySchema: []types.KeySchemaElement{ + { + AttributeName: aws.String(PARTITION_KEY), + KeyType: types.KeyTypeHash, + }, + { + AttributeName: aws.String("CommitDate"), + KeyType: types.KeyTypeRange, + }, + }, + + ProvisionedThroughput: &types.ProvisionedThroughput{ + ReadCapacityUnits: aws.Int64(10), + WriteCapacityUnits: aws.Int64(10), + }, + TableName: aws.String(transmitter.DataBaseName), + }) // this is the config for the new table) + if err != nil { + fmt.Printf("Error calling CreateTable: %s", err) + return err + } + //https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/GettingStarted.CreateTable.html + waiter := dynamodb.NewTableExistsWaiter(transmitter.dynamoDbClient) + err = waiter.Wait(context.TODO(), &dynamodb.DescribeTableInput{ + TableName: aws.String(transmitter.DataBaseName)}, 5* time.Minute) + if err != nil { + log.Printf("Wait for table exists failed. Here's why: %v\n", err) + } + fmt.Println("Created the table", transmitter.DataBaseName) + return nil +} + +/* +AddItem() +Desc: Takes in a packet and +will convert to dynamodb format and upload to dynamodb table. +Param: + packet * map[string]interface{}: is a map with data collection data +Side effects: + Adds an item to dynamodb table +*/ +func (transmitter *TransmitterAPI) AddItem(packet map[string]interface{}) (string, error) { + item, err := attributevalue.MarshalMap(packet) + if err != nil { + panic(err) + } + _, err = transmitter.dynamoDbClient.PutItem(context.TODO(), + &dynamodb.PutItemInput{ + Item: item, + TableName: aws.String(transmitter.DataBaseName), + }) + if err != nil { + fmt.Printf("Error adding item to table. %v\n", err) + } + return fmt.Sprintf("%v", item), err +} + +/* +TableExist() +Desc: Checks if the the table exist and returns the value +//https://github.com/awsdocs/aws-doc-sdk-examples/blob/05a89da8c2f2e40781429a7c34cf2f2b9ae35f89/gov2/dynamodb/actions/table_basics.go +*/ +func (transmitter *TransmitterAPI) TableExist() (bool, error) { + exists := true + _, err := transmitter.dynamoDbClient.DescribeTable( + context.TODO(), &dynamodb.DescribeTableInput{TableName: aws.String(transmitter.DataBaseName)}, + ) + if err != nil { + var notFoundEx *types.ResourceNotFoundException + if errors.As(err, ¬FoundEx) { + fmt.Printf("Table %v does not exist.\n", transmitter.DataBaseName) + err = nil + } else { + fmt.Printf("Couldn't determine existence of table %v. 
Error: %v\n", transmitter.DataBaseName, err) + } + exists = false + } + return exists, err +} + + +/* +SendItem() +Desc: Parses the input data and adds it to the dynamo table +Param: data []byte is the data collected by data collector +*/ +func (transmitter *TransmitterAPI) SendItem(data []byte) (string, error) { + // return nil + packet, err := transmitter.Parser(data) + if err != nil { + return "", err + } + // fmt.Printf("%+v",packet) + sentItem, err := transmitter.AddItem(packet) + return sentItem, err +} +func (transmitter *TransmitterAPI) Parser(data []byte) (map[string]interface{}, error) { + dataHolder := collectorData{} + err := json.Unmarshal(data, &dataHolder) + if err != nil { + return nil, err + } + packet := make(map[string]interface{}) + packet[PARTITION_KEY] = time.Now().Year() + packet[HASH] = os.Getenv(SHA_ENV) //fmt.Sprintf("%d", time.Now().UnixNano()) + packet[COMMIT_DATE],_ = strconv.Atoi(os.Getenv(SHA_DATE_ENV)) + + for _, rawMetricData := range dataHolder { + numDataPoints := float64(len(rawMetricData.Timestamps)) + + avg, min, max, p99Val, stdDev := CalcStats(rawMetricData.Values) + + //---------------- + metric := Metric{ + Average: avg, + Max: max, + Min: min, + P99: p99Val, + Std: stdDev, + Period: int(METRIC_PERIOD / (numDataPoints)), + Data: rawMetricData.Values} + packet[rawMetricData.Label] = metric + } + return packet, nil +} + +//CalcStats takes in an array of data and returns the average, min, max, p99, and stdev of the data +func CalcStats(data []float64) (float64, float64, float64, float64, float64) { + length := len(data) + if length == 0 { + return -1.0, -1.0, -1.0, -1.0, -1.0 + } + if length < 99 { + log.Println("Note: less than 99 values given, p99 value will be equal the max value") + } + + //make a copy so we aren't modifying original + dataCopy := make([]float64, length) + copy(dataCopy, data) + sort.Float64s(dataCopy) + + min := dataCopy[0] + max := dataCopy[length - 1] + + sum := 0.0 + for _, value := range dataCopy { + sum += value + } + + avg := sum / float64(length) + + p99Index := length - 1 - (length / 99) + p99Val := dataCopy[p99Index] + + stdDevSum := 0.0 + for _, value := range dataCopy { + stdDevSum += math.Pow(avg - value, 2) + } + + stdDev := math.Sqrt(stdDevSum / float64(length)) + return avg, min, max, p99Val, stdDev +} \ No newline at end of file diff --git a/packaging/darwin/amazon-cloudwatch-agent-ctl b/packaging/darwin/amazon-cloudwatch-agent-ctl index 94aaebb96c..20baf62f85 100755 --- a/packaging/darwin/amazon-cloudwatch-agent-ctl +++ b/packaging/darwin/amazon-cloudwatch-agent-ctl @@ -175,21 +175,19 @@ cwa_config() { if [ "${config_location}" = "${ALL_CONFIG}" ]; then rm -rf "${JSON_DIR}"/* else - runDownloaderCommand="${CMDDIR}/config-downloader --output-dir ${JSON_DIR} --download-source ${config_location} --mode ${mode} --config ${COMMON_CONIG} --multi-config ${multi_config}" - echo ${runDownloaderCommand} - ${runDownloaderCommand} + runDownloaderCommand=`"${CMDDIR}/config-downloader" --output-dir "${JSON_DIR}" --download-source "${config_location}" --mode ${mode} --config "${COMMON_CONIG}" --multi-config ${multi_config}` + echo "${runDownloaderCommand}" fi if [ ! 
"$(ls ${JSON_DIR})" ]; then echo "all amazon-cloudwatch-agent configurations have been removed" rm -f "${TOML}" else - runTranslatorCommand="${CMDDIR}/config-translator --input ${JSON} --input-dir ${JSON_DIR} --output ${TOML} --mode ${mode} --config ${COMMON_CONIG} --multi-config ${multi_config}" - echo ${runTranslatorCommand} - ${runTranslatorCommand} + runTranslatorCommand=`"${CMDDIR}/config-translator" --input "${JSON}" --input-dir "${JSON_DIR}" --output "${TOML}" --mode ${mode} --config "${COMMON_CONIG}" --multi-config ${multi_config}` + echo "${runTranslatorCommand}" runAgentSchemaTestCommand="${CMDDIR}/amazon-cloudwatch-agent -schematest -config ${TOML}" - echo ${runAgentSchemaTestCommand} + echo "${runAgentSchemaTestCommand}" # We will redirect the verbose error message out if ! ${runAgentSchemaTestCommand} >${CV_LOG_FILE} 2>&1; then echo "Configuration validation second phase failed" diff --git a/packaging/dependencies/amazon-cloudwatch-agent-ctl b/packaging/dependencies/amazon-cloudwatch-agent-ctl index 5de4a7698e..df5e381f7a 100755 --- a/packaging/dependencies/amazon-cloudwatch-agent-ctl +++ b/packaging/dependencies/amazon-cloudwatch-agent-ctl @@ -335,9 +335,8 @@ cwa_config() { if [ "${cwa_config_location}" = "${ALL_CONFIG}" ]; then rm -rf "${JSON_DIR}"/* else - runDownloaderCommand="${CMDDIR}/config-downloader --output-dir ${JSON_DIR} --download-source ${cwa_config_location} --mode ${param_mode} --config ${COMMON_CONIG} --multi-config ${multi_config}" - echo "${runDownloaderCommand}" - ${runDownloaderCommand} || return + runDownloaderCommand=`"${CMDDIR}/config-downloader" --output-dir "${JSON_DIR}" --download-source "${cwa_config_location}" --mode ${param_mode} --config "${COMMON_CONIG}" --multi-config ${multi_config}` + echo ${runDownloaderCommand} || return fi if [ ! "$(ls ${JSON_DIR})" ]; then @@ -345,9 +344,8 @@ cwa_config() { rm -f "${TOML}" else echo "Start configuration validation..." - runTranslatorCommand="${CMDDIR}/config-translator --input ${JSON} --input-dir ${JSON_DIR} --output ${TOML} --mode ${param_mode} --config ${COMMON_CONIG} --multi-config ${multi_config}" - echo "${runTranslatorCommand}" - ${runTranslatorCommand} || return + runTranslatorCommand=`"${CMDDIR}/config-translator" --input "${JSON}" --input-dir "${JSON_DIR}" --output "${TOML}" --mode ${param_mode} --config "${COMMON_CONIG}" --multi-config ${multi_config}` + echo "${runTranslatorCommand}" || return runAgentSchemaTestCommand="${CMDDIR}/amazon-cloudwatch-agent -schematest -config ${TOML}" echo "${runAgentSchemaTestCommand}" @@ -421,9 +419,8 @@ cwoc_config() { cp "${PREDEFINED_CONFIG_DATA}" "${YAML_DIR}/default.tmp" echo "Successfully fetched the config and saved in ${YAML_DIR}/default.tmp" else - runDownloaderCommand="${CMDDIR}/config-downloader --output-dir ${YAML_DIR} --download-source ${cwoc_config_location} --mode ${param_mode} --config ${COMMON_CONIG} --multi-config ${multi_config}" - echo "${runDownloaderCommand}" - ${runDownloaderCommand} || return + runDownloaderCommand=`"${CMDDIR}/config-downloader" --output-dir "${YAML_DIR}" --download-source "${cwoc_config_location}" --mode ${param_mode} --config "${COMMON_CONIG}" --multi-config ${multi_config}` + echo "${runDownloaderCommand}" || return fi if [ ! 
"$(ls ${YAML_DIR})" ]; then @@ -467,9 +464,8 @@ set_log_level_all() { ;; esac - runEnvConfigCommand="${CMDDIR}/amazon-cloudwatch-agent -setenv CWAGENT_LOG_LEVEL=${log_level} -envconfig ${ENV_CONFIG}" - echo "${runEnvConfigCommand}" - ${runEnvConfigCommand} || return + runEnvConfigCommand=`"${CMDDIR}/amazon-cloudwatch-agent" -setenv CWAGENT_LOG_LEVEL=${log_level} -envconfig "${ENV_CONFIG}"` + echo "${runEnvConfigCommand}" || return echo "Set CWAGENT_LOG_LEVEL to ${log_level}" } diff --git a/plugins/processors/ec2tagger/constants.go b/plugins/processors/ec2tagger/constants.go new file mode 100644 index 0000000000..0a04532310 --- /dev/null +++ b/plugins/processors/ec2tagger/constants.go @@ -0,0 +1,79 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +package ec2tagger + +import ( + "time" +) + +// Reminder, keep this in sync with the plugin's README.md +const sampleConfig = ` + ## + ## ec2tagger calls AWS api to fetch EC2 Metadata and Instance Tags and EBS Volumes associated with the + ## current EC2 Instance and attached those values as tags to the metric. + ## + ## Frequency for the plugin to refresh the EC2 Instance Tags and ebs Volumes associated with this Instance. + ## Defaults to 0 (no refresh). + ## When it is zero, ec2tagger doesn't do refresh to keep the ec2 tags and ebs volumes updated. However, as the + ## AWS api request made by ec2tagger might not return the complete values (e.g. initial api call might return a + ## subset of ec2 tags), ec2tagger will retry every 3 minutes until all the tags/volumes (as specified by + ## "ec2_instance_tag_keys"/"ebs_device_keys") are retrieved successfully. (Note when the specified list is ["*"], + ## there is no way to check if all tags/volumes are retrieved, so there is no retry in that case) + # refresh_interval_seconds = 60 + ## + ## Add tags for EC2 Metadata fields. + ## Supported fields are: "InstanceId", "ImageId" (aka AMI), "InstanceType" + ## If the configuration is not provided or it has an empty list, no EC2 Metadata tags are applied. + # ec2_metadata_tags = ["InstanceId", "ImageId", "InstanceType"] + ## + ## Add tags retrieved from the EC2 Instance Tags associated with this instance. + ## If this configuration is not provided, or has an empty list, no EC2 Instance Tags are applied. + ## If this configuration contains one entry and its value is "*", then ALL EC2 Instance Tags for the instance are applied. + ## Note: This plugin renames the "aws:autoscaling:groupName" EC2 Instance Tag key to be spelled "AutoScalingGroupName". + ## This aligns it with the AutoScaling dimension-name seen in AWS CloudWatch. + # ec2_instance_tag_keys = ["aws:autoscaling:groupName", "Name"] + ## + ## Retrieve ebs_volume_id for the specified devices, add ebs_volume_id as tag. The specified devices are + ## the values corresponding to the tag key "disk_device_tag_key" in the input metric. + ## If this configuration is not provided, or has an empty list, no ebs volume is applied. + ## If this configuration contains one entry and its value is "*", then all ebs volume for the instance are applied. 
+ # ebs_device_keys = ["/dev/xvda", "/dev/nvme0n1"] + ## + ## Specify which tag to use to get the specified disk device name from input Metric + # disk_device_tag_key = "device" + ## + ## Amazon Credentials + ## Credentials are loaded in the following order + ## 1) Assumed credentials via STS if role_arn is specified + ## 2) explicit credentials from 'access_key' and 'secret_key' + ## 3) shared profile from 'profile' + ## 4) environment variables + ## 5) shared credentials file + ## 6) EC2 Instance Profile + # access_key = "" + # secret_key = "" + # token = "" + # role_arn = "" + # profile = "" + # shared_credential_file = "" +` + +const ( + ec2InstanceTagKeyASG = "aws:autoscaling:groupName" + cwDimensionASG = "AutoScalingGroupName" + mdKeyInstanceId = "InstanceId" + mdKeyImageId = "ImageId" + mdKeyInstanceType = "InstanceType" + ebsVolumeId = "EBSVolumeId" +) + +const ( + allowedIMDSRetries = 2 +) + +var ( + defaultIMDSTimeout = 1 * time.Second + defaultRefreshInterval = 180 * time.Second + backoffSleepArray = []time.Duration{0, 1 * time.Minute, 1 * time.Minute, 3 * time.Minute, 3 * time.Minute, 3 * time.Minute, 10 * time.Minute} // backoff retry for ec2 describe instances API call. Assuming the throttle limit is 20 per second. 10 mins allow 12000 API calls. +) diff --git a/plugins/processors/ec2tagger/ec2tagger.go b/plugins/processors/ec2tagger/ec2tagger.go index 6bec709b12..26e39181f7 100644 --- a/plugins/processors/ec2tagger/ec2tagger.go +++ b/plugins/processors/ec2tagger/ec2tagger.go @@ -4,16 +4,17 @@ package ec2tagger import ( - "errors" - "fmt" "hash/fnv" + "net/http" "os" "sync" "time" configaws "github.com/aws/amazon-cloudwatch-agent/cfg/aws" "github.com/aws/amazon-cloudwatch-agent/internal" + "github.com/aws/amazon-cloudwatch-agent/translator/context" "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/client" "github.com/aws/aws-sdk-go/aws/ec2metadata" "github.com/aws/aws-sdk-go/service/ec2" "github.com/aws/aws-sdk-go/service/ec2/ec2iface" @@ -21,86 +22,26 @@ import ( "github.com/influxdata/telegraf/plugins/processors" ) -// Reminder, keep this in sync with the plugin's README.md -const sampleConfig = ` - ## - ## ec2tagger calls AWS api to fetch EC2 Metadata and Instance Tags and EBS Volumes associated with the - ## current EC2 Instance and attched those values as tags to the metric. - ## - ## Frequency for the plugin to refresh the EC2 Instance Tags and ebs Volumes associated with this Instance. - ## Defaults to 0 (no refresh). - ## When it is zero, ec2tagger doesn't do refresh to keep the ec2 tags and ebs volumes updated. However, as the - ## AWS api request made by ec2tagger might not return the complete values (e.g. initial api call might return a - ## subset of ec2 tags), ec2tagger will retry every 3 minutes until all the tags/volumes (as specified by - ## "ec2_instance_tag_keys"/"ebs_device_keys") are retrieved successfully. (Note when the specified list is ["*"], - ## there is no way to check if all tags/volumes are retrieved, so there is no retry in that case) - # refresh_interval_seconds = 60 - ## - ## Add tags for EC2 Metadata fields. - ## Supported fields are: "InstanceId", "ImageId" (aka AMI), "InstanceType" - ## If the configuration is not provided or it has an empty list, no EC2 Metadata tags are applied. - # ec2_metadata_tags = ["InstanceId", "ImageId", "InstanceType"] - ## - ## Add tags retrieved from the EC2 Instance Tags associated with this instance. 
- ## If this configuration is not provided, or has an empty list, no EC2 Instance Tags are applied. - ## If this configuration contains one entry and its value is "*", then ALL EC2 Instance Tags for the instance are applied. - ## Note: This plugin renames the "aws:autoscaling:groupName" EC2 Instance Tag key to be spelled "AutoScalingGroupName". - ## This aligns it with the AutoScaling dimension-name seen in AWS CloudWatch. - # ec2_instance_tag_keys = ["aws:autoscaling:groupName", "Name"] - ## - ## Retrieve ebs_volume_id for the specified devices, add ebs_volume_id as tag. The specified devices are - ## the values corresponding to the tag key "disk_device_tag_key" in the input metric. - ## If this configuration is not provided, or has an empty list, no ebs volume is applied. - ## If this configuration contains one entry and its value is "*", then all ebs volume for the instance are applied. - # ebs_device_keys = ["/dev/xvda", "/dev/nvme0n1"] - ## - ## Specify which tag to use to get the specified disk device name from input Metric - # disk_device_tag_key = "device" - ## - ## Amazon Credentials - ## Credentials are loaded in the following order - ## 1) Assumed credentials via STS if role_arn is specified - ## 2) explicit credentials from 'access_key' and 'secret_key' - ## 3) shared profile from 'profile' - ## 4) environment variables - ## 5) shared credentials file - ## 6) EC2 Instance Profile - # access_key = "" - # secret_key = "" - # token = "" - # role_arn = "" - # profile = "" - # shared_credential_file = "" -` - -const ( - ec2InstanceTagKeyASG = "aws:autoscaling:groupName" - cwDimensionASG = "AutoScalingGroupName" - mdKeyInstanceId = "InstanceId" - mdKeyImageId = "ImageId" - mdKeyInstaneType = "InstanceType" - ebsVolumeId = "EBSVolumeId" -) - -var ( - defaultRefreshInterval = 180 * time.Second - // backoff retry for ec2 describe instances API call. Assuming the throttle limit is 20 per second. 10 mins allow 12000 API calls. 
- backoffSleepArray = []time.Duration{0, 1 * time.Minute, 1 * time.Minute, 3 * time.Minute, 3 * time.Minute, 3 * time.Minute, 10 * time.Minute} -) +type EC2MetadataAPI interface { + GetInstanceIdentityDocument() (ec2metadata.EC2InstanceIdentityDocument, error) +} -type metadataLookup struct { +type ec2MetadataLookupType struct { instanceId bool imageId bool instanceType bool } -type ec2ProviderType func(*configaws.CredentialConfig) ec2iface.EC2API - -type ec2Metadata interface { - Available() bool - GetInstanceIdentityDocument() (ec2metadata.EC2InstanceIdentityDocument, error) +type ec2MetadataRespondType struct { + instanceId string + imageId string // aka AMI + instanceType string + region string } +type ec2ProviderType func(*configaws.CredentialConfig) ec2iface.EC2API +type ec2MetadataProviderType func() EC2MetadataAPI + type Tagger struct { Log telegraf.Logger `toml:"-"` RefreshIntervalSeconds internal.Duration `toml:"refresh_interval_seconds"` @@ -118,20 +59,17 @@ type Tagger struct { Filename string `toml:"shared_credential_file"` Token string `toml:"token"` - ec2TagCache map[string]string - instanceId string - imageId string // aka AMI - instanceType string - started bool - region string - ec2Provider ec2ProviderType - ec2 ec2iface.EC2API - ec2metadata ec2Metadata - refreshTicker *time.Ticker - shutdownC chan bool - tagFilters []*ec2.Filter - metadataLookup metadataLookup - ebsVolume *EbsVolume + ec2TagCache map[string]string + started bool + ec2Provider ec2ProviderType + ec2API ec2iface.EC2API + ec2MetadataProvider ec2MetadataProviderType + ec2MetadataRespond ec2MetadataRespondType + ec2MetadataLookup ec2MetadataLookupType + refreshTicker *time.Ticker + shutdownC chan bool + tagFilters []*ec2.Filter + ebsVolume *EbsVolume sync.RWMutex //to protect ec2TagCache } @@ -145,9 +83,7 @@ func (t *Tagger) Description() string { } // Apply adds the configured EC2 Metadata and Instance Tags to metrics. -// // This is called serially for ALL metrics (that pass the plugin's tag filters) so keep it fast. -// func (t *Tagger) Apply(in ...telegraf.Metric) []telegraf.Metric { // grab the pointer to the map in case it gets refreshed while we're applying this round of metrics. At least // this batch then will all get the same tags. 
@@ -164,14 +100,14 @@ func (t *Tagger) Apply(in ...telegraf.Metric) []telegraf.Metric { metric.AddTag(k, v) } } - if t.metadataLookup.instanceId { - metric.AddTag(mdKeyInstanceId, t.instanceId) + if t.ec2MetadataLookup.instanceId { + metric.AddTag(mdKeyInstanceId, t.ec2MetadataRespond.instanceId) } - if t.metadataLookup.imageId { - metric.AddTag(mdKeyImageId, t.imageId) + if t.ec2MetadataLookup.imageId { + metric.AddTag(mdKeyImageId, t.ec2MetadataRespond.imageId) } - if t.metadataLookup.instanceType { - metric.AddTag(mdKeyInstaneType, t.instanceType) + if t.ec2MetadataLookup.instanceType { + metric.AddTag(mdKeyInstanceType, t.ec2MetadataRespond.instanceType) } if t.ebsVolume != nil && metric.HasTag(t.DiskDeviceTagKey) { devName := metric.Tags()[t.DiskDeviceTagKey] @@ -192,7 +128,7 @@ func (t *Tagger) updateTags() error { } for { - result, err := t.ec2.DescribeTags(input) + result, err := t.ec2API.DescribeTags(input) if err != nil { return err } @@ -237,20 +173,20 @@ func (t *Tagger) refreshLoop(refreshInterval time.Duration, stopAfterFirstSucces // need refresh volumes when it is configured and not all volumes are retrieved refreshVolumes = refreshVolumes && !t.ebsVolumesRetrieved() if !refreshTags && !refreshVolumes { - t.Log.Infof("ec2tagger: Refresh is no longer needed, stop refreshTicker.") + t.Log.Info("ec2tagger: Refresh is no longer needed, stop refreshTicker.") return } } if refreshTags { if err := t.updateTags(); err != nil { - t.Log.Warnf("ec2tagger: Error refreshing EC2 tags, keeping old values : +%v", err.Error()) + t.Log.Warnf("ec2tagger: Error refreshing EC2 tags, keeping old values : %+v", err.Error()) } } if refreshVolumes { if err := t.updateVolumes(); err != nil { - t.Log.Warnf("ec2tagger: Error refreshing EC2 volumes, keeping old values : +%v", err.Error()) + t.Log.Warnf("ec2tagger: Error refreshing EC2 volumes, keeping old values : %+v", err.Error()) } } @@ -303,38 +239,10 @@ func (t *Tagger) Init() error { t.shutdownC = make(chan bool) t.ec2TagCache = map[string]string{} - for _, tag := range t.EC2MetadataTags { - switch tag { - case mdKeyInstanceId: - t.metadataLookup.instanceId = true - case mdKeyImageId: - t.metadataLookup.imageId = true - case mdKeyInstaneType: - t.metadataLookup.instanceType = true - default: - t.Log.Errorf("ec2tagger: Unsupported EC2 Metadata key: %s", tag) - } - } - - t.Log.Infof("ec2tagger: Check ec2 metadata") - if !t.ec2metadata.Available() { - msg := "ec2tagger: Unable to retrieve InstanceId. 
This plugin must only be used on an EC2 instance" - t.Log.Errorf(msg) - return errors.New(msg) - } - - doc, err := t.ec2metadata.GetInstanceIdentityDocument() - if nil != err { - msg := fmt.Sprintf("ec2tagger: Unable to retrieve InstanceId : %+v", err.Error()) - t.Log.Errorf(msg) - return errors.New(msg) + if err := t.deriveEC2MetadataFromIMDS(); err != nil { + return err } - t.instanceId = doc.InstanceID - t.region = doc.Region - t.instanceType = doc.InstanceType - t.imageId = doc.ImageID - t.tagFilters = []*ec2.Filter{ { Name: aws.String("resource-type"), @@ -342,7 +250,7 @@ func (t *Tagger) Init() error { }, { Name: aws.String("resource-id"), - Values: aws.StringSlice([]string{t.instanceId}), + Values: aws.StringSlice([]string{t.ec2MetadataRespond.instanceId}), }, } @@ -365,20 +273,20 @@ func (t *Tagger) Init() error { if len(t.EC2InstanceTagKeys) > 0 || len(t.EBSDeviceKeys) > 0 { ec2CredentialConfig := &configaws.CredentialConfig{ - Region: t.region, AccessKey: t.AccessKey, SecretKey: t.SecretKey, RoleARN: t.RoleARN, Profile: t.Profile, Filename: t.Filename, Token: t.Token, + Region: t.ec2MetadataRespond.region, } - t.ec2 = t.ec2Provider(ec2CredentialConfig) + t.ec2API = t.ec2Provider(ec2CredentialConfig) go func() { //Async start of initial retrieval to prevent block of agent start t.initialRetrievalOfTagsAndVolumes() t.refreshLoopToUpdateTagsAndVolumes() }() - t.Log.Infof("ec2tagger: EC2 tagger has started initialization.") + t.Log.Info("ec2tagger: EC2 tagger has started initialization.") } else { t.setStarted() @@ -428,13 +336,13 @@ func (t *Tagger) updateVolumes() error { Filters: []*ec2.Filter{ { Name: aws.String("attachment.instance-id"), - Values: aws.StringSlice([]string{t.instanceId}), + Values: aws.StringSlice([]string{t.ec2MetadataRespond.instanceId}), }, }, } for { - result, err := t.ec2.DescribeVolumes(input) + result, err := t.ec2API.DescribeVolumes(input) if err != nil { return err } @@ -455,7 +363,50 @@ func (t *Tagger) setStarted() { t.Lock() t.started = true t.Unlock() - t.Log.Infof("ec2tagger: EC2 tagger has started, finished initial retrieval of tags and Volumes") + t.Log.Info("ec2tagger: EC2 tagger has started, finished initial retrieval of tags and Volumes") +} + +/* + Retrieve metadata from IMDS and use these metadata to: + * Extract InstanceID, ImageID, InstanceType to create custom dimension for collected metrics + * Extract InstanceID to retrieve Instance's Volume and Tags + * Extract Region to create aws session with custom configuration + For more information on IMDS, please follow this document https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html +*/ +func (t *Tagger) deriveEC2MetadataFromIMDS() error { + for _, tag := range t.EC2MetadataTags { + switch tag { + case mdKeyInstanceId: + t.ec2MetadataLookup.instanceId = true + case mdKeyImageId: + t.ec2MetadataLookup.imageId = true + case mdKeyInstanceType: + t.ec2MetadataLookup.instanceType = true + default: + t.Log.Errorf("ec2tagger: Unsupported EC2 Metadata key: %s.", tag) + } + } + + t.Log.Infof("ec2tagger: Check EC2 Metadata.") + doc, err := t.ec2MetadataProvider().GetInstanceIdentityDocument() + if err != nil { + t.Log.Error("ec2tagger: Unable to retrieve EC2 Metadata. This plugin must only be used on an EC2 instance.") + if context.CurrentContext().RunInContainer() { + t.Log.Warn("ec2tagger: Timeout may have occurred because hop limit is too small. 
Please increase hop limit to 2 by following this document https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/configuring-instance-metadata-options.html#configuring-IMDS-existing-instances.") + } + return err + } + + t.ec2MetadataRespond.region = doc.Region + t.ec2MetadataRespond.instanceId = doc.InstanceID + if t.ec2MetadataLookup.imageId { + t.ec2MetadataRespond.imageId = doc.ImageID + } + if t.ec2MetadataLookup.instanceType { + t.ec2MetadataRespond.instanceType = doc.InstanceType + } + + return nil } // This function never return until calling updateTags() and updateVolumes() succeed or shutdown happen. @@ -501,7 +452,7 @@ func (t *Tagger) initialRetrievalOfTagsAndVolumes() { } if tagsRetrieved { // volsRetrieved is not checked to keep behavior consistency - t.Log.Infof("ec2tagger: Initial retrieval of tags succeded") + t.Log.Infof("ec2tagger: Initial retrieval of tags succeeded") t.setStarted() return } @@ -530,15 +481,28 @@ func hostJitter(max time.Duration) time.Duration { // init adds this plugin to the framework's "processors" registry func init() { processors.Add("ec2tagger", func() telegraf.Processor { - mdCredentialConfig := &configaws.CredentialConfig{} - mdConfigProvider := mdCredentialConfig.Credentials() + ec2MetadataProvider := func() EC2MetadataAPI { + mdCredentialConfig := &configaws.CredentialConfig{} + return ec2metadata.New( + mdCredentialConfig.Credentials(), + &aws.Config{ + HTTPClient: &http.Client{Timeout: defaultIMDSTimeout}, + LogLevel: configaws.SDKLogLevel(), + Logger: configaws.SDKLogger{}, + Retryer: client.DefaultRetryer{NumMaxRetries: allowedIMDSRetries}, + }) + } ec2Provider := func(ec2CredentialConfig *configaws.CredentialConfig) ec2iface.EC2API { - ec2ConfigProvider := ec2CredentialConfig.Credentials() - return ec2.New(ec2ConfigProvider) + return ec2.New( + ec2CredentialConfig.Credentials(), + &aws.Config{ + LogLevel: configaws.SDKLogLevel(), + Logger: configaws.SDKLogger{}, + }) } return &Tagger{ - ec2metadata: ec2metadata.New(mdConfigProvider), - ec2Provider: ec2Provider, + ec2MetadataProvider: ec2MetadataProvider, + ec2Provider: ec2Provider, } }) } diff --git a/plugins/processors/ec2tagger/ec2tagger_test.go b/plugins/processors/ec2tagger/ec2tagger_test.go index 1554678b12..7870412f1b 100644 --- a/plugins/processors/ec2tagger/ec2tagger_test.go +++ b/plugins/processors/ec2tagger/ec2tagger_test.go @@ -10,11 +10,10 @@ import ( configaws "github.com/aws/amazon-cloudwatch-agent/cfg/aws" "github.com/aws/amazon-cloudwatch-agent/internal" - "github.com/influxdata/telegraf" - "github.com/aws/aws-sdk-go/aws/ec2metadata" "github.com/aws/aws-sdk-go/service/ec2" "github.com/aws/aws-sdk-go/service/ec2/ec2iface" + "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/testutil" "github.com/stretchr/testify/assert" ) @@ -45,20 +44,28 @@ type mockEC2Client struct { } // construct the return results for the mocked DescribeTags api -var tagKey1 = "tagKey1" -var tagVal1 = "tagVal1" -var tagDes1 = ec2.TagDescription{Key: &tagKey1, Value: &tagVal1} +var ( + tagKey1 = "tagKey1" + tagVal1 = "tagVal1" + tagDes1 = ec2.TagDescription{Key: &tagKey1, Value: &tagVal1} +) -var tagKey2 = "tagKey2" -var tagVal2 = "tagVal2" -var tagDes2 = ec2.TagDescription{Key: &tagKey2, Value: &tagVal2} +var ( + tagKey2 = "tagKey2" + tagVal2 = "tagVal2" + tagDes2 = ec2.TagDescription{Key: &tagKey2, Value: &tagVal2} +) -var tagKey3 = "aws:autoscaling:groupName" -var tagVal3 = "ASG-1" -var tagDes3 = ec2.TagDescription{Key: &tagKey3, Value: &tagVal3} +var ( + tagKey3 = 
"aws:autoscaling:groupName" + tagVal3 = "ASG-1" + tagDes3 = ec2.TagDescription{Key: &tagKey3, Value: &tagVal3} +) -var updatedTagVal2 = "updated-tagVal2" -var updatedTagDes2 = ec2.TagDescription{Key: &tagKey2, Value: &updatedTagVal2} +var ( + updatedTagVal2 = "updated-tagVal2" + updatedTagDes2 = ec2.TagDescription{Key: &tagKey2, Value: &updatedTagVal2} +) func (m *mockEC2Client) DescribeTags(*ec2.DescribeTagsInput) (*ec2.DescribeTagsOutput, error) { //partial tags returned when the DescribeTags api are called initially @@ -109,29 +116,38 @@ func (m *mockEC2Client) DescribeTags(*ec2.DescribeTagsInput) (*ec2.DescribeTagsO } // construct the return results for the mocked DescribeTags api -var device1 = "/dev/xvdc" -var volumeId1 = "vol-0303a1cc896c42d28" -var volumeAttachment1 = ec2.VolumeAttachment{Device: &device1, VolumeId: &volumeId1} -var availabilityZone = "us-east-1a" -var volume1 = ec2.Volume{ - Attachments: []*ec2.VolumeAttachment{&volumeAttachment1}, - AvailabilityZone: &availabilityZone, -} +var ( + device1 = "/dev/xvdc" + volumeId1 = "vol-0303a1cc896c42d28" + volumeAttachmentId1 = "aws://us-east-1a/vol-0303a1cc896c42d28" + volumeAttachment1 = ec2.VolumeAttachment{Device: &device1, VolumeId: &volumeId1} + availabilityZone = "us-east-1a" + volume1 = ec2.Volume{ + Attachments: []*ec2.VolumeAttachment{&volumeAttachment1}, + AvailabilityZone: &availabilityZone, + } +) -var device2 = "/dev/xvdf" -var volumeId2 = "vol-0c241693efb58734a" -var volumeAttachment2 = ec2.VolumeAttachment{Device: &device2, VolumeId: &volumeId2} -var volume2 = ec2.Volume{ - Attachments: []*ec2.VolumeAttachment{&volumeAttachment2}, - AvailabilityZone: &availabilityZone, -} +var ( + device2 = "/dev/xvdf" + volumeId2 = "vol-0c241693efb58734a" + volumeAttachmentId2 = "aws://us-east-1a/vol-0c241693efb58734a" + volumeAttachment2 = ec2.VolumeAttachment{Device: &device2, VolumeId: &volumeId2} + volume2 = ec2.Volume{ + Attachments: []*ec2.VolumeAttachment{&volumeAttachment2}, + AvailabilityZone: &availabilityZone, + } +) -var volumeId2Updated = "vol-0459607897eaa8148" -var volumeAttachment2Updated = ec2.VolumeAttachment{Device: &device2, VolumeId: &volumeId2Updated} -var volume2Updated = ec2.Volume{ - Attachments: []*ec2.VolumeAttachment{&volumeAttachment2Updated}, - AvailabilityZone: &availabilityZone, -} +var ( + volumeId2Updated = "vol-0459607897eaa8148" + volumeAttachmentUpdatedId2 = "aws://us-east-1a/vol-0459607897eaa8148" + volumeAttachment2Updated = ec2.VolumeAttachment{Device: &device2, VolumeId: &volumeId2Updated} + volume2Updated = ec2.Volume{ + Attachments: []*ec2.VolumeAttachment{&volumeAttachment2Updated}, + AvailabilityZone: &availabilityZone, + } +) func (m *mockEC2Client) DescribeVolumes(*ec2.DescribeVolumesInput) (*ec2.DescribeVolumesOutput, error) { //volume1 is the initial disk assigned to an ec2 instance when started @@ -181,8 +197,7 @@ func (m *mockEC2Client) DescribeVolumes(*ec2.DescribeVolumesInput) (*ec2.Describ } type mockEC2Metadata struct { - ec2Metadata - IsAvailable bool + EC2MetadataAPI InstanceIdentityDocument *ec2metadata.EC2InstanceIdentityDocument } @@ -193,10 +208,6 @@ var mockedInstanceIdentityDoc = &ec2metadata.EC2InstanceIdentityDocument{ ImageID: "ami-09edd32d9b0990d49", } -func (m *mockEC2Metadata) Available() bool { - return m.IsAvailable -} - func (m *mockEC2Metadata) GetInstanceIdentityDocument() (ec2metadata.EC2InstanceIdentityDocument, error) { if m.InstanceIdentityDocument != nil { return *m.InstanceIdentityDocument, nil @@ -206,43 +217,35 @@ func (m *mockEC2Metadata) 
GetInstanceIdentityDocument() (ec2metadata.EC2Instance func TestInitFailWithNoMetadata(t *testing.T) { assert := assert.New(t) - mockMetadata := &mockEC2Metadata{ - IsAvailable: false, - InstanceIdentityDocument: nil, - } - tagger := Tagger{ - Log: testutil.Logger{}, - ec2metadata: mockMetadata, - } - err := tagger.Init() - assert.NotNil(err) - assert.Contains(err.Error(), "ec2tagger: Unable to retrieve InstanceId. This plugin must only be used on an EC2 instance") -} - -func TestInitFailWithNoInstanceIdentityDocument(t *testing.T) { - assert := assert.New(t) - mockMetadata := &mockEC2Metadata{ - IsAvailable: true, + metadataClient := &mockEC2Metadata{ InstanceIdentityDocument: nil, } + ec2MetadataProvider := func() EC2MetadataAPI { + return metadataClient + } tagger := Tagger{ - Log: testutil.Logger{}, - ec2metadata: mockMetadata, + Log: testutil.Logger{}, + ec2MetadataProvider: ec2MetadataProvider, } err := tagger.Init() + assert.NotNil(err) - assert.Contains(err.Error(), "ec2tagger: Unable to retrieve InstanceId :") + assert.Contains(err.Error(), "No instance identity document") } //run Init() and check all tags/volumes are retrieved and saved func TestInitSuccessWithNoTagsVolumesUpdate(t *testing.T) { assert := assert.New(t) - mockMetadata := &mockEC2Metadata{ - IsAvailable: true, + + metadataClient := &mockEC2Metadata{ InstanceIdentityDocument: mockedInstanceIdentityDoc, } + ec2MetadataProvider := func() EC2MetadataAPI { + return metadataClient + } + ec2Client := &mockEC2Client{ tagsCallCount: 0, tagsFailLimit: 0, @@ -262,11 +265,11 @@ func TestInitSuccessWithNoTagsVolumesUpdate(t *testing.T) { Log: testutil.Logger{}, RefreshIntervalSeconds: internal.Duration{Duration: 0}, ec2Provider: ec2Provider, - ec2: ec2Client, - ec2metadata: mockMetadata, - EC2MetadataTags: []string{"InstanceId", "ImageId", "InstanceType"}, - EC2InstanceTagKeys: []string{"tagKey1", "tagKey2", "AutoScalingGroupName"}, - EBSDeviceKeys: []string{"/dev/xvdc", "/dev/xvdf"}, + ec2API: ec2Client, + ec2MetadataProvider: ec2MetadataProvider, + EC2MetadataTags: []string{mdKeyInstanceId, mdKeyImageId, mdKeyInstanceType}, + EC2InstanceTagKeys: []string{tagKey1, tagKey2, "AutoScalingGroupName"}, + EBSDeviceKeys: []string{device1, device2}, } err := tagger.Init() assert.Nil(err) @@ -275,26 +278,21 @@ func TestInitSuccessWithNoTagsVolumesUpdate(t *testing.T) { assert.Equal(3, ec2Client.tagsCallCount) assert.Equal(2, ec2Client.volumesCallCount) //check tags and volumes - expectedTags := map[string]string{ - "tagKey1": "tagVal1", - "tagKey2": "tagVal2", - "AutoScalingGroupName": "ASG-1", - } + expectedTags := map[string]string{tagKey1: tagVal1, tagKey2: tagVal2, "AutoScalingGroupName": tagVal3} assert.Equal(expectedTags, tagger.ec2TagCache) - expectedVolumes := map[string]string{ - "/dev/xvdc": "aws://us-east-1a/vol-0303a1cc896c42d28", - "/dev/xvdf": "aws://us-east-1a/vol-0c241693efb58734a", - } + expectedVolumes := map[string]string{device1: volumeAttachmentId1, device2: volumeAttachmentId2} assert.Equal(expectedVolumes, tagger.ebsVolume.dev2Vol) } //run Init() and check all tags/volumes are retrieved and saved and then updated func TestInitSuccessWithTagsVolumesUpdate(t *testing.T) { assert := assert.New(t) - mockMetadata := &mockEC2Metadata{ - IsAvailable: true, + metadataClient := &mockEC2Metadata{ InstanceIdentityDocument: mockedInstanceIdentityDoc, } + ec2MetadataProvider := func() EC2MetadataAPI { + return metadataClient + } ec2Client := &mockEC2Client{ tagsCallCount: 0, tagsFailLimit: 1, @@ -315,11 +313,11 @@ func 
TestInitSuccessWithTagsVolumesUpdate(t *testing.T) { //use millisecond rather than second to speed up test execution RefreshIntervalSeconds: internal.Duration{Duration: 20 * time.Millisecond}, ec2Provider: ec2Provider, - ec2: ec2Client, - ec2metadata: mockMetadata, - EC2MetadataTags: []string{"InstanceId", "ImageId", "InstanceType"}, - EC2InstanceTagKeys: []string{"tagKey1", "tagKey2", "AutoScalingGroupName"}, - EBSDeviceKeys: []string{"/dev/xvdc", "/dev/xvdf"}, + ec2API: ec2Client, + ec2MetadataProvider: ec2MetadataProvider, + EC2MetadataTags: []string{mdKeyInstanceId, mdKeyImageId, mdKeyInstanceType}, + EC2InstanceTagKeys: []string{tagKey1, tagKey2, "AutoScalingGroupName"}, + EBSDeviceKeys: []string{device1, device2}, } err := tagger.Init() assert.Nil(err) @@ -327,16 +325,9 @@ func TestInitSuccessWithTagsVolumesUpdate(t *testing.T) { //so that all tags/volumes are retrieved time.Sleep(time.Second) //check tags and volumes - expectedTags := map[string]string{ - "tagKey1": "tagVal1", - "tagKey2": "tagVal2", - "AutoScalingGroupName": "ASG-1", - } + expectedTags := map[string]string{tagKey1: tagVal1, tagKey2: tagVal2, "AutoScalingGroupName": tagVal3} assert.Equal(expectedTags, tagger.ec2TagCache) - expectedVolumes := map[string]string{ - "/dev/xvdc": "aws://us-east-1a/vol-0303a1cc896c42d28", - "/dev/xvdf": "aws://us-east-1a/vol-0c241693efb58734a", - } + expectedVolumes := map[string]string{device1: volumeAttachmentId1, device2: volumeAttachmentId2} assert.Equal(expectedVolumes, tagger.ebsVolume.dev2Vol) //update the tags and volumes @@ -345,16 +336,9 @@ func TestInitSuccessWithTagsVolumesUpdate(t *testing.T) { //assume one second is long enough for the api to be called many times //so that all tags/volumes are updated time.Sleep(time.Second) - expectedTags = map[string]string{ - "tagKey1": "tagVal1", - "tagKey2": "updated-tagVal2", - "AutoScalingGroupName": "ASG-1", - } + expectedTags = map[string]string{tagKey1: tagVal1, tagKey2: updatedTagVal2, "AutoScalingGroupName": tagVal3} assert.Equal(expectedTags, tagger.ec2TagCache) - expectedVolumes = map[string]string{ - "/dev/xvdc": "aws://us-east-1a/vol-0303a1cc896c42d28", - "/dev/xvdf": "aws://us-east-1a/vol-0459607897eaa8148", - } + expectedVolumes = map[string]string{device1: volumeAttachmentId1, device2: volumeAttachmentUpdatedId2} assert.Equal(expectedVolumes, tagger.ebsVolume.dev2Vol) } @@ -362,10 +346,14 @@ func TestInitSuccessWithTagsVolumesUpdate(t *testing.T) { //check there is no attempt to fetch all tags/volumes func TestInitSuccessWithWildcardTagVolumeKey(t *testing.T) { assert := assert.New(t) - mockMetadata := &mockEC2Metadata{ - IsAvailable: true, + + metadataClient := &mockEC2Metadata{ InstanceIdentityDocument: mockedInstanceIdentityDoc, } + ec2MetadataProvider := func() EC2MetadataAPI { + return metadataClient + } + ec2Client := &mockEC2Client{ tagsCallCount: 0, tagsFailLimit: 0, @@ -385,9 +373,9 @@ func TestInitSuccessWithWildcardTagVolumeKey(t *testing.T) { Log: testutil.Logger{}, RefreshIntervalSeconds: internal.Duration{Duration: 0}, ec2Provider: ec2Provider, - ec2: ec2Client, - ec2metadata: mockMetadata, - EC2MetadataTags: []string{"InstanceId", "ImageId", "InstanceType"}, + ec2API: ec2Client, + ec2MetadataProvider: ec2MetadataProvider, + EC2MetadataTags: []string{mdKeyInstanceId, mdKeyImageId, mdKeyInstanceType}, EC2InstanceTagKeys: []string{"*"}, EBSDeviceKeys: []string{"*"}, } @@ -399,23 +387,23 @@ func TestInitSuccessWithWildcardTagVolumeKey(t *testing.T) { assert.Equal(2, ec2Client.tagsCallCount) assert.Equal(1, 
ec2Client.volumesCallCount) //check partial tags/volumes are saved - expectedTags := map[string]string{ - "tagKey1": "tagVal1", - } + expectedTags := map[string]string{tagKey1: tagVal1} assert.Equal(expectedTags, tagger.ec2TagCache) - expectedVolumes := map[string]string{ - "/dev/xvdc": "aws://us-east-1a/vol-0303a1cc896c42d28", - } + expectedVolumes := map[string]string{device1: volumeAttachmentId1} assert.Equal(expectedVolumes, tagger.ebsVolume.dev2Vol) } //run Init() and then Apply() and check the output metrics contain expected tags func TestApplyWithTagsVolumesUpdate(t *testing.T) { assert := assert.New(t) - mockMetadata := &mockEC2Metadata{ - IsAvailable: true, + + metadataClient := &mockEC2Metadata{ InstanceIdentityDocument: mockedInstanceIdentityDoc, } + ec2MetadataProvider := func() EC2MetadataAPI { + return metadataClient + } + ec2Client := &mockEC2Client{ tagsCallCount: 0, tagsFailLimit: 0, @@ -436,11 +424,11 @@ func TestApplyWithTagsVolumesUpdate(t *testing.T) { //use millisecond rather than second to speed up test execution RefreshIntervalSeconds: internal.Duration{Duration: 20 * time.Millisecond}, ec2Provider: ec2Provider, - ec2: ec2Client, - ec2metadata: mockMetadata, - EC2MetadataTags: []string{"InstanceId", "InstanceType"}, - EC2InstanceTagKeys: []string{"tagKey1", "tagKey2", "AutoScalingGroupName"}, - EBSDeviceKeys: []string{"/dev/xvdc", "/dev/xvdf"}, + ec2API: ec2Client, + ec2MetadataProvider: ec2MetadataProvider, + EC2MetadataTags: []string{mdKeyInstanceId, mdKeyInstanceType}, + EC2InstanceTagKeys: []string{tagKey1, tagKey2, "AutoScalingGroupName"}, + EBSDeviceKeys: []string{device1, device2}, DiskDeviceTagKey: "device", } err := tagger.Init() @@ -462,23 +450,12 @@ func TestApplyWithTagsVolumesUpdate(t *testing.T) { testutil.MustMetric( "disk", map[string]string{ - "device": "/dev/xvdc", - }, - map[string]interface{}{ - "write_bytes": 200, - }, - time.Unix(0, 0), - ), - testutil.MustMetric( - "disk", - map[string]string{ - "device": "/dev/xvdf", + "device": device2, }, map[string]interface{}{ "write_bytes": 135, }, - time.Unix(0, 0), - ), + time.Unix(0, 0)), } output := tagger.Apply(input...) 
expectedOutput := []telegraf.Metric{ @@ -489,30 +466,14 @@ func TestApplyWithTagsVolumesUpdate(t *testing.T) { "AutoScalingGroupName": "ASG-1", "InstanceId": "i-01d2417c27a396e44", "InstanceType": "m5ad.large", - "tagKey1": "tagVal1", - "tagKey2": "tagVal2", + tagKey1: tagVal1, + tagKey2: tagVal2, }, map[string]interface{}{ "cpu": 0.11, }, time.Unix(0, 0), ), - testutil.MustMetric( - "disk", - map[string]string{ - "AutoScalingGroupName": "ASG-1", - "EBSVolumeId": "aws://us-east-1a/vol-0303a1cc896c42d28", - "InstanceId": "i-01d2417c27a396e44", - "InstanceType": "m5ad.large", - "tagKey1": "tagVal1", - "tagKey2": "tagVal2", - "device": "/dev/xvdc", - }, - map[string]interface{}{ - "write_bytes": 200, - }, - time.Unix(0, 0), - ), testutil.MustMetric( "disk", map[string]string{ @@ -520,9 +481,9 @@ func TestApplyWithTagsVolumesUpdate(t *testing.T) { "EBSVolumeId": "aws://us-east-1a/vol-0c241693efb58734a", "InstanceId": "i-01d2417c27a396e44", "InstanceType": "m5ad.large", - "tagKey1": "tagVal1", - "tagKey2": "tagVal2", - "device": "/dev/xvdf", + tagKey1: tagVal1, + tagKey2: tagVal2, + "device": device2, }, map[string]interface{}{ "write_bytes": 135, @@ -547,30 +508,14 @@ func TestApplyWithTagsVolumesUpdate(t *testing.T) { "AutoScalingGroupName": "ASG-1", "InstanceId": "i-01d2417c27a396e44", "InstanceType": "m5ad.large", - "tagKey1": "tagVal1", - "tagKey2": "updated-tagVal2", + tagKey1: tagVal1, + tagKey2: updatedTagVal2, }, map[string]interface{}{ "cpu": 0.11, }, time.Unix(0, 0), ), - testutil.MustMetric( - "disk", - map[string]string{ - "AutoScalingGroupName": "ASG-1", - "EBSVolumeId": "aws://us-east-1a/vol-0303a1cc896c42d28", - "InstanceId": "i-01d2417c27a396e44", - "InstanceType": "m5ad.large", - "tagKey1": "tagVal1", - "tagKey2": "updated-tagVal2", - "device": "/dev/xvdc", - }, - map[string]interface{}{ - "write_bytes": 200, - }, - time.Unix(0, 0), - ), testutil.MustMetric( "disk", map[string]string{ @@ -578,9 +523,9 @@ func TestApplyWithTagsVolumesUpdate(t *testing.T) { "EBSVolumeId": "aws://us-east-1a/vol-0459607897eaa8148", "InstanceId": "i-01d2417c27a396e44", "InstanceType": "m5ad.large", - "tagKey1": "tagVal1", - "tagKey2": "updated-tagVal2", - "device": "/dev/xvdf", + tagKey1: tagVal1, + tagKey2: updatedTagVal2, + "device": device2, }, map[string]interface{}{ "write_bytes": 135, @@ -594,10 +539,14 @@ func TestApplyWithTagsVolumesUpdate(t *testing.T) { // Test metrics are dropped before the initial retrieval is done func TestMetricsDroppedBeforeStarted(t *testing.T) { assert := assert.New(t) - mockMetadata := &mockEC2Metadata{ - IsAvailable: true, + + metadataClient := &mockEC2Metadata{ InstanceIdentityDocument: mockedInstanceIdentityDoc, } + ec2MetadataProvider := func() EC2MetadataAPI { + return metadataClient + } + ec2Client := &mockEC2Client{ tagsCallCount: 0, tagsFailLimit: 0, @@ -617,9 +566,9 @@ func TestMetricsDroppedBeforeStarted(t *testing.T) { Log: testutil.Logger{}, RefreshIntervalSeconds: internal.Duration{Duration: 0}, ec2Provider: ec2Provider, - ec2: ec2Client, - ec2metadata: mockMetadata, - EC2MetadataTags: []string{"InstanceId", "ImageId", "InstanceType"}, + ec2API: ec2Client, + ec2MetadataProvider: ec2MetadataProvider, + EC2MetadataTags: []string{mdKeyInstanceId, mdKeyImageId, mdKeyInstanceType}, EC2InstanceTagKeys: []string{"*"}, EBSDeviceKeys: []string{"*"}, } @@ -638,7 +587,7 @@ func TestMetricsDroppedBeforeStarted(t *testing.T) { testutil.MustMetric( "disk", map[string]string{ - "device": "/dev/xvdc", + "device": device1, }, map[string]interface{}{ "write_bytes": 
200, @@ -648,7 +597,7 @@ func TestMetricsDroppedBeforeStarted(t *testing.T) { testutil.MustMetric( "disk", map[string]string{ - "device": "/dev/xvdf", + "device": device2, }, map[string]interface{}{ "write_bytes": 135, @@ -669,14 +618,11 @@ func TestMetricsDroppedBeforeStarted(t *testing.T) { //check only partial tags/volumes are returned assert.Equal(2, ec2Client.tagsCallCount) assert.Equal(1, ec2Client.volumesCallCount) + //check partial tags/volumes are saved - expectedTags := map[string]string{ - "tagKey1": "tagVal1", - } + expectedTags := map[string]string{tagKey1: tagVal1} assert.Equal(expectedTags, tagger.ec2TagCache) - expectedVolumes := map[string]string{ - "/dev/xvdc": "aws://us-east-1a/vol-0303a1cc896c42d28", - } + expectedVolumes := map[string]string{device1: volumeAttachmentId1} assert.Equal(expectedVolumes, tagger.ebsVolume.dev2Vol) assert.Equal(tagger.started, true) @@ -688,10 +634,14 @@ func TestMetricsDroppedBeforeStarted(t *testing.T) { // Test ec2tagger init does not block for a long time func TestTaggerInitDoesNotBlock(t *testing.T) { assert := assert.New(t) - mockMetadata := &mockEC2Metadata{ - IsAvailable: true, + + metadataClient := &mockEC2Metadata{ InstanceIdentityDocument: mockedInstanceIdentityDoc, } + ec2MetadataProvider := func() EC2MetadataAPI { + return metadataClient + } + ec2Client := &mockEC2Client{ tagsCallCount: 0, tagsFailLimit: 0, @@ -711,9 +661,9 @@ func TestTaggerInitDoesNotBlock(t *testing.T) { Log: testutil.Logger{}, RefreshIntervalSeconds: internal.Duration{Duration: 0}, ec2Provider: ec2Provider, - ec2: ec2Client, - ec2metadata: mockMetadata, - EC2MetadataTags: []string{"InstanceId", "ImageId", "InstanceType"}, + ec2API: ec2Client, + ec2MetadataProvider: ec2MetadataProvider, + EC2MetadataTags: []string{mdKeyInstanceId, mdKeyImageId, mdKeyInstanceType}, EC2InstanceTagKeys: []string{"*"}, EBSDeviceKeys: []string{"*"}, } @@ -737,15 +687,19 @@ func TestTaggerInitDoesNotBlock(t *testing.T) { // Test ec2tagger init does not block for a long time func TestTaggerStartsWithoutTagOrVolume(t *testing.T) { assert := assert.New(t) - mockMetadata := &mockEC2Metadata{ - IsAvailable: true, + + metadataClient := &mockEC2Metadata{ InstanceIdentityDocument: mockedInstanceIdentityDoc, } + ec2MetadataProvider := func() EC2MetadataAPI { + return metadataClient + } + tagger := Tagger{ Log: testutil.Logger{}, RefreshIntervalSeconds: internal.Duration{Duration: 0}, - ec2metadata: mockMetadata, - EC2MetadataTags: []string{"InstanceId", "ImageId", "InstanceType"}, + ec2MetadataProvider: ec2MetadataProvider, + EC2MetadataTags: []string{mdKeyInstanceId, mdKeyImageId, mdKeyInstanceType}, } deadline := time.NewTimer(1 * time.Second) diff --git a/translator/jsonconfig/mergeJsonConfig.go b/translator/jsonconfig/mergeJsonConfig.go index 48d9704c03..f20f9a5d84 100644 --- a/translator/jsonconfig/mergeJsonConfig.go +++ b/translator/jsonconfig/mergeJsonConfig.go @@ -4,7 +4,6 @@ package jsonconfig import ( - "fmt" "log" "os" "sort" @@ -22,15 +21,14 @@ func MergeJsonConfigMaps(jsonConfigMapMap map[string]map[string]interface{}, def if os.Getenv(config.USE_DEFAULT_CONFIG) == config.USE_DEFAULT_CONFIG_TRUE { // When USE_DEFAULT_CONFIG is true, ECS and EKS will be supposed to use different default config. 
EKS default config logic will be added when necessary if ecsutil.GetECSUtilSingleton().IsECS() { - fmt.Println("No json config files found, use the default ecs config") + log.Println("No json config files found, use the default ecs config") return util.GetJsonMapFromJsonBytes([]byte(config.DefaultECSJsonConfig())) } } if multiConfig == "remove" { - fmt.Println("No json config files found, please provide config, exit now") os.Exit(config.ERR_CODE_NOJSONFILE) } else { - fmt.Println("No json config files found, use the default one") + log.Println("No json config files found, use the default one") } return defaultJsonConfigMap, nil } diff --git a/translator/util/ec2util/ec2util.go b/translator/util/ec2util/ec2util.go index 66eae0eb39..bb873c28f1 100644 --- a/translator/util/ec2util/ec2util.go +++ b/translator/util/ec2util/ec2util.go @@ -4,11 +4,12 @@ package ec2util import ( + "errors" "log" "net" "sync" "time" - + "github.com/aws/amazon-cloudwatch-agent/translator/config" "github.com/aws/amazon-cloudwatch-agent/translator/context" "github.com/aws/aws-sdk-go/aws/ec2metadata" @@ -24,20 +25,23 @@ type ec2Util struct { AccountID string } -const allowedRetries = 5 +var ( + ec2UtilInstance *ec2Util + once sync.Once +) -var e *ec2Util -var once sync.Once +const allowedRetries = 5 func GetEC2UtilSingleton() *ec2Util { once.Do(func() { - e = initEC2UtilSingleton() + ec2UtilInstance = initEC2UtilSingleton() }) - return e + return ec2UtilInstance } func initEC2UtilSingleton() (newInstance *ec2Util) { newInstance = &ec2Util{Region: "", PrivateIP: ""} + if context.CurrentContext().Mode() == config.ModeOnPrem { return } @@ -67,46 +71,49 @@ func initEC2UtilSingleton() (newInstance *ec2Util) { log.Println("W! [EC2] Sleep until network is up") time.Sleep(1 * time.Second) } + if !networkUp { log.Println("E! [EC2] No available network interface") } - ses, err := session.NewSession() + err := newInstance.deriveEC2MetadataFromIMDS() + if err != nil { - log.Println("E! [EC2] getting new session info: ", err) - return + log.Println("E! [EC2] Cannot get EC2 Metadata from IMDS:", err) } - md := ec2metadata.New(ses) - if !md.Available() { - log.Println("E! ec2metadata is not available") - return - } + return +} - if info, err := md.GetMetadata("instance-id"); err == nil { - newInstance.InstanceID = info - } else { - log.Println("E! getting instance-id from EC2 metadata fail: ", err) +func (e *ec2Util) deriveEC2MetadataFromIMDS() error { + ses, err := session.NewSession() + + if err != nil { + return err } - if info, err := md.GetMetadata("hostname"); err == nil { - newInstance.Hostname = info - } else { - log.Println("E! getting hostname from EC2 metadata fail: ", err) + md := ec2metadata.New(ses) + + if !md.Available() { + return errors.New("EC2 metadata is not available.") } - if info, err := md.GetMetadata("local-ipv4"); err == nil { - newInstance.PrivateIP = info + // More information on API: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html#instance-metadata-ex-2 + if hostname, err := md.GetMetadata("hostname"); err == nil { + e.Hostname = hostname } else { - log.Println("E! getting local-ipv4 from EC2 metadata fail: ", err) + log.Println("E! 
[EC2] Fetch hostname from EC2 metadata failed:", err) } - if info, err := md.GetInstanceIdentityDocument(); err == nil { - newInstance.Region = info.Region - newInstance.AccountID = info.AccountID + // More information on API: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instance-identity-documents.html + if instanceIdentityDocument, err := md.GetInstanceIdentityDocument(); err == nil { + e.Region = instanceIdentityDocument.Region + e.AccountID = instanceIdentityDocument.AccountID + e.PrivateIP = instanceIdentityDocument.PrivateIP + e.InstanceID = instanceIdentityDocument.InstanceID } else { - log.Println("E! fetching identity document from EC2 metadata fail: ", err) + log.Println("E! [EC2] Fetch identity document from EC2 metadata failed:", err) } - return + return nil } diff --git a/translator/util/sdkutil.go b/translator/util/sdkutil.go index f29e3ccd26..415c1486b2 100644 --- a/translator/util/sdkutil.go +++ b/translator/util/sdkutil.go @@ -35,7 +35,7 @@ func DetectAgentMode(configuredMode string) string { fmt.Println("I! Detected from ENV instance is EC2") return config.ModeEC2 } - + if defaultEC2Region() != "" { fmt.Println("I! Detected the instance is EC2") return config.ModeEC2
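
Reviewer note: the following is a minimal, self-contained sketch, not part of the patch. It illustrates how the refactored code builds an IMDS client with a one-second per-request timeout and a capped retry count, mirroring defaultIMDSTimeout and allowedIMDSRetries in plugins/processors/ec2tagger/constants.go; the package name and log messages here are illustrative only.

package main

import (
	"log"
	"net/http"
	"time"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/client"
	"github.com/aws/aws-sdk-go/aws/ec2metadata"
	"github.com/aws/aws-sdk-go/aws/session"
)

func main() {
	sess, err := session.NewSession()
	if err != nil {
		log.Fatalf("unable to create AWS session: %v", err)
	}

	// Cap each IMDS request at one second and allow only a couple of retries so a
	// misconfigured hop limit (e.g. inside a container) fails fast instead of hanging.
	md := ec2metadata.New(sess, &aws.Config{
		HTTPClient: &http.Client{Timeout: 1 * time.Second},
		Retryer:    client.DefaultRetryer{NumMaxRetries: 2},
	})

	doc, err := md.GetInstanceIdentityDocument()
	if err != nil {
		log.Fatalf("unable to reach IMDS (for containers, is the hop limit at least 2?): %v", err)
	}
	log.Printf("instance %s in region %s", doc.InstanceID, doc.Region)
}

Keeping the timeout and retry count small is what lets the agent surface the hop-limit warning quickly instead of blocking startup while IMDS calls time out.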