From a8323cc578e5baa65ec9cfead32864e58bdee963 Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Tue, 7 Jul 2020 12:25:31 -0400 Subject: [PATCH 01/43] Squashed commit of the following: commit 67b636352d2b95cdf60e5e1ea90c316f44474fd5 Merge: fd80aff5 afdb125d Author: Ben Napolitan Date: Tue Jul 7 12:22:14 2020 -0400 Merge branch 'upstream-master' into scale-test-single-node commit fd80aff5627b43cc7eba83c77760cd89ac77c035 Author: Ben Napolitan Date: Tue Jul 7 12:20:56 2020 -0400 Forgotten readme commit. commit dae08fd00c721c2bd31d8b982b9c6d966441edcf Author: Ben Napolitan Date: Tue Jul 7 12:20:43 2020 -0400 Fix duration calculation for timeout, remove eksctl, revise readme. commit 80a50fd03586ced7388396cd05202b4d76133fa5 Author: Ben Napolitan Date: Tue Jul 7 01:04:09 2020 -0400 Change image to kubernetes pause. commit 610402262e5bb3471f11e07a4fe1078f79d739e0 Author: Ben Napolitan Date: Mon Jul 6 16:59:36 2020 -0400 Revert back to 98 node startup. commit c7d9a5fef55b0ace2ad10fc70a5793ef3f0fa114 Author: Ben Napolitan Date: Mon Jul 6 14:49:30 2020 -0400 Reduce initial replicas to 1 commit ddf7cd81896beffc12c5aea455c34db012c1e02e Author: Ben Napolitan Date: Mon Jul 6 13:11:50 2020 -0400 Add timeout to performance tests, add content to readme. commit 44092a6801233fac1a664c65dde7beda277e949c Author: Ben Napolitan Date: Mon Jul 6 11:56:52 2020 -0400 Revert image to google. commit 2c8291e3f892d7182dc930648fbe8c6c0fd615ae Author: Ben Napolitan Date: Thu Jul 2 15:36:18 2020 -0400 Don't exit if s3 bucket upload fails. commit 318101aa967a9cfb1f3459c2cfd512c12af864d7 Author: Ben Napolitan Date: Thu Jul 2 13:37:36 2020 -0400 Fix file path issue. commit 16254ad256c3a1150fb53250f49200a7e3f3388e Author: Ben Napolitan Date: Wed Jul 1 17:07:12 2020 -0400 Fix CircleCI yml syntax error. commit 43dd11deb255af6a0d5fc38968e2990df2fd98ae Author: Ben Napolitan Date: Wed Jul 1 17:05:34 2020 -0400 Configure weekly performance. commit d9b58bba297b4a80dc73032964fe6ccf233068eb Author: Ben Napolitan Date: Wed Jul 1 16:57:17 2020 -0400 Start mng with 1 node, put metadata into data file names, suppress copy errors. commit 5bab04d97fd9507a67bfd274dc3746ed3fc3d5f9 Author: Ben Napolitan Date: Wed Jul 1 02:43:28 2020 -0400 Changes from PR. commit 72a8608c5947b5583dd6584772a82fa2434c6e8d Author: Ben Napolitan Date: Fri Jun 26 11:58:25 2020 -0400 Squashed commit of the following: commit 5aac3580040f2f6622814915b5e3ec5c3a1b04d2 Merge: 0bcf24b4 30f98bd1 Author: Ben Napolitan Date: Fri Jun 26 11:57:31 2020 -0400 Merge branch 'upstream-master' into scale-test-single-node-old commit 0bcf24b4cbb7cbaffe54e6b2cfdd012b45180df5 Author: Ben Napolitan Date: Fri Jun 26 11:55:48 2020 -0400 Revert rolling update change. commit 53866a01bba92d33e77cf86aa2d4947269f40328 Author: Ben Napolitan Date: Thu Jun 25 16:22:33 2020 -0400 Increase rollingupdate limit. commit 966466acbfd502207ebd976f97dcad4ef9caeeab Author: Ben Napolitan Date: Thu Jun 25 11:01:07 2020 -0400 Fix environment unset environment variables. commit f42928333ba3eb7fa5ef0200a9c1991d7774d03c Author: Ben Napolitan Date: Wed Jun 24 13:26:51 2020 -0400 Remove sleeps, deleted load balancers in test account. commit 166a168b1864f712381da1f413d3401c4733e39c Author: Ben Napolitan Date: Wed Jun 24 09:21:17 2020 -0400 Attempt all scale tests. commit 81dd0aaf39af12eeb85af1bcf33f5edd29c587a3 Author: Ben Napolitan Date: Tue Jun 23 12:31:48 2020 -0400 Try adding all node groups back. 
commit 828f7aa9a9132ffa856fa830b3501011bfe4a4df Author: Ben Napolitan Date: Tue Jun 23 11:37:35 2020 -0400 Attempt only large performance test and no conformance. commit 82a80e74155510b2f81abbc22e62ef7f0ee3b32c Author: Ben Napolitan Date: Mon Jun 22 18:02:59 2020 -0400 Try deleting other node groups. commit 284fcd1845c4fb98c78d8bc999ae5ef72ef8d520 Author: Ben Napolitan Date: Mon Jun 22 16:13:47 2020 -0400 Trying again. commit e5ef16b22b3c2f6a55f8e512e67e4f338f6a1215 Author: Ben Napolitan Date: Mon Jun 22 16:10:20 2020 -0400 Altar size again. commit d1e0062335ff09fd918ad7ccfd67fe7271a0ea55 Author: Ben Napolitan Date: Mon Jun 22 12:53:06 2020 -0400 Attempt instance size change. commit 686e7f21a0b918857bd66534b7f8ad08a5e9f97e Author: Ben Napolitan Date: Fri Jun 19 16:47:51 2020 -0400 Fix duplicate name. commit e17358c7e942ed8a84e1b26431d3ebce329ad51f Author: Ben Napolitan Date: Fri Jun 19 14:04:58 2020 -0400 Attempt 5000 pod scale test. commit e9ea95dc3c1f7b782dc42395865bcd78321a4302 Author: Ben Napolitan Date: Thu Jun 18 17:53:28 2020 -0400 Attempt 730 pods on one node performance test. commit cad25aff1ac21fa37dfe81f6a72153bbe0c3db8a Author: Ben Napolitan Date: Thu Jun 18 13:26:51 2020 -0400 Fix file output syntax. commit 974ac0e6832eff6274b40033b4eee249cf483810 Author: Ben Napolitan Date: Thu Jun 18 11:42:30 2020 -0400 Verify scale test uploading works. commit b7efa1001600ce45788dc657e907486c0aa66022 Author: Ben Napolitan Date: Wed Jun 17 17:56:32 2020 -0400 Create data file after scale test. commit 3a9eaec1abc5be083776631decef9d4d8ff98511 Author: Ben Napolitan Date: Mon Jun 15 14:27:37 2020 -0400 Fix if syntax. commit 00d74bc7425d213619ce020be29eec83be8dd163 Author: Ben Napolitan Date: Mon Jun 15 11:36:03 2020 -0400 Run scale tests moved and hidden behind env var. commit ef6841ea1600ac0d0f33f5e675882d8f8c0ecb3b Author: Ben Napolitan Date: Sat Jun 13 21:35:21 2020 -0400 Fix grep causing failure. commit 4fbce7eb0556d034814e7cb4d973f9fa3de04c7c Author: Ben Napolitan Date: Sat Jun 13 18:37:11 2020 -0400 Reduce sleep for scale test. commit d766018ef13e7ae24eff4be038ccf36761ec542f Author: Ben Napolitan Date: Sat Jun 13 13:32:50 2020 -0400 Try to diagnose polling problem. commit 1ac7d354ac172a070c8d1c21158a75625537af6d Author: Ben Napolitan Date: Fri Jun 12 17:46:54 2020 -0400 Run scale test for 130 pods. commit 9933a0981865e44202b0a32201efdaf97cda9eec Author: Ben Napolitan Date: Fri Jun 12 13:29:32 2020 -0400 Add new nodegroup and move directory copy to proper place. commit 470116cddf7ffa318bf27c589ce1c406b964977b Author: Ben Napolitan Date: Fri Jun 12 12:04:48 2020 -0400 Move to after kubeconfig. commit 1f1f0fbfcfe9714e39e31332f4409f166ea2fb4d Author: Ben Napolitan Date: Fri Jun 12 01:19:04 2020 -0400 Switch to use KUBECTL_PATH. commit 1b43268c92b520dac1f672e1f9c47dd92c41db92 Author: Ben Napolitan Date: Thu Jun 11 23:46:58 2020 -0400 Retry with one nodegroup. commit b0d3228959b856c43712d2d60412cb33dfc94178 Author: Ben Napolitan Date: Thu Jun 11 23:00:48 2020 -0400 Try to create new nodegroup and apply deployment to it. commit abd90157d456d00ad1125e185075fb26a4d00584 Author: Ben Napolitan Date: Thu Jun 11 21:25:40 2020 -0400 Correct cluster name and change region in CircleCI. commit 46fe54fd62ded4968c2a89b89dcf6210de888ee4 Author: Ben Napolitan Date: Thu Jun 11 19:03:03 2020 -0400 Get info for eksctl. commit bbb3557abd659c83610473eb87ad3551e3f362d4 Author: Ben Napolitan Date: Wed Jun 10 16:08:26 2020 -0400 Attempt to ssh into test run. 
commit 353130b93f8f12286131971a2428244501b91181 Author: Ben Napolitan Date: Wed Jun 10 14:22:18 2020 -0400 Delete eks nodegroup create. commit 0ff75894a9fcb253d24755ce20dc7400e36ed742 Author: Ben Napolitan Date: Wed Jun 10 13:14:51 2020 -0400 Try to use eksctl. commit 3ec6da45752d341533522e14b514d14d122f7864 Author: Ben Napolitan Date: Wed Jun 10 12:28:23 2020 -0400 Syntax fix. commit e79b32f85debfc7a11159b393b949afa9601a6f5 Author: Ben Napolitan Date: Tue Jun 9 19:55:25 2020 -0400 Trying to create nodegroup and deploy pods. --- .circleci/config.yml | 44 +++++++ scripts/lib/cluster.sh | 10 +- scripts/lib/common.sh | 8 +- scripts/lib/performance_tests.sh | 209 +++++++++++++++++++++++++++++++ scripts/run-integration-tests.sh | 33 ++++- test/integration/README.md | 45 +++++++ testdata/deploy-130-pods.yaml | 26 ++++ testdata/deploy-5000-pods.yaml | 26 ++++ testdata/deploy-730-pods.yaml | 26 ++++ 9 files changed, 419 insertions(+), 8 deletions(-) create mode 100644 scripts/lib/performance_tests.sh create mode 100644 test/integration/README.md create mode 100644 testdata/deploy-130-pods.yaml create mode 100644 testdata/deploy-5000-pods.yaml create mode 100644 testdata/deploy-730-pods.yaml diff --git a/.circleci/config.yml b/.circleci/config.yml index 313f810f6b..47d1583564 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -82,6 +82,38 @@ jobs: - store_artifacts: path: /tmp/cni-test + performance_test: + docker: + - image: circleci/golang:1.13-stretch + working_directory: /go/src/github.com/{{ORG_NAME}}/{{REPO_NAME}} + environment: + <<: *env + RUN_CONFORMANCE: "false" + RUN_PERFORMANCE_TESTS: "true" + steps: + - checkout + - setup_remote_docker + - aws-cli/setup: + profile-name: awstester + - restore_cache: + keys: + - dependency-packages-store-{{ checksum "test/integration/go.mod" }} + - dependency-packages-store- + - k8s/install-kubectl: + # requires 1.14.9 for k8s testing, since it uses log api. 
+ kubectl-version: v1.14.9 + - run: + name: Run the integration tests + command: ./scripts/run-integration-tests.sh + no_output_timeout: 15m + - save_cache: + key: dependency-packages-store-{{ checksum "test/integration/go.mod" }} + paths: + - /go/pkg + when: always + - store_artifacts: + path: /tmp/cni-test + workflows: version: 2 check: @@ -118,3 +150,15 @@ workflows: - master jobs: - integration_test + + # triggers weekly tests on master + weekly-test-run: + triggers: + - schedule: + cron: "0 0 * * 6" + filters: + branches: + only: + - master + jobs: + - performance_test diff --git a/scripts/lib/cluster.sh b/scripts/lib/cluster.sh index d3d780a96d..1e5c646c77 100644 --- a/scripts/lib/cluster.sh +++ b/scripts/lib/cluster.sh @@ -12,6 +12,14 @@ function down-test-cluster() { } function up-test-cluster() { + MNGS="" + if [[ "$RUN_PERFORMANCE_TESTS" == true ]]; then + MNGS='{"three-nodes":{"name":"three-nodes","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":3,"asg-max-size":3,"asg-desired-capacity":3,"instance-types":["m5.xlarge"],"volume-size":40}, "single-node":{"name":"single-node","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":1,"asg-max-size":1,"asg-desired-capacity":1,"instance-types":["m5.16xlarge"],"volume-size":40}, "multi-node":{"name":"multi-node","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":1,"asg-max-size":100,"asg-desired-capacity":98,"instance-types":["m5.xlarge"],"volume-size":40}}' + RUN_CONFORMANCE=false + else + MNGS='{"GetRef.Name-mng-for-cni":{"name":"GetRef.Name-mng-for-cni","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":3,"asg-max-size":3,"asg-desired-capacity":3,"instance-types":["c5.xlarge"],"volume-size":40}}' + fi + echo -n "Configuring cluster $CLUSTER_NAME" AWS_K8S_TESTER_EKS_NAME=$CLUSTER_NAME \ AWS_K8S_TESTER_EKS_LOG_COLOR=true \ @@ -26,7 +34,7 @@ function up-test-cluster() { AWS_K8S_TESTER_EKS_ADD_ON_MANAGED_NODE_GROUPS_ENABLE=true \ AWS_K8S_TESTER_EKS_ADD_ON_MANAGED_NODE_GROUPS_ROLE_CREATE=$ROLE_CREATE \ AWS_K8S_TESTER_EKS_ADD_ON_MANAGED_NODE_GROUPS_ROLE_ARN=$ROLE_ARN \ - AWS_K8S_TESTER_EKS_ADD_ON_MANAGED_NODE_GROUPS_MNGS='{"GetRef.Name-mng-for-cni":{"name":"GetRef.Name-mng-for-cni","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":3,"asg-max-size":3,"asg-desired-capacity":3,"instance-types":["c5.xlarge"],"volume-size":40}}' \ + AWS_K8S_TESTER_EKS_ADD_ON_MANAGED_NODE_GROUPS_MNGS=$MNGS \ AWS_K8S_TESTER_EKS_ADD_ON_MANAGED_NODE_GROUPS_FETCH_LOGS=true \ AWS_K8S_TESTER_EKS_ADD_ON_NLB_HELLO_WORLD_ENABLE=true \ AWS_K8S_TESTER_EKS_ADD_ON_ALB_2048_ENABLE=true \ diff --git a/scripts/lib/common.sh b/scripts/lib/common.sh index c01637245e..38788454da 100644 --- a/scripts/lib/common.sh +++ b/scripts/lib/common.sh @@ -25,6 +25,12 @@ function display_timelines() { echo "TIMELINE: Default CNI integration tests took $DEFAULT_INTEGRATION_DURATION seconds." echo "TIMELINE: Updating CNI image took $CNI_IMAGE_UPDATE_DURATION seconds." echo "TIMELINE: Current image integration tests took $CURRENT_IMAGE_INTEGRATION_DURATION seconds." - echo "TIMELINE: Conformance tests took $CONFORMANCE_DURATION seconds." 
+ if [[ "$RUN_CONFORMANCE" == true ]]; then + echo "TIMELINE: Conformance tests took $CONFORMANCE_DURATION seconds." + fi + if [[ "$RUN_PERFORMANCE_TESTS" == true ]]; then + echo "TIMELINE: Performance tests took $PERFORMANCE_DURATION seconds." + fi echo "TIMELINE: Down processes took $DOWN_DURATION seconds." } + diff --git a/scripts/lib/performance_tests.sh b/scripts/lib/performance_tests.sh new file mode 100644 index 0000000000..848c81155f --- /dev/null +++ b/scripts/lib/performance_tests.sh @@ -0,0 +1,209 @@ +function check_for_timeout() { + if [[ $((SECONDS - $1)) -gt 10000 ]]; then + RUNNING_PERFORMANCE=false + on_error + fi +} + +function run_performance_test_130_pods() { + echo "Running performance tests against cluster" + RUNNING_PERFORMANCE=true + + DEPLOY_START=$SECONDS + + SCALE_UP_DURATION_ARRAY=() + SCALE_DOWN_DURATION_ARRAY=() + while [ ${#SCALE_UP_DURATION_ARRAY[@]} -lt 3 ] + do + ITERATION_START=$SECONDS + $KUBECTL_PATH scale -f ./testdata/deploy-130-pods.yaml --replicas=130 + sleep 20 + while [[ ! $($KUBECTL_PATH get deploy | grep 130/130) ]] + do + sleep 1 + echo "Scaling UP" + echo $($KUBECTL_PATH get deploy) + check_for_timeout $DEPLOY_START + done + + SCALE_UP_DURATION_ARRAY+=( $((SECONDS - ITERATION_START)) ) + MIDPOINT_START=$SECONDS + $KUBECTL_PATH scale -f ./testdata/deploy-130-pods.yaml --replicas=0 + while [[ $($KUBECTL_PATH get pods) ]] + do + sleep 1 + echo "Scaling DOWN" + echo $($KUBECTL_PATH get deploy) + check_for_timeout $DEPLOY_START + done + SCALE_DOWN_DURATION_ARRAY+=($((SECONDS - MIDPOINT_START))) + done + + echo "Times to scale up:" + INDEX=0 + while [ $INDEX -lt ${#SCALE_UP_DURATION_ARRAY[@]} ] + do + echo ${SCALE_UP_DURATION_ARRAY[$INDEX]} + INDEX=$((INDEX + 1)) + done + echo "" + echo "Times to scale down:" + INDEX=0 + while [ $INDEX -lt ${#SCALE_DOWN_DURATION_ARRAY[@]} ] + do + echo "${SCALE_DOWN_DURATION_ARRAY[$INDEX]} seconds" + INDEX=$((INDEX + 1)) + done + echo "" + DEPLOY_DURATION=$((SECONDS - DEPLOY_START)) + + now="pod-130-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T").csv" + echo $now + + echo $(date +"%m-%d-%Y-%T") >> $now + echo $((SCALE_UP_DURATION_ARRAY[0])), $((SCALE_DOWN_DURATION_ARRAY[0])) >> $now + echo $((SCALE_UP_DURATION_ARRAY[1])), $((SCALE_DOWN_DURATION_ARRAY[1])) >> $now + echo $((SCALE_UP_DURATION_ARRAY[2])), $((SCALE_DOWN_DURATION_ARRAY[2])) >> $now + + cat $now + aws s3 cp $now s3://cni-performance-test-data + + echo "TIMELINE: 130 Pod performance test took $DEPLOY_DURATION seconds." + RUNNING_PERFORMANCE=false +} + +function run_performance_test_730_pods() { + echo "Running performance tests against cluster" + RUNNING_PERFORMANCE=true + + DEPLOY_START=$SECONDS + + SCALE_UP_DURATION_ARRAY=() + SCALE_DOWN_DURATION_ARRAY=() + while [ ${#SCALE_UP_DURATION_ARRAY[@]} -lt 3 ] + do + ITERATION_START=$SECONDS + $KUBECTL_PATH scale -f ./testdata/deploy-730-pods.yaml --replicas=730 + sleep 100 + while [[ ! 
$($KUBECTL_PATH get deploy | grep 730/730) ]] + do + sleep 2 + echo "Scaling UP" + echo $($KUBECTL_PATH get deploy) + check_for_timeout $DEPLOY_START + done + + SCALE_UP_DURATION_ARRAY+=( $((SECONDS - ITERATION_START)) ) + MIDPOINT_START=$SECONDS + $KUBECTL_PATH scale -f ./testdata/deploy-730-pods.yaml --replicas=0 + sleep 100 + while [[ $($KUBECTL_PATH get pods) ]] + do + sleep 2 + echo "Scaling DOWN" + echo $($KUBECTL_PATH get deploy) + check_for_timeout $DEPLOY_START + done + SCALE_DOWN_DURATION_ARRAY+=($((SECONDS - MIDPOINT_START))) + done + + echo "Times to scale up:" + INDEX=0 + while [ $INDEX -lt ${#SCALE_UP_DURATION_ARRAY[@]} ] + do + echo ${SCALE_UP_DURATION_ARRAY[$INDEX]} + INDEX=$((INDEX + 1)) + done + echo "" + echo "Times to scale down:" + INDEX=0 + while [ $INDEX -lt ${#SCALE_DOWN_DURATION_ARRAY[@]} ] + do + echo "${SCALE_DOWN_DURATION_ARRAY[$INDEX]} seconds" + INDEX=$((INDEX + 1)) + done + echo "" + DEPLOY_DURATION=$((SECONDS - DEPLOY_START)) + + now="pod-730-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T").csv" + echo $now + + echo $(date +"%m-%d-%Y-%T") >> $now + echo $((SCALE_UP_DURATION_ARRAY[0])), $((SCALE_DOWN_DURATION_ARRAY[0])) >> $now + echo $((SCALE_UP_DURATION_ARRAY[1])), $((SCALE_DOWN_DURATION_ARRAY[1])) >> $now + echo $((SCALE_UP_DURATION_ARRAY[2])), $((SCALE_DOWN_DURATION_ARRAY[2])) >> $now + + cat $now + aws s3 cp $now s3://cni-performance-test-data + + echo "TIMELINE: 730 Pod performance test took $DEPLOY_DURATION seconds." + RUNNING_PERFORMANCE=false +} + +function run_performance_test_5000_pods() { + echo "Running performance tests against cluster" + RUNNING_PERFORMANCE=true + + DEPLOY_START=$SECONDS + + SCALE_UP_DURATION_ARRAY=() + SCALE_DOWN_DURATION_ARRAY=() + while [ ${#SCALE_UP_DURATION_ARRAY[@]} -lt 3 ] + do + ITERATION_START=$SECONDS + $KUBECTL_PATH scale -f ./testdata/deploy-5000-pods.yaml --replicas=5000 + sleep 100 + while [[ ! $($KUBECTL_PATH get deploy | grep 5000/5000) ]] + do + sleep 2 + echo "Scaling UP" + echo $($KUBECTL_PATH get deploy) + check_for_timeout $DEPLOY_START + done + + SCALE_UP_DURATION_ARRAY+=( $((SECONDS - ITERATION_START)) ) + MIDPOINT_START=$SECONDS + $KUBECTL_PATH scale -f ./testdata/deploy-5000-pods.yaml --replicas=0 + sleep 100 + while [[ $($KUBECTL_PATH get pods) ]] + do + sleep 2 + echo "Scaling DOWN" + echo $($KUBECTL_PATH get deploy) + check_for_timeout $DEPLOY_START + done + SCALE_DOWN_DURATION_ARRAY+=($((SECONDS - MIDPOINT_START))) + done + + echo "Times to scale up:" + INDEX=0 + while [ $INDEX -lt ${#SCALE_UP_DURATION_ARRAY[@]} ] + do + echo ${SCALE_UP_DURATION_ARRAY[$INDEX]} + INDEX=$((INDEX + 1)) + done + echo "" + echo "Times to scale down:" + INDEX=0 + while [ $INDEX -lt ${#SCALE_DOWN_DURATION_ARRAY[@]} ] + do + echo "${SCALE_DOWN_DURATION_ARRAY[$INDEX]} seconds" + INDEX=$((INDEX + 1)) + done + echo "" + DEPLOY_DURATION=$((SECONDS - DEPLOY_START)) + + now="pod-5000-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T").csv" + echo $now + + echo $(date +"%m-%d-%Y-%T") >> $now + echo $((SCALE_UP_DURATION_ARRAY[0])), $((SCALE_DOWN_DURATION_ARRAY[0])) >> $now + echo $((SCALE_UP_DURATION_ARRAY[1])), $((SCALE_DOWN_DURATION_ARRAY[1])) >> $now + echo $((SCALE_UP_DURATION_ARRAY[2])), $((SCALE_DOWN_DURATION_ARRAY[2])) >> $now + + cat $now + aws s3 cp $now s3://cni-performance-test-data + + echo "TIMELINE: 5000 Pod performance test took $DEPLOY_DURATION seconds." 
+    RUNNING_PERFORMANCE=false
+}
diff --git a/scripts/run-integration-tests.sh b/scripts/run-integration-tests.sh
index 0abac20081..54e322b821 100755
--- a/scripts/run-integration-tests.sh
+++ b/scripts/run-integration-tests.sh
@@ -8,6 +8,7 @@ DIR=$(cd "$(dirname "$0")"; pwd)
 source "$DIR"/lib/common.sh
 source "$DIR"/lib/aws.sh
 source "$DIR"/lib/cluster.sh
+source "$DIR"/lib/performance_tests.sh

 # Variables used in /lib/aws.sh
 OS=$(go env GOOS)
@@ -19,6 +20,8 @@ ARCH=$(go env GOARCH)
 : "${DEPROVISION:=true}"
 : "${BUILD:=true}"
 : "${RUN_CONFORMANCE:=false}"
+: "${RUN_PERFORMANCE_TESTS:=false}"
+: "${RUNNING_PERFORMANCE:=false}"

 __cluster_created=0
 __cluster_deprovisioned=0
@@ -26,13 +29,15 @@ __cluster_deprovisioned=0
 on_error() {
     # Make sure we destroy any cluster that was created if we hit run into an
     # error when attempting to run tests against the cluster
-    if [[ $__cluster_created -eq 1 && $__cluster_deprovisioned -eq 0 && "$DEPROVISION" == true ]]; then
-        # prevent double-deprovisioning with ctrl-c during deprovisioning...
-        __cluster_deprovisioned=1
-        echo "Cluster was provisioned already. Deprovisioning it..."
-        down-test-cluster
+    if [[ $RUNNING_PERFORMANCE == false ]]; then
+        if [[ $__cluster_created -eq 1 && $__cluster_deprovisioned -eq 0 && "$DEPROVISION" == true ]]; then
+            # prevent double-deprovisioning with ctrl-c during deprovisioning...
+            __cluster_deprovisioned=1
+            echo "Cluster was provisioned already. Deprovisioning it..."
+            down-test-cluster
+        fi
+        exit 1
     fi
-    exit 1
 }

 # test specific config, results location
@@ -213,6 +218,22 @@ if [[ $TEST_PASS -eq 0 && "$RUN_CONFORMANCE" == true ]]; then
     echo "TIMELINE: Conformance tests took $CONFORMANCE_DURATION seconds."
 fi

+if [[ "$RUN_PERFORMANCE_TESTS" == true ]]; then
+    START=$SECONDS
+    $KUBECTL_PATH apply -f ./testdata/deploy-130-pods.yaml
+    run_performance_test_130_pods
+    $KUBECTL_PATH delete -f ./testdata/deploy-130-pods.yaml
+
+    $KUBECTL_PATH apply -f ./testdata/deploy-730-pods.yaml
+    run_performance_test_730_pods
+    $KUBECTL_PATH delete -f ./testdata/deploy-730-pods.yaml
+
+    $KUBECTL_PATH apply -f ./testdata/deploy-5000-pods.yaml
+    run_performance_test_5000_pods
+    $KUBECTL_PATH delete -f ./testdata/deploy-5000-pods.yaml
+    PERFORMANCE_DURATION=$((SECONDS - START))
+fi
+
 if [[ "$DEPROVISION" == true ]]; then
     START=$SECONDS
diff --git a/test/integration/README.md b/test/integration/README.md
new file mode 100644
index 0000000000..0d575ea22b
--- /dev/null
+++ b/test/integration/README.md
@@ -0,0 +1,45 @@
+## How to run tests
+# All tests
+ * set AWS_ACCESS_KEY_ID
+ * set AWS_SECRET_ACCESS_KEY
+ * set AWS_DEFAULT_REGION (optional, defaults to us-west-2 if not set)
+ * approve test after build completes
+# Performance
+ * run from cni test account to upload test results
+ * set RUN_PERFORMANCE_TESTS=true
+# KOPS
+ * set RUN_KOPS_TEST=true
+ * will occasionally fail/flake tests, try re-running the test a couple of times to confirm there is a real problem
+
+## Conformance test duration log
+
+* May 20, 2020: Initial integration step took roughly 3h 41min
+* May 27: 3h 1min
+  * Skip tests labeled as “Slow” for Ginkgo framework
+  * Timelines:
+    * Default CNI: 73s
+    * Updating CNI image: 110s
+    * Current image integration: 47s
+    * Conformance tests: 119.167 min (2 hrs)
+    * Down cluster: 30 min
+* May 29: 2h 59min 30s
+  * Cache dependencies when testing default CNI
+  * Timelines:
+    * Docker build: 4 min
+    * Up test cluster: 31 min
+    * Default CNI: 50s
+    * Updating CNI image: 92s
+    * Current image integration: 17s
+    * Conformance tests: 114 min
(1.9 hrs) + * Down cluster: 30 min +* June 5: 1h 24min 9s + * Parallel execution of conformance tests + * Timelines: + * Docker build: 3 min + * Up test cluster: 31 min + * Default CNI: 52s + * Updating CNI image: 92s + * Current image integration: 18s + * Conformance tests: 16 min + * Down cluster: 30 min + diff --git a/testdata/deploy-130-pods.yaml b/testdata/deploy-130-pods.yaml new file mode 100644 index 0000000000..1f4552d3b3 --- /dev/null +++ b/testdata/deploy-130-pods.yaml @@ -0,0 +1,26 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: deploy-130-pods +spec: + replicas: 1 + selector: + matchLabels: + app: deploy-130-pods + template: + metadata: + name: test-pod-130 + labels: + app: deploy-130-pods + tier: backend + track: stable + spec: + containers: + - name: hello + image: "kubernetes/pause:latest" + ports: + - name: http + containerPort: 80 + imagePullPolicy: IfNotPresent + nodeSelector: + eks.amazonaws.com/nodegroup: three-nodes diff --git a/testdata/deploy-5000-pods.yaml b/testdata/deploy-5000-pods.yaml new file mode 100644 index 0000000000..cb760f81fc --- /dev/null +++ b/testdata/deploy-5000-pods.yaml @@ -0,0 +1,26 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: deploy-5000-pods +spec: + replicas: 1 + selector: + matchLabels: + app: deploy-5000-pods + template: + metadata: + name: test-pod-5000 + labels: + app: deploy-5000-pods + tier: backend + track: stable + spec: + containers: + - name: hello + image: "kubernetes/pause:latest" + ports: + - name: http + containerPort: 80 + imagePullPolicy: IfNotPresent + nodeSelector: + eks.amazonaws.com/nodegroup: multi-node diff --git a/testdata/deploy-730-pods.yaml b/testdata/deploy-730-pods.yaml new file mode 100644 index 0000000000..48db130811 --- /dev/null +++ b/testdata/deploy-730-pods.yaml @@ -0,0 +1,26 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: deploy-730-pods +spec: + replicas: 1 + selector: + matchLabels: + app: deploy-730-pods + template: + metadata: + name: test-pod-730 + labels: + app: deploy-730-pods + tier: backend + track: stable + spec: + containers: + - name: hello + image: "kubernetes/pause:latest" + ports: + - name: http + containerPort: 80 + imagePullPolicy: IfNotPresent + nodeSelector: + eks.amazonaws.com/nodegroup: single-node From c8b1f9bee41555ccfd3b05020e14bcc05afd71a4 Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Wed, 8 Jul 2020 18:16:39 -0400 Subject: [PATCH 02/43] Create S3 bucket filename var, remove sleeps before polling, and refactor some code. 
--- scripts/lib/cluster.sh | 1 + scripts/lib/performance_tests.sh | 71 +++++++++++++++++++------------- scripts/run-integration-tests.sh | 8 ---- test/integration/README.md | 16 +++++++ 4 files changed, 59 insertions(+), 37 deletions(-) diff --git a/scripts/lib/cluster.sh b/scripts/lib/cluster.sh index 1e5c646c77..6f3612320a 100644 --- a/scripts/lib/cluster.sh +++ b/scripts/lib/cluster.sh @@ -16,6 +16,7 @@ function up-test-cluster() { if [[ "$RUN_PERFORMANCE_TESTS" == true ]]; then MNGS='{"three-nodes":{"name":"three-nodes","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":3,"asg-max-size":3,"asg-desired-capacity":3,"instance-types":["m5.xlarge"],"volume-size":40}, "single-node":{"name":"single-node","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":1,"asg-max-size":1,"asg-desired-capacity":1,"instance-types":["m5.16xlarge"],"volume-size":40}, "multi-node":{"name":"multi-node","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":1,"asg-max-size":100,"asg-desired-capacity":98,"instance-types":["m5.xlarge"],"volume-size":40}}' RUN_CONFORMANCE=false + PERFORMANCE_TEST_S3_BUCKET_NAME:="" else MNGS='{"GetRef.Name-mng-for-cni":{"name":"GetRef.Name-mng-for-cni","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":3,"asg-max-size":3,"asg-desired-capacity":3,"instance-types":["c5.xlarge"],"volume-size":40}}' fi diff --git a/scripts/lib/performance_tests.sh b/scripts/lib/performance_tests.sh index 848c81155f..5a32a5b15e 100644 --- a/scripts/lib/performance_tests.sh +++ b/scripts/lib/performance_tests.sh @@ -8,6 +8,7 @@ function check_for_timeout() { function run_performance_test_130_pods() { echo "Running performance tests against cluster" RUNNING_PERFORMANCE=true + $KUBECTL_PATH apply -f ./testdata/deploy-130-pods.yaml DEPLOY_START=$SECONDS @@ -17,7 +18,6 @@ function run_performance_test_130_pods() { do ITERATION_START=$SECONDS $KUBECTL_PATH scale -f ./testdata/deploy-130-pods.yaml --replicas=130 - sleep 20 while [[ ! $($KUBECTL_PATH get deploy | grep 130/130) ]] do sleep 1 @@ -57,24 +57,30 @@ function run_performance_test_130_pods() { echo "" DEPLOY_DURATION=$((SECONDS - DEPLOY_START)) - now="pod-130-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T").csv" - echo $now + filename="pod-130-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T").csv" + echo $filename - echo $(date +"%m-%d-%Y-%T") >> $now - echo $((SCALE_UP_DURATION_ARRAY[0])), $((SCALE_DOWN_DURATION_ARRAY[0])) >> $now - echo $((SCALE_UP_DURATION_ARRAY[1])), $((SCALE_DOWN_DURATION_ARRAY[1])) >> $now - echo $((SCALE_UP_DURATION_ARRAY[2])), $((SCALE_DOWN_DURATION_ARRAY[2])) >> $now + echo $(date +"%m-%d-%Y-%T") >> $filename + echo $((SCALE_UP_DURATION_ARRAY[0])), $((SCALE_DOWN_DURATION_ARRAY[0])) >> $filename + echo $((SCALE_UP_DURATION_ARRAY[1])), $((SCALE_DOWN_DURATION_ARRAY[1])) >> $filename + echo $((SCALE_UP_DURATION_ARRAY[2])), $((SCALE_DOWN_DURATION_ARRAY[2])) >> $filename - cat $now - aws s3 cp $now s3://cni-performance-test-data + cat $filename + if [[ ${#PERFORMANCE_TEST_S3_BUCKET_NAME} -gt 0 ]]; then + aws s3 cp $filename $PERFORMANCE_TEST_S3_BUCKET_NAME + else + echo "No S3 bucket name given, skipping test result upload." + fi echo "TIMELINE: 130 Pod performance test took $DEPLOY_DURATION seconds." 
RUNNING_PERFORMANCE=false + $KUBECTL_PATH delete -f ./testdata/deploy-130-pods.yaml } function run_performance_test_730_pods() { echo "Running performance tests against cluster" RUNNING_PERFORMANCE=true + $KUBECTL_PATH apply -f ./testdata/deploy-730-pods.yaml DEPLOY_START=$SECONDS @@ -84,7 +90,6 @@ function run_performance_test_730_pods() { do ITERATION_START=$SECONDS $KUBECTL_PATH scale -f ./testdata/deploy-730-pods.yaml --replicas=730 - sleep 100 while [[ ! $($KUBECTL_PATH get deploy | grep 730/730) ]] do sleep 2 @@ -96,7 +101,6 @@ function run_performance_test_730_pods() { SCALE_UP_DURATION_ARRAY+=( $((SECONDS - ITERATION_START)) ) MIDPOINT_START=$SECONDS $KUBECTL_PATH scale -f ./testdata/deploy-730-pods.yaml --replicas=0 - sleep 100 while [[ $($KUBECTL_PATH get pods) ]] do sleep 2 @@ -125,24 +129,30 @@ function run_performance_test_730_pods() { echo "" DEPLOY_DURATION=$((SECONDS - DEPLOY_START)) - now="pod-730-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T").csv" - echo $now + filename="pod-730-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T").csv" + echo $filename - echo $(date +"%m-%d-%Y-%T") >> $now - echo $((SCALE_UP_DURATION_ARRAY[0])), $((SCALE_DOWN_DURATION_ARRAY[0])) >> $now - echo $((SCALE_UP_DURATION_ARRAY[1])), $((SCALE_DOWN_DURATION_ARRAY[1])) >> $now - echo $((SCALE_UP_DURATION_ARRAY[2])), $((SCALE_DOWN_DURATION_ARRAY[2])) >> $now + echo $(date +"%m-%d-%Y-%T") >> $filename + echo $((SCALE_UP_DURATION_ARRAY[0])), $((SCALE_DOWN_DURATION_ARRAY[0])) >> $filename + echo $((SCALE_UP_DURATION_ARRAY[1])), $((SCALE_DOWN_DURATION_ARRAY[1])) >> $filename + echo $((SCALE_UP_DURATION_ARRAY[2])), $((SCALE_DOWN_DURATION_ARRAY[2])) >> $filename - cat $now - aws s3 cp $now s3://cni-performance-test-data + cat $filename + if [[ ${#PERFORMANCE_TEST_S3_BUCKET_NAME} -gt 0 ]]; then + aws s3 cp $filename $PERFORMANCE_TEST_S3_BUCKET_NAME + else + echo "No S3 bucket name given, skipping test result upload." + fi echo "TIMELINE: 730 Pod performance test took $DEPLOY_DURATION seconds." RUNNING_PERFORMANCE=false + $KUBECTL_PATH delete -f ./testdata/deploy-730-pods.yaml } function run_performance_test_5000_pods() { echo "Running performance tests against cluster" RUNNING_PERFORMANCE=true + $KUBECTL_PATH apply -f ./testdata/deploy-5000-pods.yaml DEPLOY_START=$SECONDS @@ -152,7 +162,6 @@ function run_performance_test_5000_pods() { do ITERATION_START=$SECONDS $KUBECTL_PATH scale -f ./testdata/deploy-5000-pods.yaml --replicas=5000 - sleep 100 while [[ ! 
$($KUBECTL_PATH get deploy | grep 5000/5000) ]] do sleep 2 @@ -164,7 +173,6 @@ function run_performance_test_5000_pods() { SCALE_UP_DURATION_ARRAY+=( $((SECONDS - ITERATION_START)) ) MIDPOINT_START=$SECONDS $KUBECTL_PATH scale -f ./testdata/deploy-5000-pods.yaml --replicas=0 - sleep 100 while [[ $($KUBECTL_PATH get pods) ]] do sleep 2 @@ -193,17 +201,22 @@ function run_performance_test_5000_pods() { echo "" DEPLOY_DURATION=$((SECONDS - DEPLOY_START)) - now="pod-5000-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T").csv" - echo $now + filename="pod-5000-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T").csv" + echo $filename - echo $(date +"%m-%d-%Y-%T") >> $now - echo $((SCALE_UP_DURATION_ARRAY[0])), $((SCALE_DOWN_DURATION_ARRAY[0])) >> $now - echo $((SCALE_UP_DURATION_ARRAY[1])), $((SCALE_DOWN_DURATION_ARRAY[1])) >> $now - echo $((SCALE_UP_DURATION_ARRAY[2])), $((SCALE_DOWN_DURATION_ARRAY[2])) >> $now + echo $(date +"%m-%d-%Y-%T") >> $filename + echo $((SCALE_UP_DURATION_ARRAY[0])), $((SCALE_DOWN_DURATION_ARRAY[0])) >> $filename + echo $((SCALE_UP_DURATION_ARRAY[1])), $((SCALE_DOWN_DURATION_ARRAY[1])) >> $filename + echo $((SCALE_UP_DURATION_ARRAY[2])), $((SCALE_DOWN_DURATION_ARRAY[2])) >> $filename - cat $now - aws s3 cp $now s3://cni-performance-test-data + cat $filename + if [[ ${#PERFORMANCE_TEST_S3_BUCKET_NAME} -gt 0 ]]; then + aws s3 cp $filename $PERFORMANCE_TEST_S3_BUCKET_NAME + else + echo "No S3 bucket name given, skipping test result upload." + fi echo "TIMELINE: 5000 Pod performance test took $DEPLOY_DURATION seconds." RUNNING_PERFORMANCE=false + $KUBECTL_PATH delete -f ./testdata/deploy-5000-pods.yaml } diff --git a/scripts/run-integration-tests.sh b/scripts/run-integration-tests.sh index 54e322b821..86e7c43e7c 100755 --- a/scripts/run-integration-tests.sh +++ b/scripts/run-integration-tests.sh @@ -220,17 +220,9 @@ fi if [[ "$RUN_PERFORMANCE_TESTS" == true ]]; then START=$SECONDS - $KUBECTL_PATH apply -f ./testdata/deploy-130-pods.yaml run_performance_test_130_pods - $KUBECTL_PATH delete -f ./testdata/deploy-130-pods.yaml - - $KUBECTL_PATH apply -f ./testdata/deploy-730-pods.yaml run_performance_test_730_pods - $KUBECTL_PATH delete -f ./testdata/deploy-730-pods.yaml - - $KUBECTL_PATH apply -f ./testdata/deploy-5000-pods.yaml run_performance_test_5000_pods - $KUBECTL_PATH delete -f ./testdata/deploy-5000-pods.yaml PERFORMANCE_DURATION=$((SECONDS - START)) fi diff --git a/test/integration/README.md b/test/integration/README.md index 0d575ea22b..0e6bf7898d 100644 --- a/test/integration/README.md +++ b/test/integration/README.md @@ -4,13 +4,18 @@ * set AWS_SECRET_ACCESS_KEY * set AWS_DEFAULT_REGION (optional, defaults to us-west-2 if not set) * approve test after build completes + # Performance * run from cni test account to upload test results + * set PERFORMANCE_TEST_S3_BUCKET_NAME to the name of the bucket (likely s3://cni-performance-test-data) * set RUN_PERFORMANCE_TESTS=true + # KOPS * set RUN_KOPS_TEST=true * will occassionally fail/flake tests, try re-running test a couple times to ensure there is a problem + + ## Conformance test duration log * May 20, 2020: Initial integration step took roughly 3h 41min @@ -43,3 +48,14 @@ * Conformance tests: 16 min * Down cluster: 30 min + + +## How to Manually delete k8s tester Resources (order of deletion) + +Cloudformation - (all except cluster, vpc, rol-mng) +EC2 - load balancers, key pair +VPC - Nat gateways, Elastic IPs(after a minute), internet gateway +Cloudformation - cluster +EC2 - network interfaces, security groups +VPC - subnet, route tables 
+Cloudformation - cluster, vpc(after cluster deletes) \ No newline at end of file From 0cf8c5a8532554b9ebb2605a44e6a6508fd87397 Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Wed, 8 Jul 2020 18:39:14 -0400 Subject: [PATCH 03/43] Syntax fix. --- scripts/lib/cluster.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/lib/cluster.sh b/scripts/lib/cluster.sh index 6f3612320a..212acab0ee 100644 --- a/scripts/lib/cluster.sh +++ b/scripts/lib/cluster.sh @@ -16,7 +16,7 @@ function up-test-cluster() { if [[ "$RUN_PERFORMANCE_TESTS" == true ]]; then MNGS='{"three-nodes":{"name":"three-nodes","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":3,"asg-max-size":3,"asg-desired-capacity":3,"instance-types":["m5.xlarge"],"volume-size":40}, "single-node":{"name":"single-node","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":1,"asg-max-size":1,"asg-desired-capacity":1,"instance-types":["m5.16xlarge"],"volume-size":40}, "multi-node":{"name":"multi-node","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":1,"asg-max-size":100,"asg-desired-capacity":98,"instance-types":["m5.xlarge"],"volume-size":40}}' RUN_CONFORMANCE=false - PERFORMANCE_TEST_S3_BUCKET_NAME:="" + : "${PERFORMANCE_TEST_S3_BUCKET_NAME:=""}" else MNGS='{"GetRef.Name-mng-for-cni":{"name":"GetRef.Name-mng-for-cni","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":3,"asg-max-size":3,"asg-desired-capacity":3,"instance-types":["c5.xlarge"],"volume-size":40}}' fi From cf2b5d855c505246483275332db882c988ef845d Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Thu, 9 Jul 2020 14:17:09 -0400 Subject: [PATCH 04/43] Add version info to file, display start of performance tests. --- scripts/lib/performance_tests.sh | 59 +++++++++++--------------------- scripts/run-integration-tests.sh | 3 ++ test/integration/README.md | 2 +- 3 files changed, 24 insertions(+), 40 deletions(-) diff --git a/scripts/lib/performance_tests.sh b/scripts/lib/performance_tests.sh index 5a32a5b15e..b84bdcfe47 100644 --- a/scripts/lib/performance_tests.sh +++ b/scripts/lib/performance_tests.sh @@ -5,6 +5,23 @@ function check_for_timeout() { fi } +function save_results_to_file() { + echo $filename + + echo $(date +"%m-%d-%Y-%T") >> $filename + echo $TEST_IMAGE_VERSION >> $filename + echo $((SCALE_UP_DURATION_ARRAY[0])), $((SCALE_DOWN_DURATION_ARRAY[0])) >> $filename + echo $((SCALE_UP_DURATION_ARRAY[1])), $((SCALE_DOWN_DURATION_ARRAY[1])) >> $filename + echo $((SCALE_UP_DURATION_ARRAY[2])), $((SCALE_DOWN_DURATION_ARRAY[2])) >> $filename + + cat $filename + if [[ ${#PERFORMANCE_TEST_S3_BUCKET_NAME} -gt 0 ]]; then + aws s3 cp $filename $PERFORMANCE_TEST_S3_BUCKET_NAME + else + echo "No S3 bucket name given, skipping test result upload." 
+ fi +} + function run_performance_test_130_pods() { echo "Running performance tests against cluster" RUNNING_PERFORMANCE=true @@ -58,19 +75,7 @@ function run_performance_test_130_pods() { DEPLOY_DURATION=$((SECONDS - DEPLOY_START)) filename="pod-130-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T").csv" - echo $filename - - echo $(date +"%m-%d-%Y-%T") >> $filename - echo $((SCALE_UP_DURATION_ARRAY[0])), $((SCALE_DOWN_DURATION_ARRAY[0])) >> $filename - echo $((SCALE_UP_DURATION_ARRAY[1])), $((SCALE_DOWN_DURATION_ARRAY[1])) >> $filename - echo $((SCALE_UP_DURATION_ARRAY[2])), $((SCALE_DOWN_DURATION_ARRAY[2])) >> $filename - - cat $filename - if [[ ${#PERFORMANCE_TEST_S3_BUCKET_NAME} -gt 0 ]]; then - aws s3 cp $filename $PERFORMANCE_TEST_S3_BUCKET_NAME - else - echo "No S3 bucket name given, skipping test result upload." - fi + save_results_to_file echo "TIMELINE: 130 Pod performance test took $DEPLOY_DURATION seconds." RUNNING_PERFORMANCE=false @@ -130,19 +135,7 @@ function run_performance_test_730_pods() { DEPLOY_DURATION=$((SECONDS - DEPLOY_START)) filename="pod-730-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T").csv" - echo $filename - - echo $(date +"%m-%d-%Y-%T") >> $filename - echo $((SCALE_UP_DURATION_ARRAY[0])), $((SCALE_DOWN_DURATION_ARRAY[0])) >> $filename - echo $((SCALE_UP_DURATION_ARRAY[1])), $((SCALE_DOWN_DURATION_ARRAY[1])) >> $filename - echo $((SCALE_UP_DURATION_ARRAY[2])), $((SCALE_DOWN_DURATION_ARRAY[2])) >> $filename - - cat $filename - if [[ ${#PERFORMANCE_TEST_S3_BUCKET_NAME} -gt 0 ]]; then - aws s3 cp $filename $PERFORMANCE_TEST_S3_BUCKET_NAME - else - echo "No S3 bucket name given, skipping test result upload." - fi + save_results_to_file echo "TIMELINE: 730 Pod performance test took $DEPLOY_DURATION seconds." RUNNING_PERFORMANCE=false @@ -202,19 +195,7 @@ function run_performance_test_5000_pods() { DEPLOY_DURATION=$((SECONDS - DEPLOY_START)) filename="pod-5000-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T").csv" - echo $filename - - echo $(date +"%m-%d-%Y-%T") >> $filename - echo $((SCALE_UP_DURATION_ARRAY[0])), $((SCALE_DOWN_DURATION_ARRAY[0])) >> $filename - echo $((SCALE_UP_DURATION_ARRAY[1])), $((SCALE_DOWN_DURATION_ARRAY[1])) >> $filename - echo $((SCALE_UP_DURATION_ARRAY[2])), $((SCALE_DOWN_DURATION_ARRAY[2])) >> $filename - - cat $filename - if [[ ${#PERFORMANCE_TEST_S3_BUCKET_NAME} -gt 0 ]]; then - aws s3 cp $filename $PERFORMANCE_TEST_S3_BUCKET_NAME - else - echo "No S3 bucket name given, skipping test result upload." - fi + save_results_to_file echo "TIMELINE: 5000 Pod performance test took $DEPLOY_DURATION seconds." 
RUNNING_PERFORMANCE=false diff --git a/scripts/run-integration-tests.sh b/scripts/run-integration-tests.sh index 86e7c43e7c..54e0301d88 100755 --- a/scripts/run-integration-tests.sh +++ b/scripts/run-integration-tests.sh @@ -219,6 +219,9 @@ if [[ $TEST_PASS -eq 0 && "$RUN_CONFORMANCE" == true ]]; then fi if [[ "$RUN_PERFORMANCE_TESTS" == true ]]; then + echo "*******************************************************************************" + echo "Running performance tests on current image:" + echo "" START=$SECONDS run_performance_test_130_pods run_performance_test_730_pods diff --git a/test/integration/README.md b/test/integration/README.md index 0e6bf7898d..b8d3ff234b 100644 --- a/test/integration/README.md +++ b/test/integration/README.md @@ -52,7 +52,7 @@ ## How to Manually delete k8s tester Resources (order of deletion) -Cloudformation - (all except cluster, vpc, rol-mng) +Cloudformation - (all except cluster, vpc) EC2 - load balancers, key pair VPC - Nat gateways, Elastic IPs(after a minute), internet gateway Cloudformation - cluster From 07b54f29ca195d21ea9e50d07644aa1f0bd33db2 Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Thu, 9 Jul 2020 17:24:37 -0400 Subject: [PATCH 05/43] Scale up node group before running 5000 pod test. --- scripts/lib/cluster.sh | 2 +- scripts/lib/performance_tests.sh | 12 ++++++++++++ scripts/run-integration-tests.sh | 1 + test/integration/README.md | 3 ++- 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/scripts/lib/cluster.sh b/scripts/lib/cluster.sh index 212acab0ee..59bd0e77aa 100644 --- a/scripts/lib/cluster.sh +++ b/scripts/lib/cluster.sh @@ -14,7 +14,7 @@ function down-test-cluster() { function up-test-cluster() { MNGS="" if [[ "$RUN_PERFORMANCE_TESTS" == true ]]; then - MNGS='{"three-nodes":{"name":"three-nodes","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":3,"asg-max-size":3,"asg-desired-capacity":3,"instance-types":["m5.xlarge"],"volume-size":40}, "single-node":{"name":"single-node","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":1,"asg-max-size":1,"asg-desired-capacity":1,"instance-types":["m5.16xlarge"],"volume-size":40}, "multi-node":{"name":"multi-node","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":1,"asg-max-size":100,"asg-desired-capacity":98,"instance-types":["m5.xlarge"],"volume-size":40}}' + MNGS='{"three-nodes":{"name":"three-nodes","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":3,"asg-max-size":3,"asg-desired-capacity":3,"instance-types":["m5.xlarge"],"volume-size":40}, "single-node":{"name":"single-node","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":1,"asg-max-size":1,"asg-desired-capacity":1,"instance-types":["m5.16xlarge"],"volume-size":40}, "multi-node":{"name":"multi-node","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":1,"asg-max-size":100,"asg-desired-capacity":1,"instance-types":["m5.xlarge"],"volume-size":40}}' RUN_CONFORMANCE=false : "${PERFORMANCE_TEST_S3_BUCKET_NAME:=""}" else diff --git a/scripts/lib/performance_tests.sh b/scripts/lib/performance_tests.sh index b84bdcfe47..61d0b5d1e9 
100644 --- a/scripts/lib/performance_tests.sh +++ b/scripts/lib/performance_tests.sh @@ -142,6 +142,18 @@ function run_performance_test_730_pods() { $KUBECTL_PATH delete -f ./testdata/deploy-730-pods.yaml } +function scale_nodes_for_5000_pod_test() { + AUTO_SCALE_GROUP_INFO=$(aws autoscaling describe-auto-scaling-groups | grep -B9 98,) + AUTO_SCALE_GROUP_NAME_WITH_QUOTES=$(echo "${AUTO_SCALE_GROUP_INFO%%:*}") + AUTO_SCALE_GROUP_NAME="${AUTO_SCALE_GROUP_NAME_WITH_QUOTES%\"}" + AUTO_SCALE_GROUP_NAME=$(echo $AUTO_SCALE_GROUP_NAME | cut -c2-) + echo $AUTO_SCALE_GROUP_NAME + + aws autoscaling update-auto-scaling-group \ + --auto-scaling-group-name $AUTO_SCALE_GROUP_NAME \ + --desired-capacity 98 +} + function run_performance_test_5000_pods() { echo "Running performance tests against cluster" RUNNING_PERFORMANCE=true diff --git a/scripts/run-integration-tests.sh b/scripts/run-integration-tests.sh index 54e0301d88..e717a29bcf 100755 --- a/scripts/run-integration-tests.sh +++ b/scripts/run-integration-tests.sh @@ -225,6 +225,7 @@ if [[ "$RUN_PERFORMANCE_TESTS" == true ]]; then START=$SECONDS run_performance_test_130_pods run_performance_test_730_pods + scale_nodes_for_5000_pod_test run_performance_test_5000_pods PERFORMANCE_DURATION=$((SECONDS - START)) fi diff --git a/test/integration/README.md b/test/integration/README.md index b8d3ff234b..0328f66ed3 100644 --- a/test/integration/README.md +++ b/test/integration/README.md @@ -58,4 +58,5 @@ VPC - Nat gateways, Elastic IPs(after a minute), internet gateway Cloudformation - cluster EC2 - network interfaces, security groups VPC - subnet, route tables -Cloudformation - cluster, vpc(after cluster deletes) \ No newline at end of file +Cloudformation - cluster, vpc(after cluster deletes) +S3 - delete bucket \ No newline at end of file From 55220bd9fdee60c51fa5a43dabb9ffd3f0cb9d68 Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Thu, 9 Jul 2020 17:38:02 -0400 Subject: [PATCH 06/43] Create unique mng names. 
--- scripts/lib/cluster.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/lib/cluster.sh b/scripts/lib/cluster.sh index 59bd0e77aa..d483ae5484 100644 --- a/scripts/lib/cluster.sh +++ b/scripts/lib/cluster.sh @@ -14,7 +14,7 @@ function down-test-cluster() { function up-test-cluster() { MNGS="" if [[ "$RUN_PERFORMANCE_TESTS" == true ]]; then - MNGS='{"three-nodes":{"name":"three-nodes","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":3,"asg-max-size":3,"asg-desired-capacity":3,"instance-types":["m5.xlarge"],"volume-size":40}, "single-node":{"name":"single-node","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":1,"asg-max-size":1,"asg-desired-capacity":1,"instance-types":["m5.16xlarge"],"volume-size":40}, "multi-node":{"name":"multi-node","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":1,"asg-max-size":100,"asg-desired-capacity":1,"instance-types":["m5.xlarge"],"volume-size":40}}' + MNGS='{"GetRef.Name-mng-three-nodes":{"name":"GetRef.Name-mng-three-nodes","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":3,"asg-max-size":3,"asg-desired-capacity":3,"instance-types":["m5.xlarge"],"volume-size":40}, "GetRef.Name-mng-single-node":{"name":"GetRef.Name-mng-single-node","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":1,"asg-max-size":1,"asg-desired-capacity":1,"instance-types":["m5.16xlarge"],"volume-size":40}, "GetRef.Name-mng-multi-node":{"name":"GetRef.Name-mng-multi-node","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":1,"asg-max-size":100,"asg-desired-capacity":1,"instance-types":["m5.xlarge"],"volume-size":40}}' RUN_CONFORMANCE=false : "${PERFORMANCE_TEST_S3_BUCKET_NAME:=""}" else From ef4df241abee212084bf0a04228e0255787f1e9a Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Fri, 10 Jul 2020 12:20:39 -0400 Subject: [PATCH 07/43] Undo generated mng names. 
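The diff below settles on fixed managed-node-group names (cni-test-three-nodes-mng, cni-test-single-node-mng, cni-test-multi-node-mng) and pins each test deployment to its group through the eks.amazonaws.com/nodegroup node label. A quick way to confirm the label is present before the deployments scale, offered here only as a sketch and not part of the patch:

# List nodes with their node-group label, then filter to one of the groups named in this patch.
$KUBECTL_PATH get nodes -L eks.amazonaws.com/nodegroup
$KUBECTL_PATH get nodes -l eks.amazonaws.com/nodegroup=cni-test-single-node-mng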
--- scripts/lib/cluster.sh | 2 +- testdata/deploy-130-pods.yaml | 2 +- testdata/deploy-5000-pods.yaml | 2 +- testdata/deploy-730-pods.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/lib/cluster.sh b/scripts/lib/cluster.sh index d483ae5484..005c0ca46d 100644 --- a/scripts/lib/cluster.sh +++ b/scripts/lib/cluster.sh @@ -14,7 +14,7 @@ function down-test-cluster() { function up-test-cluster() { MNGS="" if [[ "$RUN_PERFORMANCE_TESTS" == true ]]; then - MNGS='{"GetRef.Name-mng-three-nodes":{"name":"GetRef.Name-mng-three-nodes","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":3,"asg-max-size":3,"asg-desired-capacity":3,"instance-types":["m5.xlarge"],"volume-size":40}, "GetRef.Name-mng-single-node":{"name":"GetRef.Name-mng-single-node","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":1,"asg-max-size":1,"asg-desired-capacity":1,"instance-types":["m5.16xlarge"],"volume-size":40}, "GetRef.Name-mng-multi-node":{"name":"GetRef.Name-mng-multi-node","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":1,"asg-max-size":100,"asg-desired-capacity":1,"instance-types":["m5.xlarge"],"volume-size":40}}' + MNGS='{"cni-test-three-nodes-mng":{"name":"cni-test-three-nodes-mng","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":3,"asg-max-size":3,"asg-desired-capacity":3,"instance-types":["m5.xlarge"],"volume-size":40}, "cni-test-single-node-mng":{"name":"cni-test-single-node-mng","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":1,"asg-max-size":1,"asg-desired-capacity":1,"instance-types":["m5.16xlarge"],"volume-size":40}, "cni-test-multi-node-mng":{"name":"cni-test-multi-node-mng","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":1,"asg-max-size":100,"asg-desired-capacity":1,"instance-types":["m5.xlarge"],"volume-size":40}}' RUN_CONFORMANCE=false : "${PERFORMANCE_TEST_S3_BUCKET_NAME:=""}" else diff --git a/testdata/deploy-130-pods.yaml b/testdata/deploy-130-pods.yaml index 1f4552d3b3..06b5dc8e78 100644 --- a/testdata/deploy-130-pods.yaml +++ b/testdata/deploy-130-pods.yaml @@ -23,4 +23,4 @@ spec: containerPort: 80 imagePullPolicy: IfNotPresent nodeSelector: - eks.amazonaws.com/nodegroup: three-nodes + eks.amazonaws.com/nodegroup: cni-test-three-nodes-mng diff --git a/testdata/deploy-5000-pods.yaml b/testdata/deploy-5000-pods.yaml index cb760f81fc..6ef572dffa 100644 --- a/testdata/deploy-5000-pods.yaml +++ b/testdata/deploy-5000-pods.yaml @@ -23,4 +23,4 @@ spec: containerPort: 80 imagePullPolicy: IfNotPresent nodeSelector: - eks.amazonaws.com/nodegroup: multi-node + eks.amazonaws.com/nodegroup: cni-test-multi-node-mng diff --git a/testdata/deploy-730-pods.yaml b/testdata/deploy-730-pods.yaml index 48db130811..97a4b913f4 100644 --- a/testdata/deploy-730-pods.yaml +++ b/testdata/deploy-730-pods.yaml @@ -23,4 +23,4 @@ spec: containerPort: 80 imagePullPolicy: IfNotPresent nodeSelector: - eks.amazonaws.com/nodegroup: single-node + eks.amazonaws.com/nodegroup: cni-test-single-node-mng From 5a40307ea1c88184c188019a5e35dec749695501 Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Fri, 10 
Jul 2020 13:13:34 -0400 Subject: [PATCH 08/43] Update data files for performance tests. --- scripts/lib/performance_tests.sh | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/scripts/lib/performance_tests.sh b/scripts/lib/performance_tests.sh index 61d0b5d1e9..856cc33d29 100644 --- a/scripts/lib/performance_tests.sh +++ b/scripts/lib/performance_tests.sh @@ -7,12 +7,10 @@ function check_for_timeout() { function save_results_to_file() { echo $filename - - echo $(date +"%m-%d-%Y-%T") >> $filename - echo $TEST_IMAGE_VERSION >> $filename - echo $((SCALE_UP_DURATION_ARRAY[0])), $((SCALE_DOWN_DURATION_ARRAY[0])) >> $filename - echo $((SCALE_UP_DURATION_ARRAY[1])), $((SCALE_DOWN_DURATION_ARRAY[1])) >> $filename - echo $((SCALE_UP_DURATION_ARRAY[2])), $((SCALE_DOWN_DURATION_ARRAY[2])) >> $filename + echo "Date", "\"slot1\"", "\"slot2\"" >> $filename + echo $(date +"%m-%d-%Y-%T"), $((SCALE_UP_DURATION_ARRAY[0])), $((SCALE_DOWN_DURATION_ARRAY[0])) >> $filename + echo $(date +"%m-%d-%Y-%T"), $((SCALE_UP_DURATION_ARRAY[1])), $((SCALE_DOWN_DURATION_ARRAY[1])) >> $filename + echo $(date +"%m-%d-%Y-%T"), $((SCALE_UP_DURATION_ARRAY[2])), $((SCALE_DOWN_DURATION_ARRAY[2])) >> $filename cat $filename if [[ ${#PERFORMANCE_TEST_S3_BUCKET_NAME} -gt 0 ]]; then @@ -74,7 +72,7 @@ function run_performance_test_130_pods() { echo "" DEPLOY_DURATION=$((SECONDS - DEPLOY_START)) - filename="pod-130-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T").csv" + filename="pod-130-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T")-${TEST_IMAGE_VERSION}.csv" save_results_to_file echo "TIMELINE: 130 Pod performance test took $DEPLOY_DURATION seconds." @@ -134,7 +132,7 @@ function run_performance_test_730_pods() { echo "" DEPLOY_DURATION=$((SECONDS - DEPLOY_START)) - filename="pod-730-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T").csv" + filename="pod-730-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T")-${TEST_IMAGE_VERSION}.csv" save_results_to_file echo "TIMELINE: 730 Pod performance test took $DEPLOY_DURATION seconds." @@ -206,7 +204,7 @@ function run_performance_test_5000_pods() { echo "" DEPLOY_DURATION=$((SECONDS - DEPLOY_START)) - filename="pod-5000-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T").csv" + filename="pod-5000-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T")-${TEST_IMAGE_VERSION}.csv" save_results_to_file echo "TIMELINE: 5000 Pod performance test took $DEPLOY_DURATION seconds." From 542382befda778b2f26e33cf8625dd668e0eb666 Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Fri, 10 Jul 2020 15:34:27 -0400 Subject: [PATCH 09/43] Fix auto scaling group name. 
--- scripts/lib/performance_tests.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/lib/performance_tests.sh b/scripts/lib/performance_tests.sh index 856cc33d29..2f8637b032 100644 --- a/scripts/lib/performance_tests.sh +++ b/scripts/lib/performance_tests.sh @@ -141,8 +141,9 @@ function run_performance_test_730_pods() { } function scale_nodes_for_5000_pod_test() { - AUTO_SCALE_GROUP_INFO=$(aws autoscaling describe-auto-scaling-groups | grep -B9 98,) - AUTO_SCALE_GROUP_NAME_WITH_QUOTES=$(echo "${AUTO_SCALE_GROUP_INFO%%:*}") + AUTO_SCALE_GROUP_INFO=$(aws autoscaling describe-auto-scaling-groups | grep -B8 100,) + AUTO_SCALE_GROUP_NAME_WITH_QUOTES=$(echo "${AUTO_SCALE_GROUP_INFO%%,*}") + AUTO_SCALE_GROUP_NAME_WITH_QUOTES=$(echo "${AUTO_SCALE_GROUP_NAME_WITH_QUOTES##* }") AUTO_SCALE_GROUP_NAME="${AUTO_SCALE_GROUP_NAME_WITH_QUOTES%\"}" AUTO_SCALE_GROUP_NAME=$(echo $AUTO_SCALE_GROUP_NAME | cut -c2-) echo $AUTO_SCALE_GROUP_NAME From 40173e0d8b7ea801aea36c0dcf8103fb45100c32 Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Fri, 10 Jul 2020 17:56:54 -0400 Subject: [PATCH 10/43] Debug 5000 pod performance test. --- scripts/lib/performance_tests.sh | 4 ++++ scripts/run-integration-tests.sh | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/lib/performance_tests.sh b/scripts/lib/performance_tests.sh index 2f8637b032..ad83c8a02e 100644 --- a/scripts/lib/performance_tests.sh +++ b/scripts/lib/performance_tests.sh @@ -142,9 +142,13 @@ function run_performance_test_730_pods() { function scale_nodes_for_5000_pod_test() { AUTO_SCALE_GROUP_INFO=$(aws autoscaling describe-auto-scaling-groups | grep -B8 100,) + echo "Group info ${AUTO_SCALE_GROUP_INFO}" AUTO_SCALE_GROUP_NAME_WITH_QUOTES=$(echo "${AUTO_SCALE_GROUP_INFO%%,*}") + echo "Group name with quotes ${AUTO_SCALE_GROUP_NAME_WITH_QUOTES}" AUTO_SCALE_GROUP_NAME_WITH_QUOTES=$(echo "${AUTO_SCALE_GROUP_NAME_WITH_QUOTES##* }") + echo "Group name with quotes ${AUTO_SCALE_GROUP_NAME_WITH_QUOTES}" AUTO_SCALE_GROUP_NAME="${AUTO_SCALE_GROUP_NAME_WITH_QUOTES%\"}" + echo "Group name ${AUTO_SCALE_GROUP_NAME}" AUTO_SCALE_GROUP_NAME=$(echo $AUTO_SCALE_GROUP_NAME | cut -c2-) echo $AUTO_SCALE_GROUP_NAME diff --git a/scripts/run-integration-tests.sh b/scripts/run-integration-tests.sh index e717a29bcf..accd571adc 100755 --- a/scripts/run-integration-tests.sh +++ b/scripts/run-integration-tests.sh @@ -223,10 +223,10 @@ if [[ "$RUN_PERFORMANCE_TESTS" == true ]]; then echo "Running performance tests on current image:" echo "" START=$SECONDS - run_performance_test_130_pods - run_performance_test_730_pods scale_nodes_for_5000_pod_test run_performance_test_5000_pods + run_performance_test_130_pods + run_performance_test_730_pods PERFORMANCE_DURATION=$((SECONDS - START)) fi From 615517a4ce75605f790e8d182c27c118d5660931 Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Fri, 10 Jul 2020 19:11:12 -0400 Subject: [PATCH 11/43] Debug to find out why autoscale group info isn't being retrieved. 
--- scripts/lib/performance_tests.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/lib/performance_tests.sh b/scripts/lib/performance_tests.sh index ad83c8a02e..7b27a64be6 100644 --- a/scripts/lib/performance_tests.sh +++ b/scripts/lib/performance_tests.sh @@ -141,7 +141,7 @@ function run_performance_test_730_pods() { } function scale_nodes_for_5000_pod_test() { - AUTO_SCALE_GROUP_INFO=$(aws autoscaling describe-auto-scaling-groups | grep -B8 100,) + AUTO_SCALE_GROUP_INFO=$(aws autoscaling describe-auto-scaling-groups | grep -B20 100,) echo "Group info ${AUTO_SCALE_GROUP_INFO}" AUTO_SCALE_GROUP_NAME_WITH_QUOTES=$(echo "${AUTO_SCALE_GROUP_INFO%%,*}") echo "Group name with quotes ${AUTO_SCALE_GROUP_NAME_WITH_QUOTES}" @@ -151,6 +151,7 @@ function scale_nodes_for_5000_pod_test() { echo "Group name ${AUTO_SCALE_GROUP_NAME}" AUTO_SCALE_GROUP_NAME=$(echo $AUTO_SCALE_GROUP_NAME | cut -c2-) echo $AUTO_SCALE_GROUP_NAME + sleep 2000 aws autoscaling update-auto-scaling-group \ --auto-scaling-group-name $AUTO_SCALE_GROUP_NAME \ From dcf5d4a8fc5d89558aeb3933632b888f319ef76a Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Sat, 11 Jul 2020 03:56:02 -0400 Subject: [PATCH 12/43] Adjust name line number. --- scripts/lib/performance_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/lib/performance_tests.sh b/scripts/lib/performance_tests.sh index 7b27a64be6..47788f088f 100644 --- a/scripts/lib/performance_tests.sh +++ b/scripts/lib/performance_tests.sh @@ -141,7 +141,7 @@ function run_performance_test_730_pods() { } function scale_nodes_for_5000_pod_test() { - AUTO_SCALE_GROUP_INFO=$(aws autoscaling describe-auto-scaling-groups | grep -B20 100,) + AUTO_SCALE_GROUP_INFO=$(aws autoscaling describe-auto-scaling-groups | grep -B18 100,) echo "Group info ${AUTO_SCALE_GROUP_INFO}" AUTO_SCALE_GROUP_NAME_WITH_QUOTES=$(echo "${AUTO_SCALE_GROUP_INFO%%,*}") echo "Group name with quotes ${AUTO_SCALE_GROUP_NAME_WITH_QUOTES}" From c7c37011928dec3b83249cc5a19a4df7ec5d29df Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Sat, 11 Jul 2020 16:41:43 -0400 Subject: [PATCH 13/43] Remove long debugging sleep. --- scripts/lib/performance_tests.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/lib/performance_tests.sh b/scripts/lib/performance_tests.sh index 47788f088f..d4431a676f 100644 --- a/scripts/lib/performance_tests.sh +++ b/scripts/lib/performance_tests.sh @@ -151,7 +151,6 @@ function scale_nodes_for_5000_pod_test() { echo "Group name ${AUTO_SCALE_GROUP_NAME}" AUTO_SCALE_GROUP_NAME=$(echo $AUTO_SCALE_GROUP_NAME | cut -c2-) echo $AUTO_SCALE_GROUP_NAME - sleep 2000 aws autoscaling update-auto-scaling-group \ --auto-scaling-group-name $AUTO_SCALE_GROUP_NAME \ From 30609900adf96d045a26e783daf845b42a817669 Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Sun, 12 Jul 2020 18:30:41 -0400 Subject: [PATCH 14/43] Add failure checking for performance tests. 
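Condensed, the failure handling this patch introduces works as follows: each scale-up or scale-down wait loop passes its own start time to check_for_timeout, a single timeout (over 1500 seconds) marks that iteration as failed so its durations are discarded, and a second timeout aborts the run via on_error. The sketch below restates that flow in one place; setting HAS_FAILED inside the timeout check is an assumption here, since that wiring only lands in the follow-up "Updated failure checking fixes" commit.

# Sketch of the intended timeout/failure flow (the HAS_FAILED assignment is assumed, not in this diff).
function check_for_timeout() {
    local iteration_start=$1
    if [[ $((SECONDS - iteration_start)) -gt 1500 ]]; then
        HAS_FAILED=true                        # assumed: drop this iteration's measurements
        FAILURE_COUNT=$((FAILURE_COUNT + 1))
        if [[ $FAILURE_COUNT -gt 1 ]]; then
            RUNNING_PERFORMANCE=false          # allow on_error to tear the cluster down
            on_error
        fi
    fi
}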
--- scripts/lib/performance_tests.sh | 63 +++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 21 deletions(-) diff --git a/scripts/lib/performance_tests.sh b/scripts/lib/performance_tests.sh index d4431a676f..5a429fd240 100644 --- a/scripts/lib/performance_tests.sh +++ b/scripts/lib/performance_tests.sh @@ -1,7 +1,10 @@ function check_for_timeout() { - if [[ $((SECONDS - $1)) -gt 10000 ]]; then - RUNNING_PERFORMANCE=false - on_error + if [[ $((SECONDS - $1)) -gt 1500 ]]; then + FAILURE_COUNT=$((FAILURE_COUNT + 1)) + if [[ $FAILURE_COUNT -gt 1 ]]; then + RUNNING_PERFORMANCE=false + on_error + fi fi } @@ -26,22 +29,26 @@ function run_performance_test_130_pods() { $KUBECTL_PATH apply -f ./testdata/deploy-130-pods.yaml DEPLOY_START=$SECONDS + FAILURE_COUNT=0 SCALE_UP_DURATION_ARRAY=() SCALE_DOWN_DURATION_ARRAY=() - while [ ${#SCALE_UP_DURATION_ARRAY[@]} -lt 3 ] + while [ ${#SCALE_DOWN_DURATION_ARRAY[@]} -lt 3 ] do ITERATION_START=$SECONDS + HAS_FAILED=false $KUBECTL_PATH scale -f ./testdata/deploy-130-pods.yaml --replicas=130 - while [[ ! $($KUBECTL_PATH get deploy | grep 130/130) ]] + while [[ ! $($KUBECTL_PATH get deploy | grep 130/130) && "$HAS_FAILED" == false ]] do sleep 1 echo "Scaling UP" echo $($KUBECTL_PATH get deploy) - check_for_timeout $DEPLOY_START + check_for_timeout $ITERATION_START done - SCALE_UP_DURATION_ARRAY+=( $((SECONDS - ITERATION_START)) ) + if [[ "$HAS_FAILED" == false ]]; then + SCALE_UP_DURATION_ARRAY+=( $((SECONDS - ITERATION_START)) ) + fi MIDPOINT_START=$SECONDS $KUBECTL_PATH scale -f ./testdata/deploy-130-pods.yaml --replicas=0 while [[ $($KUBECTL_PATH get pods) ]] @@ -49,9 +56,11 @@ function run_performance_test_130_pods() { sleep 1 echo "Scaling DOWN" echo $($KUBECTL_PATH get deploy) - check_for_timeout $DEPLOY_START + check_for_timeout $ITERATION_START done - SCALE_DOWN_DURATION_ARRAY+=($((SECONDS - MIDPOINT_START))) + if [[ "$HAS_FAILED" == false ]]; then + SCALE_DOWN_DURATION_ARRAY+=($((SECONDS - MIDPOINT_START))) + fi done echo "Times to scale up:" @@ -86,22 +95,26 @@ function run_performance_test_730_pods() { $KUBECTL_PATH apply -f ./testdata/deploy-730-pods.yaml DEPLOY_START=$SECONDS + FAILURE_COUNT=0 SCALE_UP_DURATION_ARRAY=() SCALE_DOWN_DURATION_ARRAY=() - while [ ${#SCALE_UP_DURATION_ARRAY[@]} -lt 3 ] + while [ ${#SCALE_DOWN_DURATION_ARRAY[@]} -lt 3 ] do ITERATION_START=$SECONDS + HAS_FAILED=false $KUBECTL_PATH scale -f ./testdata/deploy-730-pods.yaml --replicas=730 - while [[ ! $($KUBECTL_PATH get deploy | grep 730/730) ]] + while [[ ! 
$($KUBECTL_PATH get deploy | grep 730/730) && "$HAS_FAILED" == false ]] do sleep 2 echo "Scaling UP" echo $($KUBECTL_PATH get deploy) - check_for_timeout $DEPLOY_START + check_for_timeout $ITERATION_START done - SCALE_UP_DURATION_ARRAY+=( $((SECONDS - ITERATION_START)) ) + if [[ "$HAS_FAILED" == false ]]; then + SCALE_UP_DURATION_ARRAY+=( $((SECONDS - ITERATION_START)) ) + fi MIDPOINT_START=$SECONDS $KUBECTL_PATH scale -f ./testdata/deploy-730-pods.yaml --replicas=0 while [[ $($KUBECTL_PATH get pods) ]] @@ -109,9 +122,11 @@ function run_performance_test_730_pods() { sleep 2 echo "Scaling DOWN" echo $($KUBECTL_PATH get deploy) - check_for_timeout $DEPLOY_START + check_for_timeout $ITERATION_START done - SCALE_DOWN_DURATION_ARRAY+=($((SECONDS - MIDPOINT_START))) + if [[ "$HAS_FAILED" == false ]]; then + SCALE_DOWN_DURATION_ARRAY+=($((SECONDS - MIDPOINT_START))) + fi done echo "Times to scale up:" @@ -163,22 +178,26 @@ function run_performance_test_5000_pods() { $KUBECTL_PATH apply -f ./testdata/deploy-5000-pods.yaml DEPLOY_START=$SECONDS + FAILURE_COUNT=0 SCALE_UP_DURATION_ARRAY=() SCALE_DOWN_DURATION_ARRAY=() - while [ ${#SCALE_UP_DURATION_ARRAY[@]} -lt 3 ] + while [ ${#SCALE_DOWN_DURATION_ARRAY[@]} -lt 3 ] do ITERATION_START=$SECONDS + HAS_FAILED=false $KUBECTL_PATH scale -f ./testdata/deploy-5000-pods.yaml --replicas=5000 - while [[ ! $($KUBECTL_PATH get deploy | grep 5000/5000) ]] + while [[ ! $($KUBECTL_PATH get deploy | grep 5000/5000) && "$HAS_FAILED" == false ]] do sleep 2 echo "Scaling UP" echo $($KUBECTL_PATH get deploy) - check_for_timeout $DEPLOY_START + check_for_timeout $ITERATION_START done - SCALE_UP_DURATION_ARRAY+=( $((SECONDS - ITERATION_START)) ) + if [[ "$HAS_FAILED" == false ]]; then + SCALE_UP_DURATION_ARRAY+=( $((SECONDS - ITERATION_START)) ) + fi MIDPOINT_START=$SECONDS $KUBECTL_PATH scale -f ./testdata/deploy-5000-pods.yaml --replicas=0 while [[ $($KUBECTL_PATH get pods) ]] @@ -186,9 +205,11 @@ function run_performance_test_5000_pods() { sleep 2 echo "Scaling DOWN" echo $($KUBECTL_PATH get deploy) - check_for_timeout $DEPLOY_START + check_for_timeout $ITERATION_START done - SCALE_DOWN_DURATION_ARRAY+=($((SECONDS - MIDPOINT_START))) + if [[ "$HAS_FAILED" == false ]]; then + SCALE_DOWN_DURATION_ARRAY+=($((SECONDS - MIDPOINT_START))) + fi done echo "Times to scale up:" From 42b79e95d5858027763b308f551a268c8bc65375 Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Mon, 13 Jul 2020 12:07:56 -0400 Subject: [PATCH 15/43] Updated failure checking fixes. 
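The 130-, 730-, and 5000-pod tests above repeat the same scale-up timing loop with different manifests and replica counts. A hypothetical consolidation, shown only as a sketch (measure_scale_up and its arguments are not part of the scripts):

    # Scale a deployment to a target replica count, print the elapsed seconds,
    # and return non-zero if check_for_timeout marked the iteration as failed.
    function measure_scale_up() {
        local manifest=$1 replicas=$2 start=$SECONDS
        $KUBECTL_PATH scale -f "$manifest" --replicas="$replicas"
        while ! $KUBECTL_PATH get deploy | grep -q "$replicas/$replicas"; do
            sleep 2
            check_for_timeout "$start"
            [[ "$HAS_FAILED" == true ]] && return 1
        done
        echo $((SECONDS - start))
    }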
--- scripts/lib/performance_tests.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/lib/performance_tests.sh b/scripts/lib/performance_tests.sh index 5a429fd240..13d5e56ccf 100644 --- a/scripts/lib/performance_tests.sh +++ b/scripts/lib/performance_tests.sh @@ -1,6 +1,7 @@ function check_for_timeout() { if [[ $((SECONDS - $1)) -gt 1500 ]]; then FAILURE_COUNT=$((FAILURE_COUNT + 1)) + HAS_FAILED=true if [[ $FAILURE_COUNT -gt 1 ]]; then RUNNING_PERFORMANCE=false on_error @@ -56,7 +57,6 @@ function run_performance_test_130_pods() { sleep 1 echo "Scaling DOWN" echo $($KUBECTL_PATH get deploy) - check_for_timeout $ITERATION_START done if [[ "$HAS_FAILED" == false ]]; then SCALE_DOWN_DURATION_ARRAY+=($((SECONDS - MIDPOINT_START))) @@ -122,7 +122,6 @@ function run_performance_test_730_pods() { sleep 2 echo "Scaling DOWN" echo $($KUBECTL_PATH get deploy) - check_for_timeout $ITERATION_START done if [[ "$HAS_FAILED" == false ]]; then SCALE_DOWN_DURATION_ARRAY+=($((SECONDS - MIDPOINT_START))) @@ -205,7 +204,6 @@ function run_performance_test_5000_pods() { sleep 2 echo "Scaling DOWN" echo $($KUBECTL_PATH get deploy) - check_for_timeout $ITERATION_START done if [[ "$HAS_FAILED" == false ]]; then SCALE_DOWN_DURATION_ARRAY+=($((SECONDS - MIDPOINT_START))) From 485d598db8359ea7027e28ed105c93c716116eaa Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Mon, 13 Jul 2020 16:54:11 -0400 Subject: [PATCH 16/43] Upload files to corresponding folders in s3 bucket. --- scripts/lib/performance_tests.sh | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/lib/performance_tests.sh b/scripts/lib/performance_tests.sh index 13d5e56ccf..7a6a6aad1e 100644 --- a/scripts/lib/performance_tests.sh +++ b/scripts/lib/performance_tests.sh @@ -4,8 +4,10 @@ function check_for_timeout() { HAS_FAILED=true if [[ $FAILURE_COUNT -gt 1 ]]; then RUNNING_PERFORMANCE=false + echo "Failed twice, deprovisioning cluster" on_error fi + echo "Failed once, retrying" fi } @@ -18,7 +20,7 @@ function save_results_to_file() { cat $filename if [[ ${#PERFORMANCE_TEST_S3_BUCKET_NAME} -gt 0 ]]; then - aws s3 cp $filename $PERFORMANCE_TEST_S3_BUCKET_NAME + aws s3 cp $filename ${PERFORMANCE_TEST_S3_BUCKET_NAME}${1} else echo "No S3 bucket name given, skipping test result upload." fi @@ -82,7 +84,7 @@ function run_performance_test_130_pods() { DEPLOY_DURATION=$((SECONDS - DEPLOY_START)) filename="pod-130-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T")-${TEST_IMAGE_VERSION}.csv" - save_results_to_file + save_results_to_file "/130-pods/" echo "TIMELINE: 130 Pod performance test took $DEPLOY_DURATION seconds." RUNNING_PERFORMANCE=false @@ -147,7 +149,7 @@ function run_performance_test_730_pods() { DEPLOY_DURATION=$((SECONDS - DEPLOY_START)) filename="pod-730-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T")-${TEST_IMAGE_VERSION}.csv" - save_results_to_file + save_results_to_file "/730-pods/" echo "TIMELINE: 730 Pod performance test took $DEPLOY_DURATION seconds." RUNNING_PERFORMANCE=false @@ -229,7 +231,7 @@ function run_performance_test_5000_pods() { DEPLOY_DURATION=$((SECONDS - DEPLOY_START)) filename="pod-5000-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T")-${TEST_IMAGE_VERSION}.csv" - save_results_to_file + save_results_to_file "/5000-pods/" echo "TIMELINE: 5000 Pod performance test took $DEPLOY_DURATION seconds." 
RUNNING_PERFORMANCE=false From 2d8d01b9b46cec19df79ee90227c250e07cb79cc Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Mon, 13 Jul 2020 18:28:53 -0400 Subject: [PATCH 17/43] Check for slow performance test WIP. --- scripts/lib/performance_tests.sh | 10 ++++++++++ test/integration/README.md | 1 + 2 files changed, 11 insertions(+) diff --git a/scripts/lib/performance_tests.sh b/scripts/lib/performance_tests.sh index 7a6a6aad1e..e890b0d7ad 100644 --- a/scripts/lib/performance_tests.sh +++ b/scripts/lib/performance_tests.sh @@ -26,6 +26,13 @@ function save_results_to_file() { fi } +function check_for_slow_performance() { + BUCKET=s3://cni-scale-test-data${1} + KEY=`aws s3 ls ${BUCKET} | sort | tail -n 1 | awk '{print $4}'` + echo $KEY + aws s3 cp s3://$BUCKET/$KEY ./latest-object +} + function run_performance_test_130_pods() { echo "Running performance tests against cluster" RUNNING_PERFORMANCE=true @@ -85,6 +92,7 @@ function run_performance_test_130_pods() { filename="pod-130-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T")-${TEST_IMAGE_VERSION}.csv" save_results_to_file "/130-pods/" + check_for_slow_performance "/130-pods/" echo "TIMELINE: 130 Pod performance test took $DEPLOY_DURATION seconds." RUNNING_PERFORMANCE=false @@ -150,6 +158,7 @@ function run_performance_test_730_pods() { filename="pod-730-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T")-${TEST_IMAGE_VERSION}.csv" save_results_to_file "/730-pods/" + check_for_slow_performance "/730-pods/" echo "TIMELINE: 730 Pod performance test took $DEPLOY_DURATION seconds." RUNNING_PERFORMANCE=false @@ -232,6 +241,7 @@ function run_performance_test_5000_pods() { filename="pod-5000-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T")-${TEST_IMAGE_VERSION}.csv" save_results_to_file "/5000-pods/" + check_for_slow_performance "/5000-pods/" echo "TIMELINE: 5000 Pod performance test took $DEPLOY_DURATION seconds." RUNNING_PERFORMANCE=false diff --git a/test/integration/README.md b/test/integration/README.md index 0328f66ed3..360af74051 100644 --- a/test/integration/README.md +++ b/test/integration/README.md @@ -9,6 +9,7 @@ * run from cni test account to upload test results * set PERFORMANCE_TEST_S3_BUCKET_NAME to the name of the bucket (likely s3://cni-performance-test-data) * set RUN_PERFORMANCE_TESTS=true + * NOTE: if running on previous versions, change the date inside of the file to the date of release so as to not confuse graphing order # KOPS * set RUN_KOPS_TEST=true From efd01513fcb5b6a0bdc9ef3541c549e4feed2aea Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Mon, 13 Jul 2020 19:38:44 -0400 Subject: [PATCH 18/43] Check for slow performance update. 
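The WIP helper above finds the newest results file by listing the folder: aws s3 ls prints one "date time size key" row per object, so a plain sort puts the most recent upload last and awk's fourth column is the object key. Spelled out below (the prefix matches the 130-pod case and is only an example; the copy uses the corrected form from the later fixes rather than the doubled s3:// in this WIP version):

    BUCKET=s3://cni-scale-test-data/130-pods/      # example folder
    LATEST_KEY=$(aws s3 ls "$BUCKET" | sort | tail -n 1 | awk '{print $4}')
    aws s3 cp "${BUCKET}${LATEST_KEY}" ./latest-object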
--- scripts/lib/performance_tests.sh | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/scripts/lib/performance_tests.sh b/scripts/lib/performance_tests.sh index e890b0d7ad..c208e63ee9 100644 --- a/scripts/lib/performance_tests.sh +++ b/scripts/lib/performance_tests.sh @@ -28,9 +28,21 @@ function save_results_to_file() { function check_for_slow_performance() { BUCKET=s3://cni-scale-test-data${1} - KEY=`aws s3 ls ${BUCKET} | sort | tail -n 1 | awk '{print $4}'` - echo $KEY - aws s3 cp s3://$BUCKET/$KEY ./latest-object + FILE1=`aws s3 ls ${BUCKET} | sort | tail -n 1 | awk '{print $4}'` + FILE2=`aws s3 ls ${BUCKET} | sort | tail -n 2 | sed '1 p' | awk '{print $4}'` + FILE3=`aws s3 ls ${BUCKET} | sort | tail -n 3 | sed '1 p' | awk '{print $4}'` + + PERFORMANCE_UP_AVERAGE_ARRAY=() + PERFORMANCE_DOWN_AVERAGE_ARRAY=() + #find_performance_duration_average $FILE1 1 +} + +function find_performance_duration_average() { + aws s3 cp s3://$BUCKET/$1 performance_test${2}.csv + SCALE_UP_TEMP_DURATION_ARRAY=() + SCALE_DOWN_TEMP_DURATION_ARRAY=() + cat performance_test${2}.csv | sed '2 p' + } function run_performance_test_130_pods() { From 3d3f14ecb91a4873f2b151c9f9bad381984311e6 Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Tue, 14 Jul 2020 01:18:15 -0400 Subject: [PATCH 19/43] Change order of performance tests. --- scripts/run-integration-tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/run-integration-tests.sh b/scripts/run-integration-tests.sh index accd571adc..09797a8591 100755 --- a/scripts/run-integration-tests.sh +++ b/scripts/run-integration-tests.sh @@ -224,9 +224,9 @@ if [[ "$RUN_PERFORMANCE_TESTS" == true ]]; then echo "" START=$SECONDS scale_nodes_for_5000_pod_test - run_performance_test_5000_pods run_performance_test_130_pods run_performance_test_730_pods + run_performance_test_5000_pods PERFORMANCE_DURATION=$((SECONDS - START)) fi From a5b20865e6081708ebfefa76a49a110a9eb0286d Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Tue, 14 Jul 2020 16:37:10 -0400 Subject: [PATCH 20/43] Attempt performance test fail threshold. --- scripts/lib/performance_tests.sh | 63 ++++++++++++++++++++++++-------- 1 file changed, 47 insertions(+), 16 deletions(-) diff --git a/scripts/lib/performance_tests.sh b/scripts/lib/performance_tests.sh index c208e63ee9..6663a60bda 100644 --- a/scripts/lib/performance_tests.sh +++ b/scripts/lib/performance_tests.sh @@ -32,17 +32,34 @@ function check_for_slow_performance() { FILE2=`aws s3 ls ${BUCKET} | sort | tail -n 2 | sed '1 p' | awk '{print $4}'` FILE3=`aws s3 ls ${BUCKET} | sort | tail -n 3 | sed '1 p' | awk '{print $4}'` - PERFORMANCE_UP_AVERAGE_ARRAY=() - PERFORMANCE_DOWN_AVERAGE_ARRAY=() - #find_performance_duration_average $FILE1 1 + PAST_PERFORMANCE_UP_AVERAGE_SUM=0 + PAST_PERFORMANCE_DOWN_AVERAGE_SUM=0 + find_performance_duration_average $FILE1 1 + find_performance_duration_average $FILE2 2 + find_performance_duration_average $FILE3 3 + PAST_PERFORMANCE_UP_AVERAGE=$((PAST_PERFORMANCE_UP_AVERAGE_SUM / 3)) + PAST_PERFORMANCE_DOWN_AVERAGE=$((PAST_PERFORMANCE_DOWN_AVERAGE_SUM / 3)) + + # Divided by 3 to get current average, multiply past averages by 5/4 to get 25% window + if [[ $((CURRENT_PERFORMANCE_UP_SUM / 3)) -gt $((PAST_PERFORMANCE_UP_AVERAGE * 5 / 4)) ]]; then + echo "FAILURE! Performance test took >25% longer than the past three tests!" + echo "Look into how current changes could cause cni inefficiency." 
+ on_error + fi } function find_performance_duration_average() { aws s3 cp s3://$BUCKET/$1 performance_test${2}.csv - SCALE_UP_TEMP_DURATION_ARRAY=() - SCALE_DOWN_TEMP_DURATION_ARRAY=() - cat performance_test${2}.csv | sed '2 p' - + SCALE_UP_TEMP_DURATION_SUM=0 + SCALE_DOWN_TEMP_DURATION_SUM=0 + for i in {2..4} + do + TEMP=$(sed -n "${i} p" performance_test${2}.csv) + SCALE_UP_TEMP_DURATION_SUM=$((SCALE_UP_TEMP_DURATION_SUM + $(echo "${TEMP%%,*}")))) + SCALE_DOWN_TEMP_DURATION_SUM+=$((SCALE_DOWN_TEMP_DURATION_SUM + ($(echo "${TEMP##*,}")))) + done + PAST_PERFORMANCE_UP_AVERAGE_SUM=($PAST_PERFORMANCE_UP_AVERAGE_SUM + $((SCALE_UP_TEMP_DURATION_SUM / 3))) + PAST_PERFORMANCE_DOWN_AVERAGE_SUM=($PAST_PERFORMANCE_DOWN_AVERAGE_SUM + $((SCALE_DOWN_TEMP_DURATION_SUM / 3))) } function run_performance_test_130_pods() { @@ -55,6 +72,8 @@ function run_performance_test_130_pods() { SCALE_UP_DURATION_ARRAY=() SCALE_DOWN_DURATION_ARRAY=() + CURRENT_PERFORMANCE_UP_SUM=0 + CURRENT_PERFORMANCE_DOWN_SUM=0 while [ ${#SCALE_DOWN_DURATION_ARRAY[@]} -lt 3 ] do ITERATION_START=$SECONDS @@ -69,7 +88,9 @@ function run_performance_test_130_pods() { done if [[ "$HAS_FAILED" == false ]]; then - SCALE_UP_DURATION_ARRAY+=( $((SECONDS - ITERATION_START)) ) + DURATION=$((SECONDS - ITERATION_START)) + SCALE_UP_DURATION_ARRAY+=( $DURATION ) + CURRENT_PERFORMANCE_UP_SUM=$((CURRENT_PERFORMANCE_UP_SUM + DURATION)) fi MIDPOINT_START=$SECONDS $KUBECTL_PATH scale -f ./testdata/deploy-130-pods.yaml --replicas=0 @@ -80,7 +101,9 @@ function run_performance_test_130_pods() { echo $($KUBECTL_PATH get deploy) done if [[ "$HAS_FAILED" == false ]]; then - SCALE_DOWN_DURATION_ARRAY+=($((SECONDS - MIDPOINT_START))) + DURATION=$((SECONDS - ITERATION_START)) + SCALE_DOWN_DURATION_ARRAY+=( $DURATION ) + CURRENT_PERFORMANCE_DOWN_SUM=$((CURRENT_PERFORMANCE_DOWN_SUM + DURATION)) fi done @@ -104,10 +127,10 @@ function run_performance_test_130_pods() { filename="pod-130-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T")-${TEST_IMAGE_VERSION}.csv" save_results_to_file "/130-pods/" - check_for_slow_performance "/130-pods/" echo "TIMELINE: 130 Pod performance test took $DEPLOY_DURATION seconds." RUNNING_PERFORMANCE=false + check_for_slow_performance "/130-pods/" $KUBECTL_PATH delete -f ./testdata/deploy-130-pods.yaml } @@ -135,7 +158,9 @@ function run_performance_test_730_pods() { done if [[ "$HAS_FAILED" == false ]]; then - SCALE_UP_DURATION_ARRAY+=( $((SECONDS - ITERATION_START)) ) + DURATION=$((SECONDS - ITERATION_START)) + SCALE_UP_DURATION_ARRAY+=( $DURATION ) + CURRENT_PERFORMANCE_UP_SUM=$((CURRENT_PERFORMANCE_UP_SUM + DURATION)) fi MIDPOINT_START=$SECONDS $KUBECTL_PATH scale -f ./testdata/deploy-730-pods.yaml --replicas=0 @@ -146,7 +171,9 @@ function run_performance_test_730_pods() { echo $($KUBECTL_PATH get deploy) done if [[ "$HAS_FAILED" == false ]]; then - SCALE_DOWN_DURATION_ARRAY+=($((SECONDS - MIDPOINT_START))) + DURATION=$((SECONDS - ITERATION_START)) + SCALE_DOWN_DURATION_ARRAY+=( $DURATION ) + CURRENT_PERFORMANCE_DOWN_SUM=$((CURRENT_PERFORMANCE_DOWN_SUM + DURATION)) fi done @@ -170,10 +197,10 @@ function run_performance_test_730_pods() { filename="pod-730-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T")-${TEST_IMAGE_VERSION}.csv" save_results_to_file "/730-pods/" - check_for_slow_performance "/730-pods/" echo "TIMELINE: 730 Pod performance test took $DEPLOY_DURATION seconds." 
RUNNING_PERFORMANCE=false + check_for_slow_performance "/730-pods/" $KUBECTL_PATH delete -f ./testdata/deploy-730-pods.yaml } @@ -218,7 +245,9 @@ function run_performance_test_5000_pods() { done if [[ "$HAS_FAILED" == false ]]; then - SCALE_UP_DURATION_ARRAY+=( $((SECONDS - ITERATION_START)) ) + DURATION=$((SECONDS - ITERATION_START)) + SCALE_UP_DURATION_ARRAY+=( $DURATION ) + CURRENT_PERFORMANCE_UP_SUM=$((CURRENT_PERFORMANCE_UP_SUM + DURATION)) fi MIDPOINT_START=$SECONDS $KUBECTL_PATH scale -f ./testdata/deploy-5000-pods.yaml --replicas=0 @@ -229,7 +258,9 @@ function run_performance_test_5000_pods() { echo $($KUBECTL_PATH get deploy) done if [[ "$HAS_FAILED" == false ]]; then - SCALE_DOWN_DURATION_ARRAY+=($((SECONDS - MIDPOINT_START))) + DURATION=$((SECONDS - ITERATION_START)) + SCALE_DOWN_DURATION_ARRAY+=( $DURATION ) + CURRENT_PERFORMANCE_DOWN_SUM=$((CURRENT_PERFORMANCE_DOWN_SUM + DURATION)) fi done @@ -253,9 +284,9 @@ function run_performance_test_5000_pods() { filename="pod-5000-Test#${TEST_ID}-$(date +"%m-%d-%Y-%T")-${TEST_IMAGE_VERSION}.csv" save_results_to_file "/5000-pods/" - check_for_slow_performance "/5000-pods/" echo "TIMELINE: 5000 Pod performance test took $DEPLOY_DURATION seconds." RUNNING_PERFORMANCE=false + check_for_slow_performance "/5000-pods/" $KUBECTL_PATH delete -f ./testdata/deploy-5000-pods.yaml } From ee6a9acdcb3012acf8007c807b4e798c91a4a2be Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Tue, 14 Jul 2020 18:40:36 -0400 Subject: [PATCH 21/43] Weekly performance test (midnight Wednesday) --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 47d1583564..bf07754b9e 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -155,7 +155,7 @@ workflows: weekly-test-run: triggers: - schedule: - cron: "0 0 * * 6" + cron: "0 4 * * 2" filters: branches: only: From 9d0aa34b84b606a9270d1a3cbc4b08bfab48f22d Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Tue, 14 Jul 2020 20:14:48 -0400 Subject: [PATCH 22/43] Fix syntax error and try again. 
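For reference, the regression gate introduced in PATCH 20 above works entirely in integer arithmetic: "more than 25% slower" is encoded as current average > past average * 5 / 4. With invented numbers:

    PAST_PERFORMANCE_UP_AVERAGE=100     # hypothetical baseline average, in seconds
    CURRENT_PERFORMANCE_UP_SUM=390      # hypothetical sum of the three current runs
    if [[ $((CURRENT_PERFORMANCE_UP_SUM / 3)) -gt $((PAST_PERFORMANCE_UP_AVERAGE * 5 / 4)) ]]; then
        echo "130 > 125, so this run would be flagged as a >25% regression"
    fi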
--- scripts/lib/performance_tests.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/lib/performance_tests.sh b/scripts/lib/performance_tests.sh index 6663a60bda..d4b90024a3 100644 --- a/scripts/lib/performance_tests.sh +++ b/scripts/lib/performance_tests.sh @@ -49,14 +49,14 @@ function check_for_slow_performance() { } function find_performance_duration_average() { - aws s3 cp s3://$BUCKET/$1 performance_test${2}.csv + aws s3 cp $BUCKET/$1 performance_test${2}.csv SCALE_UP_TEMP_DURATION_SUM=0 SCALE_DOWN_TEMP_DURATION_SUM=0 for i in {2..4} do TEMP=$(sed -n "${i} p" performance_test${2}.csv) - SCALE_UP_TEMP_DURATION_SUM=$((SCALE_UP_TEMP_DURATION_SUM + $(echo "${TEMP%%,*}")))) - SCALE_DOWN_TEMP_DURATION_SUM+=$((SCALE_DOWN_TEMP_DURATION_SUM + ($(echo "${TEMP##*,}")))) + SCALE_UP_TEMP_DURATION_SUM=$((SCALE_UP_TEMP_DURATION_SUM + ${TEMP%%,*})) + SCALE_DOWN_TEMP_DURATION_SUM+=$((SCALE_DOWN_TEMP_DURATION_SUM + ${TEMP##*,})) done PAST_PERFORMANCE_UP_AVERAGE_SUM=($PAST_PERFORMANCE_UP_AVERAGE_SUM + $((SCALE_UP_TEMP_DURATION_SUM / 3))) PAST_PERFORMANCE_DOWN_AVERAGE_SUM=($PAST_PERFORMANCE_DOWN_AVERAGE_SUM + $((SCALE_DOWN_TEMP_DURATION_SUM / 3))) @@ -207,9 +207,9 @@ function run_performance_test_730_pods() { function scale_nodes_for_5000_pod_test() { AUTO_SCALE_GROUP_INFO=$(aws autoscaling describe-auto-scaling-groups | grep -B18 100,) echo "Group info ${AUTO_SCALE_GROUP_INFO}" - AUTO_SCALE_GROUP_NAME_WITH_QUOTES=$(echo "${AUTO_SCALE_GROUP_INFO%%,*}") + AUTO_SCALE_GROUP_NAME_WITH_QUOTES=${AUTO_SCALE_GROUP_INFO%%,*} echo "Group name with quotes ${AUTO_SCALE_GROUP_NAME_WITH_QUOTES}" - AUTO_SCALE_GROUP_NAME_WITH_QUOTES=$(echo "${AUTO_SCALE_GROUP_NAME_WITH_QUOTES##* }") + AUTO_SCALE_GROUP_NAME_WITH_QUOTES=${AUTO_SCALE_GROUP_NAME_WITH_QUOTES##* } echo "Group name with quotes ${AUTO_SCALE_GROUP_NAME_WITH_QUOTES}" AUTO_SCALE_GROUP_NAME="${AUTO_SCALE_GROUP_NAME_WITH_QUOTES%\"}" echo "Group name ${AUTO_SCALE_GROUP_NAME}" From 0325ae5d579da62aac284d8dd2e5a17201dce378 Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Wed, 15 Jul 2020 00:18:08 -0400 Subject: [PATCH 23/43] Fix syntax for slow checking. 
--- scripts/lib/performance_tests.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/lib/performance_tests.sh b/scripts/lib/performance_tests.sh index d4b90024a3..f1857232b8 100644 --- a/scripts/lib/performance_tests.sh +++ b/scripts/lib/performance_tests.sh @@ -49,17 +49,17 @@ function check_for_slow_performance() { } function find_performance_duration_average() { - aws s3 cp $BUCKET/$1 performance_test${2}.csv + aws s3 cp ${BUCKET}${1} performance_test${2}.csv SCALE_UP_TEMP_DURATION_SUM=0 SCALE_DOWN_TEMP_DURATION_SUM=0 for i in {2..4} do TEMP=$(sed -n "${i} p" performance_test${2}.csv) SCALE_UP_TEMP_DURATION_SUM=$((SCALE_UP_TEMP_DURATION_SUM + ${TEMP%%,*})) - SCALE_DOWN_TEMP_DURATION_SUM+=$((SCALE_DOWN_TEMP_DURATION_SUM + ${TEMP##*,})) + SCALE_DOWN_TEMP_DURATION_SUM=$((SCALE_DOWN_TEMP_DURATION_SUM + ${TEMP##*,})) done - PAST_PERFORMANCE_UP_AVERAGE_SUM=($PAST_PERFORMANCE_UP_AVERAGE_SUM + $((SCALE_UP_TEMP_DURATION_SUM / 3))) - PAST_PERFORMANCE_DOWN_AVERAGE_SUM=($PAST_PERFORMANCE_DOWN_AVERAGE_SUM + $((SCALE_DOWN_TEMP_DURATION_SUM / 3))) + PAST_PERFORMANCE_UP_AVERAGE_SUM=$(($PAST_PERFORMANCE_UP_AVERAGE_SUM + $((SCALE_UP_TEMP_DURATION_SUM / 3)))) + PAST_PERFORMANCE_DOWN_AVERAGE_SUM=$(($PAST_PERFORMANCE_DOWN_AVERAGE_SUM + $((SCALE_DOWN_TEMP_DURATION_SUM / 3)))) } function run_performance_test_130_pods() { From b76d9b0018c90523b3c9f78b4b3ec581d07e2c90 Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Wed, 15 Jul 2020 03:21:13 -0400 Subject: [PATCH 24/43] Proper line splicing. --- scripts/lib/performance_tests.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/lib/performance_tests.sh b/scripts/lib/performance_tests.sh index f1857232b8..4459a6df4d 100644 --- a/scripts/lib/performance_tests.sh +++ b/scripts/lib/performance_tests.sh @@ -55,8 +55,9 @@ function find_performance_duration_average() { for i in {2..4} do TEMP=$(sed -n "${i} p" performance_test${2}.csv) - SCALE_UP_TEMP_DURATION_SUM=$((SCALE_UP_TEMP_DURATION_SUM + ${TEMP%%,*})) - SCALE_DOWN_TEMP_DURATION_SUM=$((SCALE_DOWN_TEMP_DURATION_SUM + ${TEMP##*,})) + PAIR=${TEMP#*,} + SCALE_UP_TEMP_DURATION_SUM=$((SCALE_UP_TEMP_DURATION_SUM + ${PAIR%%,*})) + SCALE_DOWN_TEMP_DURATION_SUM=$((SCALE_DOWN_TEMP_DURATION_SUM + ${PAIR##*,})) done PAST_PERFORMANCE_UP_AVERAGE_SUM=$(($PAST_PERFORMANCE_UP_AVERAGE_SUM + $((SCALE_UP_TEMP_DURATION_SUM / 3)))) PAST_PERFORMANCE_DOWN_AVERAGE_SUM=$(($PAST_PERFORMANCE_DOWN_AVERAGE_SUM + $((SCALE_DOWN_TEMP_DURATION_SUM / 3)))) From 591deea469ce3e2fff468779b847c32f23b58cbe Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Thu, 16 Jul 2020 19:30:35 -0400 Subject: [PATCH 25/43] Attempt MNG sharing. 
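The line splicing fixed in PATCH 24 above reads one CSV row using parameter expansion only. An illustrative trace (the row layout label,scale_up_seconds,scale_down_seconds is inferred from the expansions, and the sample values are invented):

    TEMP="1,42,37"               # hypothetical row from performance_testN.csv
    PAIR=${TEMP#*,}              # drop everything up to the first comma -> "42,37"
    echo "up:   ${PAIR%%,*}"     # text before the remaining comma       -> 42
    echo "down: ${PAIR##*,}"     # text after the last comma             -> 37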
--- scripts/lib/cluster.sh | 2 +- scripts/lib/performance_tests.sh | 7 ++++++- scripts/run-integration-tests.sh | 3 ++- testdata/deploy-130-pods.yaml | 2 +- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/scripts/lib/cluster.sh b/scripts/lib/cluster.sh index 005c0ca46d..ce0be40ded 100644 --- a/scripts/lib/cluster.sh +++ b/scripts/lib/cluster.sh @@ -14,7 +14,7 @@ function down-test-cluster() { function up-test-cluster() { MNGS="" if [[ "$RUN_PERFORMANCE_TESTS" == true ]]; then - MNGS='{"cni-test-three-nodes-mng":{"name":"cni-test-three-nodes-mng","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":3,"asg-max-size":3,"asg-desired-capacity":3,"instance-types":["m5.xlarge"],"volume-size":40}, "cni-test-single-node-mng":{"name":"cni-test-single-node-mng","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":1,"asg-max-size":1,"asg-desired-capacity":1,"instance-types":["m5.16xlarge"],"volume-size":40}, "cni-test-multi-node-mng":{"name":"cni-test-multi-node-mng","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":1,"asg-max-size":100,"asg-desired-capacity":1,"instance-types":["m5.xlarge"],"volume-size":40}}' + MNGS='{"cni-test-single-node-mng":{"name":"cni-test-single-node-mng","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":1,"asg-max-size":1,"asg-desired-capacity":1,"instance-types":["m5.16xlarge"],"volume-size":40}, "cni-test-multi-node-mng":{"name":"cni-test-multi-node-mng","remote-access-user-name":"ec2-user","tags":{"group":"amazon-vpc-cni-k8s"},"release-version":"","ami-type":"AL2_x86_64","asg-min-size":1,"asg-max-size":100,"asg-desired-capacity":3,"instance-types":["m5.xlarge"],"volume-size":40}}' RUN_CONFORMANCE=false : "${PERFORMANCE_TEST_S3_BUCKET_NAME:=""}" else diff --git a/scripts/lib/performance_tests.sh b/scripts/lib/performance_tests.sh index 4459a6df4d..f9679c68c1 100644 --- a/scripts/lib/performance_tests.sh +++ b/scripts/lib/performance_tests.sh @@ -42,7 +42,12 @@ function check_for_slow_performance() { # Divided by 3 to get current average, multiply past averages by 5/4 to get 25% window if [[ $((CURRENT_PERFORMANCE_UP_SUM / 3)) -gt $((PAST_PERFORMANCE_UP_AVERAGE * 5 / 4)) ]]; then - echo "FAILURE! Performance test took >25% longer than the past three tests!" + echo "FAILURE! Performance test pod UPPING took >25% longer than the past three tests!" + echo "Look into how current changes could cause cni inefficiency." + on_error + fi + if [[ $((CURRENT_PERFORMANCE_DOWN_SUM / 3)) -gt $((PAST_PERFORMANCE_DOWN_AVERAGE * 5 / 4)) ]]; then + echo "FAILURE! Performance test pod DOWN took >25% longer than the past three tests!" echo "Look into how current changes could cause cni inefficiency." 
on_error fi diff --git a/scripts/run-integration-tests.sh b/scripts/run-integration-tests.sh index 09797a8591..9334f45693 100755 --- a/scripts/run-integration-tests.sh +++ b/scripts/run-integration-tests.sh @@ -157,6 +157,7 @@ echo "Using $BASE_CONFIG_PATH as a template" cp "$BASE_CONFIG_PATH" "$TEST_CONFIG_PATH" # Daemonset template +echo "IMAGE NAME ${IMAGE_NAME} " sed -i'.bak' "s,602401143452.dkr.ecr.us-west-2.amazonaws.com/amazon-k8s-cni,$IMAGE_NAME," "$TEST_CONFIG_PATH" sed -i'.bak' "s,:$MANIFEST_IMAGE_VERSION,:$TEST_IMAGE_VERSION," "$TEST_CONFIG_PATH" sed -i'.bak' "s,602401143452.dkr.ecr.us-west-2.amazonaws.com/amazon-k8s-cni-init,$INIT_IMAGE_NAME," "$TEST_CONFIG_PATH" @@ -223,8 +224,8 @@ if [[ "$RUN_PERFORMANCE_TESTS" == true ]]; then echo "Running performance tests on current image:" echo "" START=$SECONDS - scale_nodes_for_5000_pod_test run_performance_test_130_pods + scale_nodes_for_5000_pod_test run_performance_test_730_pods run_performance_test_5000_pods PERFORMANCE_DURATION=$((SECONDS - START)) diff --git a/testdata/deploy-130-pods.yaml b/testdata/deploy-130-pods.yaml index 06b5dc8e78..74f1f938a6 100644 --- a/testdata/deploy-130-pods.yaml +++ b/testdata/deploy-130-pods.yaml @@ -23,4 +23,4 @@ spec: containerPort: 80 imagePullPolicy: IfNotPresent nodeSelector: - eks.amazonaws.com/nodegroup: cni-test-three-nodes-mng + eks.amazonaws.com/nodegroup: cni-test-multi-node-mng From 5e01f11e9b12fa42036c8d714d0e5d874ea65206 Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Thu, 16 Jul 2020 19:33:52 -0400 Subject: [PATCH 26/43] Fix merging issue. --- scripts/run-integration-tests.sh | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/scripts/run-integration-tests.sh b/scripts/run-integration-tests.sh index 2568bbb959..35b332aff8 100755 --- a/scripts/run-integration-tests.sh +++ b/scripts/run-integration-tests.sh @@ -30,21 +30,22 @@ __cluster_deprovisioned=0 on_error() { # Make sure we destroy any cluster that was created if we hit run into an - # error when attempting to run tests against the cluster - if [[ $__cluster_created -eq 1 && $__cluster_deprovisioned -eq 0 && "$DEPROVISION" == true ]]; then - if [[ $RUN_KOPS_TEST == true ]]; then - __cluster_deprovisioned=1 - echo "Cluster was provisioned already. Deprovisioning it..." - down-kops-cluster - else - # prevent double-deprovisioning with ctrl-c during deprovisioning... - __cluster_deprovisioned=1 - echo "Cluster was provisioned already. Deprovisioning it..." - down-test-cluster + # error when attempting to run tests against the + if [[ $RUNNING_PERFORMANCE == false ]]; then + if [[ $__cluster_created -eq 1 && $__cluster_deprovisioned -eq 0 && "$DEPROVISION" == true ]]; then + if [[ $RUN_KOPS_TEST == true ]]; then + __cluster_deprovisioned=1 + echo "Cluster was provisioned already. Deprovisioning it..." + down-kops-cluster + else + # prevent double-deprovisioning with ctrl-c during deprovisioning... + __cluster_deprovisioned=1 + echo "Cluster was provisioned already. Deprovisioning it..." + down-test-cluster + fi fi + exit 1 fi - - exit 1 } # test specific config, results location From 711896a6834bcbdc009cc23b0c888e7b3b930c36 Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Thu, 16 Jul 2020 19:53:23 -0400 Subject: [PATCH 27/43] Setup weekly test. 
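The guard added to on_error in PATCH 26 above cooperates with the flags set by the performance functions: the measurement loops run with RUNNING_PERFORMANCE=true so an incidental error does not deprovision the cluster mid-test, and check_for_timeout flips the flag back to false before calling on_error when it genuinely wants to abort. A sketch trimmed down to that control flow (the deprovisioning body is reduced to comments):

    RUNNING_PERFORMANCE=false
    function on_error() {
        if [[ "$RUNNING_PERFORMANCE" == false ]]; then
            # not inside a performance loop, or the loop has already given up:
            # deprovision the cluster if this run created it, then stop
            exit 1
        fi
        # inside a performance loop: return so the caller can record the failure
    }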
--- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index bf07754b9e..2651a037d7 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -155,7 +155,7 @@ workflows: weekly-test-run: triggers: - schedule: - cron: "0 4 * * 2" + cron: "0 4 * * 4" filters: branches: only: From 1a5bbfc06ab2de1d6fe3ce87d2629fd9942be6ce Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Fri, 17 Jul 2020 02:11:35 -0400 Subject: [PATCH 28/43] Setup weekly cron. --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 2651a037d7..1c6fd2d76a 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -155,7 +155,7 @@ workflows: weekly-test-run: triggers: - schedule: - cron: "0 4 * * 4" + cron: "0 11 * * 5" filters: branches: only: From e4d603fed74afd900fd7db0bf62141215d568e9f Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Fri, 17 Jul 2020 13:09:05 -0400 Subject: [PATCH 29/43] Fix performance test slow checking, add kops to weekly tests. --- .circleci/config.yml | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 1c6fd2d76a..5c54e4ad37 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -114,6 +114,38 @@ jobs: - store_artifacts: path: /tmp/cni-test + kops_test: + docker: + - image: circleci/golang:1.13-stretch + working_directory: /go/src/github.com/{{ORG_NAME}}/{{REPO_NAME}} + environment: + <<: *env + RUN_CONFORMANCE: "false" + RUN_KOPS_TEST: "true" + steps: + - checkout + - setup_remote_docker + - aws-cli/setup: + profile-name: awstester + - restore_cache: + keys: + - dependency-packages-store-{{ checksum "test/integration/go.mod" }} + - dependency-packages-store- + - k8s/install-kubectl: + # requires 1.14.9 for k8s testing, since it uses log api. + kubectl-version: v1.14.9 + - run: + name: Run the integration tests + command: ./scripts/run-integration-tests.sh + no_output_timeout: 15m + - save_cache: + key: dependency-packages-store-{{ checksum "test/integration/go.mod" }} + paths: + - /go/pkg + when: always + - store_artifacts: + path: /tmp/cni-test + workflows: version: 2 check: @@ -155,10 +187,11 @@ workflows: weekly-test-run: triggers: - schedule: - cron: "0 11 * * 5" + cron: "15 14 * * 5" filters: branches: only: - master jobs: - performance_test + - kops_test From 7e2ad40f516631a6698230754433af861baaeed6 Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Fri, 17 Jul 2020 13:09:21 -0400 Subject: [PATCH 30/43] Fix slow checking. 
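The fix below hinges on sed's -n flag: without it, sed echoes every input line and "1 p" merely duplicates the first one, so FILE2 and FILE3 ended up holding several object keys instead of a single one. A quick illustration:

    printf 'a\nb\nc\n' | sed '1 p'      # prints: a a b c  (first line duplicated)
    printf 'a\nb\nc\n' | sed -n '1 p'   # prints: a        (only the selected line)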
--- scripts/lib/performance_tests.sh | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/scripts/lib/performance_tests.sh b/scripts/lib/performance_tests.sh index f9679c68c1..5e634a2ccd 100644 --- a/scripts/lib/performance_tests.sh +++ b/scripts/lib/performance_tests.sh @@ -29,8 +29,8 @@ function save_results_to_file() { function check_for_slow_performance() { BUCKET=s3://cni-scale-test-data${1} FILE1=`aws s3 ls ${BUCKET} | sort | tail -n 1 | awk '{print $4}'` - FILE2=`aws s3 ls ${BUCKET} | sort | tail -n 2 | sed '1 p' | awk '{print $4}'` - FILE3=`aws s3 ls ${BUCKET} | sort | tail -n 3 | sed '1 p' | awk '{print $4}'` + FILE2=`aws s3 ls ${BUCKET} | sort | tail -n 2 | sed -n '1 p' | awk '{print $4}'` + FILE3=`aws s3 ls ${BUCKET} | sort | tail -n 3 | sed -n '1 p' | awk '{print $4}'` PAST_PERFORMANCE_UP_AVERAGE_SUM=0 PAST_PERFORMANCE_DOWN_AVERAGE_SUM=0 @@ -42,13 +42,21 @@ function check_for_slow_performance() { # Divided by 3 to get current average, multiply past averages by 5/4 to get 25% window if [[ $((CURRENT_PERFORMANCE_UP_SUM / 3)) -gt $((PAST_PERFORMANCE_UP_AVERAGE * 5 / 4)) ]]; then - echo "FAILURE! Performance test pod UPPING took >25% longer than the past three tests!" + echo "FAILURE! Performance test pod UPPING took >25% longer than the past three tests" + echo "This tests time: $((CURRENT_PERFORMANCE_UP_SUM / 3))" + echo "Previous tests' time: ${PAST_PERFORMANCE_UP_AVERAGE}" + echo "********************************" echo "Look into how current changes could cause cni inefficiency." + echo "********************************" on_error fi if [[ $((CURRENT_PERFORMANCE_DOWN_SUM / 3)) -gt $((PAST_PERFORMANCE_DOWN_AVERAGE * 5 / 4)) ]]; then echo "FAILURE! Performance test pod DOWN took >25% longer than the past three tests!" + echo "This tests time: $((CURRENT_PERFORMANCE_DOWN_SUM / 3))" + echo "Previous tests' time: ${PAST_PERFORMANCE_DOWN_AVERAGE}" + echo "********************************" echo "Look into how current changes could cause cni inefficiency." + echo "********************************" on_error fi } From d43d4a685bdcd9ffc8a80e81a2a0a4a52cd7882d Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Fri, 17 Jul 2020 15:04:30 -0400 Subject: [PATCH 31/43] Find autoscaling group name. --- scripts/lib/performance_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/lib/performance_tests.sh b/scripts/lib/performance_tests.sh index 5e634a2ccd..47dbd0b22d 100644 --- a/scripts/lib/performance_tests.sh +++ b/scripts/lib/performance_tests.sh @@ -219,7 +219,7 @@ function run_performance_test_730_pods() { } function scale_nodes_for_5000_pod_test() { - AUTO_SCALE_GROUP_INFO=$(aws autoscaling describe-auto-scaling-groups | grep -B18 100,) + AUTO_SCALE_GROUP_INFO=$(aws autoscaling describe-auto-scaling-groups | grep -B25 100,) echo "Group info ${AUTO_SCALE_GROUP_INFO}" AUTO_SCALE_GROUP_NAME_WITH_QUOTES=${AUTO_SCALE_GROUP_INFO%%,*} echo "Group name with quotes ${AUTO_SCALE_GROUP_NAME_WITH_QUOTES}" From c5c95645c519d59b95e53c1cb7c55336e765c354 Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Fri, 17 Jul 2020 16:08:18 -0400 Subject: [PATCH 32/43] Try to find autoscaling group name. 
--- scripts/lib/performance_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/lib/performance_tests.sh b/scripts/lib/performance_tests.sh index 47dbd0b22d..07db41a584 100644 --- a/scripts/lib/performance_tests.sh +++ b/scripts/lib/performance_tests.sh @@ -219,7 +219,7 @@ function run_performance_test_730_pods() { } function scale_nodes_for_5000_pod_test() { - AUTO_SCALE_GROUP_INFO=$(aws autoscaling describe-auto-scaling-groups | grep -B25 100,) + AUTO_SCALE_GROUP_INFO=$(aws autoscaling describe-auto-scaling-groups | grep -B40 100,) echo "Group info ${AUTO_SCALE_GROUP_INFO}" AUTO_SCALE_GROUP_NAME_WITH_QUOTES=${AUTO_SCALE_GROUP_INFO%%,*} echo "Group name with quotes ${AUTO_SCALE_GROUP_NAME_WITH_QUOTES}" From 449f88c6e501e828c669b9774f54f026724c9c8e Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Fri, 17 Jul 2020 16:21:09 -0400 Subject: [PATCH 33/43] Fix weekly test syntax. --- .circleci/config.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 5c54e4ad37..b20342c81c 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -187,7 +187,7 @@ workflows: weekly-test-run: triggers: - schedule: - cron: "15 14 * * 5" + cron: "0 0 * * 6" filters: branches: only: @@ -195,3 +195,5 @@ workflows: jobs: - performance_test - kops_test + requires: + - performance_test From 082901b66652bb6f3653ded3e62dcf422b3eaad6 Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Fri, 17 Jul 2020 16:21:49 -0400 Subject: [PATCH 34/43] Scale up to 99 nodes. --- scripts/lib/performance_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/lib/performance_tests.sh b/scripts/lib/performance_tests.sh index 07db41a584..de2c3825b5 100644 --- a/scripts/lib/performance_tests.sh +++ b/scripts/lib/performance_tests.sh @@ -232,7 +232,7 @@ function scale_nodes_for_5000_pod_test() { aws autoscaling update-auto-scaling-group \ --auto-scaling-group-name $AUTO_SCALE_GROUP_NAME \ - --desired-capacity 98 + --desired-capacity 99 } function run_performance_test_5000_pods() { From 4811d74ce1f18279f77b3f386c3be5d21bf25b2f Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Fri, 17 Jul 2020 16:24:24 -0400 Subject: [PATCH 35/43] Fix yaml syntax. --- .circleci/config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index b20342c81c..5d4b8190fa 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -194,6 +194,6 @@ workflows: - master jobs: - performance_test - - kops_test - requires: + - kops_test: + requires: - performance_test From f674a55359a7ca720a1c09224f75f62557b1e3a5 Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Fri, 17 Jul 2020 17:57:56 -0400 Subject: [PATCH 36/43] Update readme with new tests. 
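The desired-capacity bump in PATCH 34 above only asks the autoscaling group for more instances; the 5000-pod test then depends on those nodes registering in time. A hedged sketch of how a caller could wait for them explicitly (not part of the scripts; the threshold of 99 simply mirrors the new desired capacity):

    aws autoscaling update-auto-scaling-group \
        --auto-scaling-group-name "$AUTO_SCALE_GROUP_NAME" \
        --desired-capacity 99
    # wait until at least 99 nodes report Ready before scheduling the 5000 pods
    until [[ $($KUBECTL_PATH get nodes --no-headers | grep -c ' Ready') -ge 99 ]]; do
        sleep 15
        echo "waiting for nodes to join and become Ready"
    done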
--- test/integration/README.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/test/integration/README.md b/test/integration/README.md index 360af74051..d798f31211 100644 --- a/test/integration/README.md +++ b/test/integration/README.md @@ -4,6 +4,7 @@ * set AWS_SECRET_ACCESS_KEY * set AWS_DEFAULT_REGION (optional, defaults to us-west-2 if not set) * approve test after build completes + * Can only run one of the following tests at a time, as most need a unique cluster to work on # Performance * run from cni test account to upload test results @@ -13,7 +14,13 @@ # KOPS * set RUN_KOPS_TEST=true - * will occassionally fail/flake tests, try re-running test a couple times to ensure there is a problem + * WARNING: will occasionally fail/flake tests, try re-running the test a couple of times to ensure there is a problem + +# Warm IP + * set RUN_WARM_IP_TEST=true + +# Warm ENI + * set RUN_WARM_ENI_TEST=true From 487ce5073341f5e4b47af5454db1b9b4996313d6 Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Fri, 17 Jul 2020 19:05:00 -0400 Subject: [PATCH 37/43] Look back 70 lines. --- scripts/lib/performance_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/lib/performance_tests.sh b/scripts/lib/performance_tests.sh index de2c3825b5..27c7d4614d 100644 --- a/scripts/lib/performance_tests.sh +++ b/scripts/lib/performance_tests.sh @@ -219,7 +219,7 @@ function run_performance_test_730_pods() { } function scale_nodes_for_5000_pod_test() { - AUTO_SCALE_GROUP_INFO=$(aws autoscaling describe-auto-scaling-groups | grep -B40 100,) + AUTO_SCALE_GROUP_INFO=$(aws autoscaling describe-auto-scaling-groups | grep -B70 100,) echo "Group info ${AUTO_SCALE_GROUP_INFO}" AUTO_SCALE_GROUP_NAME_WITH_QUOTES=${AUTO_SCALE_GROUP_INFO%%,*} echo "Group name with quotes ${AUTO_SCALE_GROUP_NAME_WITH_QUOTES}" From 439ea170d12cf02a275ffe5d30b48d8a17dfb007 Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Fri, 17 Jul 2020 19:58:12 -0400 Subject: [PATCH 38/43] Alternate way to get autoscaling group name.
--- scripts/lib/performance_tests.sh | 6 ++++-- scripts/run-integration-tests.sh | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/lib/performance_tests.sh b/scripts/lib/performance_tests.sh index 27c7d4614d..d92169fc38 100644 --- a/scripts/lib/performance_tests.sh +++ b/scripts/lib/performance_tests.sh @@ -218,8 +218,8 @@ function run_performance_test_730_pods() { $KUBECTL_PATH delete -f ./testdata/deploy-730-pods.yaml } -function scale_nodes_for_5000_pod_test() { - AUTO_SCALE_GROUP_INFO=$(aws autoscaling describe-auto-scaling-groups | grep -B70 100,) +function get_100_node_autoscaling_group_name() { + AUTO_SCALE_GROUP_INFO=$(aws autoscaling describe-auto-scaling-groups | grep -B18 100,) echo "Group info ${AUTO_SCALE_GROUP_INFO}" AUTO_SCALE_GROUP_NAME_WITH_QUOTES=${AUTO_SCALE_GROUP_INFO%%,*} echo "Group name with quotes ${AUTO_SCALE_GROUP_NAME_WITH_QUOTES}" @@ -229,7 +229,9 @@ function scale_nodes_for_5000_pod_test() { echo "Group name ${AUTO_SCALE_GROUP_NAME}" AUTO_SCALE_GROUP_NAME=$(echo $AUTO_SCALE_GROUP_NAME | cut -c2-) echo $AUTO_SCALE_GROUP_NAME +} +function scale_nodes_for_5000_pod_test() { aws autoscaling update-auto-scaling-group \ --auto-scaling-group-name $AUTO_SCALE_GROUP_NAME \ --desired-capacity 99 diff --git a/scripts/run-integration-tests.sh b/scripts/run-integration-tests.sh index 35b332aff8..d08191b7d0 100755 --- a/scripts/run-integration-tests.sh +++ b/scripts/run-integration-tests.sh @@ -244,6 +244,7 @@ if [[ "$RUN_PERFORMANCE_TESTS" == true ]]; then echo "Running performance tests on current image:" echo "" START=$SECONDS + get_100_node_autoscaling_group_name run_performance_test_130_pods scale_nodes_for_5000_pod_test run_performance_test_730_pods From a2f8c274e9ca44d0791a156eddcafe84107c6ffe Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Fri, 17 Jul 2020 20:40:19 -0400 Subject: [PATCH 39/43] Change weekly test time. --- .circleci/config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 5d4b8190fa..79b6576559 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -183,11 +183,11 @@ workflows: jobs: - integration_test - # triggers weekly tests on master + # triggers weekly tests on master (Friday at 11 PM PST) weekly-test-run: triggers: - schedule: - cron: "0 0 * * 6" + cron: "0 6 * * 6" filters: branches: only: From 0e7ef6e1e9bbac0532bf859b6f66e7945f196d5f Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Fri, 17 Jul 2020 22:56:58 -0400 Subject: [PATCH 40/43] Change line finder for autoscaling group name. 
--- scripts/lib/performance_tests.sh | 6 ++---- scripts/run-integration-tests.sh | 1 - 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/scripts/lib/performance_tests.sh b/scripts/lib/performance_tests.sh index d92169fc38..ede566bcb5 100644 --- a/scripts/lib/performance_tests.sh +++ b/scripts/lib/performance_tests.sh @@ -218,8 +218,8 @@ function run_performance_test_730_pods() { $KUBECTL_PATH delete -f ./testdata/deploy-730-pods.yaml } -function get_100_node_autoscaling_group_name() { - AUTO_SCALE_GROUP_INFO=$(aws autoscaling describe-auto-scaling-groups | grep -B18 100,) +function scale_nodes_for_5000_pod_test() { + AUTO_SCALE_GROUP_INFO=$(aws autoscaling describe-auto-scaling-groups | grep -B44 100,) echo "Group info ${AUTO_SCALE_GROUP_INFO}" AUTO_SCALE_GROUP_NAME_WITH_QUOTES=${AUTO_SCALE_GROUP_INFO%%,*} echo "Group name with quotes ${AUTO_SCALE_GROUP_NAME_WITH_QUOTES}" @@ -229,9 +229,7 @@ function get_100_node_autoscaling_group_name() { echo "Group name ${AUTO_SCALE_GROUP_NAME}" AUTO_SCALE_GROUP_NAME=$(echo $AUTO_SCALE_GROUP_NAME | cut -c2-) echo $AUTO_SCALE_GROUP_NAME -} -function scale_nodes_for_5000_pod_test() { aws autoscaling update-auto-scaling-group \ --auto-scaling-group-name $AUTO_SCALE_GROUP_NAME \ --desired-capacity 99 diff --git a/scripts/run-integration-tests.sh b/scripts/run-integration-tests.sh index d08191b7d0..35b332aff8 100755 --- a/scripts/run-integration-tests.sh +++ b/scripts/run-integration-tests.sh @@ -244,7 +244,6 @@ if [[ "$RUN_PERFORMANCE_TESTS" == true ]]; then echo "Running performance tests on current image:" echo "" START=$SECONDS - get_100_node_autoscaling_group_name run_performance_test_130_pods scale_nodes_for_5000_pod_test run_performance_test_730_pods From 56b011875cf01ffb63421d2186045b2d5729ebca Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Sun, 19 Jul 2020 00:57:57 -0400 Subject: [PATCH 41/43] Only report failures on slow up process. --- scripts/lib/performance_tests.sh | 9 --------- 1 file changed, 9 deletions(-) diff --git a/scripts/lib/performance_tests.sh b/scripts/lib/performance_tests.sh index ede566bcb5..2ae516adf6 100644 --- a/scripts/lib/performance_tests.sh +++ b/scripts/lib/performance_tests.sh @@ -50,15 +50,6 @@ function check_for_slow_performance() { echo "********************************" on_error fi - if [[ $((CURRENT_PERFORMANCE_DOWN_SUM / 3)) -gt $((PAST_PERFORMANCE_DOWN_AVERAGE * 5 / 4)) ]]; then - echo "FAILURE! Performance test pod DOWN took >25% longer than the past three tests!" - echo "This tests time: $((CURRENT_PERFORMANCE_DOWN_SUM / 3))" - echo "Previous tests' time: ${PAST_PERFORMANCE_DOWN_AVERAGE}" - echo "********************************" - echo "Look into how current changes could cause cni inefficiency." - echo "********************************" - on_error - fi } function find_performance_duration_average() { From 18b10c5729e6ea7b87cac1aef36ba8ad492b20b0 Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Wed, 22 Jul 2020 12:08:11 -0400 Subject: [PATCH 42/43] Fix 3 most recent files, and reset performance average. 
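The file selection change below shifts the baseline window by one position: each test uploads its own results before calling check_for_slow_performance, so the newest object in the folder is the current run and the three uploads before it are the fair comparison. Position picking on a toy listing (newest last after sort; "e" stands in for the current run's upload):

    printf 'a\nb\nc\nd\ne\n' | tail -n 2 | sed -n '1 p'   # -> d  (2nd newest)
    printf 'a\nb\nc\nd\ne\n' | tail -n 3 | sed -n '1 p'   # -> c  (3rd newest)
    printf 'a\nb\nc\nd\ne\n' | tail -n 4 | sed -n '1 p'   # -> b  (4th newest)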
--- scripts/lib/performance_tests.sh | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/scripts/lib/performance_tests.sh b/scripts/lib/performance_tests.sh index 2ae516adf6..300b60a3a7 100644 --- a/scripts/lib/performance_tests.sh +++ b/scripts/lib/performance_tests.sh @@ -28,9 +28,9 @@ function save_results_to_file() { function check_for_slow_performance() { BUCKET=s3://cni-scale-test-data${1} - FILE1=`aws s3 ls ${BUCKET} | sort | tail -n 1 | awk '{print $4}'` - FILE2=`aws s3 ls ${BUCKET} | sort | tail -n 2 | sed -n '1 p' | awk '{print $4}'` - FILE3=`aws s3 ls ${BUCKET} | sort | tail -n 3 | sed -n '1 p' | awk '{print $4}'` + FILE1=`aws s3 ls ${BUCKET} | sort | tail -n 2 | sed -n '1 p' | awk '{print $4}'` + FILE2=`aws s3 ls ${BUCKET} | sort | tail -n 3 | sed -n '1 p' | awk '{print $4}'` + FILE3=`aws s3 ls ${BUCKET} | sort | tail -n 4 | sed -n '1 p' | awk '{print $4}'` PAST_PERFORMANCE_UP_AVERAGE_SUM=0 PAST_PERFORMANCE_DOWN_AVERAGE_SUM=0 @@ -97,7 +97,6 @@ function run_performance_test_130_pods() { SCALE_UP_DURATION_ARRAY+=( $DURATION ) CURRENT_PERFORMANCE_UP_SUM=$((CURRENT_PERFORMANCE_UP_SUM + DURATION)) fi - MIDPOINT_START=$SECONDS $KUBECTL_PATH scale -f ./testdata/deploy-130-pods.yaml --replicas=0 while [[ $($KUBECTL_PATH get pods) ]] do @@ -149,6 +148,8 @@ function run_performance_test_730_pods() { SCALE_UP_DURATION_ARRAY=() SCALE_DOWN_DURATION_ARRAY=() + CURRENT_PERFORMANCE_UP_SUM=0 + CURRENT_PERFORMANCE_DOWN_SUM=0 while [ ${#SCALE_DOWN_DURATION_ARRAY[@]} -lt 3 ] do ITERATION_START=$SECONDS @@ -167,7 +168,6 @@ function run_performance_test_730_pods() { SCALE_UP_DURATION_ARRAY+=( $DURATION ) CURRENT_PERFORMANCE_UP_SUM=$((CURRENT_PERFORMANCE_UP_SUM + DURATION)) fi - MIDPOINT_START=$SECONDS $KUBECTL_PATH scale -f ./testdata/deploy-730-pods.yaml --replicas=0 while [[ $($KUBECTL_PATH get pods) ]] do @@ -236,6 +236,8 @@ function run_performance_test_5000_pods() { SCALE_UP_DURATION_ARRAY=() SCALE_DOWN_DURATION_ARRAY=() + CURRENT_PERFORMANCE_UP_SUM=0 + CURRENT_PERFORMANCE_DOWN_SUM=0 while [ ${#SCALE_DOWN_DURATION_ARRAY[@]} -lt 3 ] do ITERATION_START=$SECONDS @@ -254,7 +256,6 @@ function run_performance_test_5000_pods() { SCALE_UP_DURATION_ARRAY+=( $DURATION ) CURRENT_PERFORMANCE_UP_SUM=$((CURRENT_PERFORMANCE_UP_SUM + DURATION)) fi - MIDPOINT_START=$SECONDS $KUBECTL_PATH scale -f ./testdata/deploy-5000-pods.yaml --replicas=0 while [[ $($KUBECTL_PATH get pods) ]] do From 9f2e891c8f3239efe147deaf4ce5124580205684 Mon Sep 17 00:00:00 2001 From: Ben Napolitan Date: Wed, 22 Jul 2020 18:54:03 -0400 Subject: [PATCH 43/43] Format fix. --- cmd/routed-eni-cni-plugin/cni.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd/routed-eni-cni-plugin/cni.go b/cmd/routed-eni-cni-plugin/cni.go index 7a6d17ec5d..97a7933dd4 100644 --- a/cmd/routed-eni-cni-plugin/cni.go +++ b/cmd/routed-eni-cni-plugin/cni.go @@ -217,13 +217,13 @@ func add(args *skel.CmdArgs, cniTypes typeswrapper.CNITYPES, grpcClient grpcwrap ips := []*current.IPConfig{ { - Version: "4", - Address: *addr, + Version: "4", + Address: *addr, }, } result := ¤t.Result{ - IPs: ips, + IPs: ips, } return cniTypes.PrintResult(result, conf.CNIVersion)