Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add GCE scale testing on kops #16181

Merged
merged 1 commit into from
Feb 2, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 66 additions & 83 deletions tests/e2e/scenarios/scalability/run-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,30 +17,7 @@
set -e
set -x

REPO_ROOT=$(git rev-parse --show-toplevel)
cd "${REPO_ROOT}"
cd ..
WORKSPACE=$(pwd)
cd "${WORKSPACE}/kops"

# Create bindir
BINDIR="${WORKSPACE}/bin"
export PATH="${BINDIR}:${PATH}"
mkdir -p "${BINDIR}"

# Build kubetest-2 kOps support
pushd "${WORKSPACE}/kops"
GOBIN=${BINDIR} make test-e2e-install
popd

# Setup our cleanup function; as we allocate resources we set a variable to indicate they should be cleaned up
function cleanup {
# shellcheck disable=SC2153
if [[ "${DELETE_CLUSTER:-}" == "true" ]]; then
kubetest2 kops "${KUBETEST2_ARGS[@]}" --down || echo "kubetest2 down failed"
fi
}
trap cleanup EXIT
make test-e2e-install

# Default cluster name
SCRIPT_NAME=$(basename "$(dirname "$0")")
Expand All @@ -53,30 +30,26 @@ if [[ -z "${K8S_VERSION:-}" ]]; then
K8S_VERSION="$(curl -s -L https://dl.k8s.io/release/stable.txt)"
fi

# Download latest prebuilt kOps
if [[ -z "${KOPS_BASE_URL:-}" ]]; then
KOPS_BASE_URL="$(curl -s https://storage.googleapis.com/kops-ci/bin/latest-ci.txt)"
fi
export KOPS_BASE_URL

KOPS_BIN=${BINDIR}/kops
wget -qO "${KOPS_BIN}" "$KOPS_BASE_URL/$(go env GOOS)/$(go env GOARCH)/kops"
chmod +x "${KOPS_BIN}"
# A temp patch for kubetest2 https://github.com/kubernetes-sigs/kubetest2/pull/256
# Build kubetest2 from a fresh upstream checkout.
# The checkout goes into a unique mktemp directory rather than the fixed
# /tmp/kubetest2 path: `git clone` refuses to clone into an existing non-empty
# directory, which would abort the whole script under `set -e` on a re-run.
# The checkout is left on disk after the build, as it was before.
KUBETEST2_SRC="$(mktemp -d)"
git clone https://github.com/kubernetes-sigs/kubetest2.git "${KUBETEST2_SRC}"
pushd "${KUBETEST2_SRC}"
make install-all
popd

# Fall back to AWS when the caller left CLOUD_PROVIDER unset or empty,
# then log the value that will be used.
: "${CLOUD_PROVIDER:=aws}"
echo "CLOUD_PROVIDER=${CLOUD_PROVIDER}"

# KOPS_STATE_STORE holds metadata about the clusters we create
if [[ -z "${KOPS_STATE_STORE:-}" ]]; then
echo "Must specify KOPS_STATE_STORE"
exit 1
if [[ "${CLOUD_PROVIDER}" != "gce" ]]; then
# KOPS_STATE_STORE holds metadata about the clusters we create
if [[ -z "${KOPS_STATE_STORE:-}" ]]; then
echo "Must specify KOPS_STATE_STORE"
exit 1
fi
echo "KOPS_STATE_STORE=${KOPS_STATE_STORE}"
export KOPS_STATE_STORE
fi
echo "KOPS_STATE_STORE=${KOPS_STATE_STORE}"
export KOPS_STATE_STORE


if [[ -z "${ADMIN_ACCESS:-}" ]]; then
ADMIN_ACCESS="0.0.0.0/0" # Or use your IPv4 with /32
Expand All @@ -86,12 +59,25 @@ echo "ADMIN_ACCESS=${ADMIN_ACCESS}"
# cilium does not yet pass conformance tests (shared hostport test)
#create_args="--networking cilium"
create_args=()
create_args=("--network-cidr=10.0.0.0/16,10.1.0.0/16,10.2.0.0/16,10.3.0.0/16,10.4.0.0/16,10.5.0.0/16,10.6.0.0/16,10.7.0.0/16,10.8.0.0/16,10.9.0.0/16,10.10.0.0/16,10.11.0.0/16,10.12.0.0/16")
if [[ "${CLOUD_PROVIDER}" == "aws" ]]; then
create_args+=("--network-cidr=10.0.0.0/16,10.1.0.0/16,10.2.0.0/16,10.3.0.0/16,10.4.0.0/16,10.5.0.0/16,10.6.0.0/16,10.7.0.0/16,10.8.0.0/16,10.9.0.0/16,10.10.0.0/16,10.11.0.0/16,10.12.0.0/16")
create_args+=("--node-size=t3a.medium,t3.medium,t2.medium,t3a.large,c5a.large,t3.large,c5.large,m5a.large,m6a.large,m5.large,c4.large,c7a.large,r5a.large,r6a.large,m7a.large")
create_args+=("--node-volume-size=20")
create_args+=("--zones=us-east-2a,us-east-2b,us-east-2c")
create_args+=("--image=${INSTANCE_IMAGE:-ssm:/aws/service/canonical/ubuntu/server/20.04/stable/current/amd64/hvm/ebs-gp2/ami-id}")
fi
if [[ "${CLOUD_PROVIDER}" == "gce" ]]; then
create_args+=("--zones=us-east1-b,us-east1-c,us-east1-d")
create_args+=("--node-size=e2-standard-2")
create_args+=("--node-volume-size=30")
create_args+=("--master-volume-size=1000")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting to see the volume size set to 1000. Out of curiosity, why does it need to be 1000?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bigger disks get more IOPS on GCE.

create_args+=("--gce-service-account=default")
create_args+=("--image=${INSTANCE_IMAGE:-ubuntu-os-cloud/ubuntu-2204-jammy-v20231213a}")
fi
create_args+=("--networking=${CNI_PLUGIN:-calico}")
if [[ "${CNI_PLUGIN}" == "amazonvpc" ]]; then
create_args+=("--set spec.networking.amazonVPC.env=ENABLE_PREFIX_DELEGATION=true")
create_args+=("--set spec.networking.amazonVPC.env=ENABLE_PREFIX_DELEGATION=true")
fi
create_args+=("--image=${INSTANCE_IMAGE:-ssm:/aws/service/canonical/ubuntu/server/20.04/stable/current/amd64/hvm/ebs-gp2/ami-id}")
create_args+=("--set spec.etcdClusters[0].manager.listenMetricsURLs=http://localhost:2382")
create_args+=("--set spec.etcdClusters[0].manager.env=ETCD_QUOTA_BACKEND_BYTES=8589934592")
create_args+=("--set spec.etcdClusters[1].manager.env=ETCD_QUOTA_BACKEND_BYTES=8589934592")
Expand All @@ -114,18 +100,16 @@ create_args+=("--set spec.kubeAPIServer.maxRequestsInflight=800")
create_args+=("--set spec.kubeAPIServer.maxMutatingRequestsInflight=400")
create_args+=("--set spec.kubeAPIServer.enableProfiling=true")
create_args+=("--set spec.kubeAPIServer.enableContentionProfiling=true")
create_args+=("--set spec.kubeAPIServer.logLevel=2")
# this is required for Prometheus server to scrape metrics endpoint on APIServer
create_args+=("--set spec.kubeAPIServer.anonymousAuth=true")
# this is required for prometheus to scrape kube-proxy metrics endpoint
create_args+=("--set spec.kubeProxy.metricsBindAddress=0.0.0.0:10249")
create_args+=("--node-count=${KUBE_NODE_COUNT:-101}")
# TODO: track failures of tests (HostPort & OIDC) when using `--dns=none`
create_args+=("--dns none")
create_args+=("--node-size=t3a.medium,t3.medium,t2.medium,t3a.large,c5a.large,t3.large,c5.large,m5a.large,m6a.large,m5.large,c4.large,c7a.large,r5a.large,r6a.large,m7a.large")
create_args+=("--node-volume-size=20")
create_args+=("--dns=none")
create_args+=("--control-plane-count=${CONTROL_PLANE_COUNT:-1}")
create_args+=("--master-size=${CONTROL_PLANE_SIZE:-c5.2xlarge}")
create_args+=("--zones=us-east-2a,us-east-2b,us-east-2c")


# AWS ONLY feature flags
Expand All @@ -141,10 +125,16 @@ KUBETEST2_ARGS=()
KUBETEST2_ARGS+=("-v=2")
KUBETEST2_ARGS+=("--cloud-provider=${CLOUD_PROVIDER}")
KUBETEST2_ARGS+=("--cluster-name=${CLUSTER_NAME:-}")
KUBETEST2_ARGS+=("--kops-binary-path=${KOPS_BIN}")
KUBETEST2_ARGS+=("--kops-version-marker=https://storage.googleapis.com/k8s-staging-kops/kops/releases/markers/master/latest-ci.txt")
KUBETEST2_ARGS+=("--admin-access=${ADMIN_ACCESS:-}")
KUBETEST2_ARGS+=("--env=KOPS_FEATURE_FLAGS=${KOPS_FEATURE_FLAGS}")

# Extra kubetest2 flags that are only passed when running on GCE;
# appended to KUBETEST2_ARGS in a single array append.
if [[ "${CLOUD_PROVIDER}" == "gce" ]]; then
  KUBETEST2_ARGS+=(
    "--boskos-resource-type=scalability-scale-project"
    "--control-plane-instance-group-overrides=spec.rootVolume.type=pd-ssd"
    "--max-nodes-to-dump=10"
  )
fi

# More time for bigger clusters
KUBETEST2_ARGS+=("--validation-wait=55m")
KUBETEST2_ARGS+=("--validation-count=3")
Expand All @@ -155,24 +145,18 @@ if [[ -z "${DELETE_CLUSTER:-}" ]]; then
DELETE_CLUSTER="true"
fi

kubetest2 kops "${KUBETEST2_ARGS[@]}" \
--up \
--kubernetes-version="${K8S_VERSION}" \
--create-args="${create_args[*]}"

KUBECONFIG=$(mktemp -t kubeconfig.XXXXXXXXX)
kops export kubecfg --admin --kubeconfig="${KUBECONFIG}"

kops get instances
# Ask kubetest2 to tear the cluster down as part of the same invocation,
# but only when DELETE_CLUSTER is exactly "true".
case "${DELETE_CLUSTER:-}" in
  true) KUBETEST2_ARGS+=("--down") ;;
esac

# CL2 uses KUBE_SSH_KEY_PATH path to ssh to instances for scraping metrics
export KUBE_SSH_KEY_PATH="/tmp/kops/${CLUSTER_NAME}/id_ed25519"
# this is used as a label to select kube-proxy pods on kops for kube-proxy service
# used by CL2 Prometheus here https://github.com/kubernetes/perf-tests/blob/master/clusterloader2/pkg/prometheus/manifests/default/kube-proxy-service.yaml#L2
export PROMETHEUS_KUBE_PROXY_SELECTOR_KEY="k8s-app"
export PROMETHEUS_SCRAPE_APISERVER_ONLY="true"
export CL2_PROMETHEUS_TOLERATE_MASTER="true"
if [[ "${CLOUD_PROVIDER}" == "aws" ]]; then
# CL2 uses KUBE_SSH_KEY_PATH path to ssh to instances for scraping metrics
export KUBE_SSH_KEY_PATH="/tmp/kops/${CLUSTER_NAME}/id_ed25519"
cat > "${GOPATH}"/src/k8s.io/perf-tests/clusterloader2/testing/load/overrides.yaml <<EOL
# we are not testing statefulsets at this point
SMALL_STATEFUL_SETS_PER_NAMESPACE: 0
Expand All @@ -182,29 +166,28 @@ if [[ "${CLOUD_PROVIDER}" == "aws" ]]; then
ENABLE_RESTART_COUNT_CHECK: false
EOL
cat "${GOPATH}"/src/k8s.io/perf-tests/clusterloader2/testing/load/overrides.yaml

kubetest2 kops "${KUBETEST2_ARGS[@]}" \
--test=clusterloader2 \
--kubernetes-version="${K8S_VERSION}" \
-- \
--provider="${CLOUD_PROVIDER}" \
--repo-root="${GOPATH}"/src/k8s.io/perf-tests \
--test-configs="${GOPATH}"/src/k8s.io/perf-tests/clusterloader2/testing/load/config.yaml \
--test-overrides="${GOPATH}"/src/k8s.io/perf-tests/clusterloader2/testing/load/overrides.yaml \
--kube-config="${KUBECONFIG}"
else
kubetest2 kops "${KUBETEST2_ARGS[@]}" \
--test=clusterloader2 \
--kubernetes-version="${K8S_VERSION}" \
-- \
--provider="${CLOUD_PROVIDER}" \
--repo-root="${GOPATH}"/src/k8s.io/perf-tests \
--test-configs="${GOPATH}"/src/k8s.io/perf-tests/clusterloader2/testing/load/config.yaml \
--kube-config="${KUBECONFIG}"
cat > "${GOPATH}"/src/k8s.io/perf-tests/clusterloader2/testing/load/overrides.yaml <<EOL
# setting a default value here to avoid an incorrect yaml file
CL2_ENABLE_PVS: true
EOL
fi


if [[ "${DELETE_CLUSTER:-}" == "true" ]]; then
DELETE_CLUSTER=false # Don't delete again in trap
kubetest2 kops "${KUBETEST2_ARGS[@]}" --down
fi
# Single kubetest2 invocation that passes both --up and --test=clusterloader2,
# so cluster creation and the load test are requested in one command.
# KUBETEST2_ARGS is expanded as separate words; create_args is joined into one
# string via [*] (space-separated with the default IFS) for --create-args.
# NOTE(review): the flags after the bare `--` are presumably forwarded to the
# clusterloader2 tester rather than parsed by the kops deployer — confirm
# against the kubetest2 documentation.
kubetest2 kops "${KUBETEST2_ARGS[@]}" \
--up \
--kubernetes-version="${K8S_VERSION}" \
--create-args="${create_args[*]}" \
--test=clusterloader2 \
-- \
--provider="${CLOUD_PROVIDER}" \
--repo-root="${GOPATH}"/src/k8s.io/perf-tests \
--test-configs="${GOPATH}"/src/k8s.io/perf-tests/clusterloader2/testing/load/config.yaml \
--test-overrides="${GOPATH}"/src/k8s.io/perf-tests/clusterloader2/testing/load/overrides.yaml \
--extra-args="--experimental-prometheus-snapshot-to-report-dir=true" \
--kube-config="${HOME}/.kube/config"
# --test-overrides="${GOPATH}"/src/k8s.io/perf-tests/clusterloader2/testing/experiments/enable_restart_count_check.yaml \
upodroid marked this conversation as resolved.
Show resolved Hide resolved
# --test-overrides="${GOPATH}"/src/k8s.io/perf-tests/clusterloader2/testing/experiments/ignore_known_gce_container_restarts.yaml \
# --test-overrides="${GOPATH}"/src/k8s.io/perf-tests/clusterloader2/testing/overrides/5000_nodes.yaml \
# --test-overrides="${GOPATH}"/src/k8s.io/perf-tests/clusterloader2/testing/load/config.yaml \
# --test-overrides="${GOPATH}"/src/k8s.io/perf-tests/clusterloader2/testing/huge-service/config.yaml \
# --test-overrides="${GOPATH}"/src/k8s.io/perf-tests/clusterloader2/testing/access-tokens/config.yaml \
Loading