Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test: kill prover node and see it recover #11118

Merged
merged 10 commits into from
Feb 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions spartan/aztec-chaos-scenarios/templates/prover-kill.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{{- if .Values.proverKill.enabled }}
---
# Chaos Mesh PodChaos experiment that kills prover-node pods in the target namespace.
# Rendered only when proverKill.enabled is true.
apiVersion: chaos-mesh.org/v1alpha1
kind: PodChaos
metadata:
  name: {{ .Values.global.targetNamespace }}-prover-kill
  # The experiment object is created in the Chaos Mesh namespace; the pods it
  # acts on are chosen by spec.selector below.
  namespace: {{ .Values.global.chaosMeshNamespace }}
  labels:
    {{- include "aztec-chaos-scenarios.labels" . | nindent 4 }}
  annotations:
    # Asks Helm to leave this resource in place instead of deleting it on uninstall.
    "helm.sh/resource-policy": keep
spec:
  action: pod-kill
  # fixed-percent: `value` is the percentage of matching pods to act on.
  mode: fixed-percent
  # Quoted because the rendered field is emitted as a string.
  value: {{ .Values.proverKill.percent | quote }}
  selector:
    namespaces:
      - {{ .Values.global.targetNamespace }}
    labelSelectors:
      app: prover-node
{{- end }}
4 changes: 4 additions & 0 deletions spartan/aztec-chaos-scenarios/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ proverFailure:
enabled: false
duration: 13m

proverKill:
enabled: false
percent: 100

validatorKill:
enabled: false
percent: 30
Expand Down
6 changes: 6 additions & 0 deletions spartan/aztec-chaos-scenarios/values/prover-kill.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Values override that switches on the prover-kill chaos scenario.
global:
  # NOTE(review): the prover-kill template reads global.targetNamespace and
  # global.chaosMeshNamespace; confirm that global.namespace is consumed
  # somewhere, otherwise this override has no effect.
  namespace: "smoke"

proverKill:
  enabled: true
  # Percentage handed to the PodChaos `value` field (mode: fixed-percent).
  percent: 100
6 changes: 6 additions & 0 deletions spartan/aztec-network/templates/prover-agent.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,12 @@ spec:
value: "1"
- name: PROVER_AGENT_POLL_INTERVAL_MS
value: "{{ .Values.proverAgent.pollIntervalMs }}"
- name: PROVER_TEST_DELAY_TYPE
value: "{{ .Values.proverAgent.testDelayType }}"
- name: PROVER_TEST_DELAY_MS
value: "{{ .Values.proverAgent.testDelayMs }}"
- name: PROVER_TEST_DELAY_FACTOR
value: "{{ .Values.proverAgent.testDelayFactor }}"
- name: PROVER_AGENT_PROOF_TYPES
value: {{ join "," .Values.proverAgent.proofTypes | quote }}
- name: OTEL_RESOURCE_ATTRIBUTES
Expand Down
5 changes: 4 additions & 1 deletion spartan/aztec-network/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,10 @@ proverAgent:
enabled: true
replicas: 1
pollIntervalMs: 1000
proofTypes: ["foo", "bar", "baz"]
proofTypes: []
testDelayType: "fixed"
testDelayMs: 100 # each fake proof takes 100ms
testDelayFactor: 1
gke:
spotEnabled: true
logLevel: "debug; info: aztec:simulator, json-rpc"
Expand Down
20 changes: 20 additions & 0 deletions spartan/aztec-network/values/prover-node-chaos.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Network values with a single validator, telemetry enabled and a fixed
# 2000 ms prover-agent test delay.
# NOTE(review): judging by the file name this is meant for the prover-node
# chaos test — confirm which script or test passes this values file.
validator:
  replicas: 1
  # 0x-prefixed scalars are quoted so every YAML parser reads them as strings
  # rather than attempting to resolve them as hexadecimal integers.
  # NOTE(review): this key/address pair looks like a publicly known local-devnet
  # test account and is presumably index-aligned — confirm, and never reuse it
  # on a network that holds real funds.
  validatorKeys:
    - "0x59c6995e998f97a5a0044966f0945389dc9e86dae88c7a8412f4603b6b78690d"
  validatorAddresses:
    - "0x70997970C51812dc3A010C7d01b50e0d17dc79C8"
  validator:
    disabled: false

bootNode:
  validator:
    disabled: true

telemetry:
  enabled: true

proverAgent:
  testDelayType: "fixed"
  # Rendered into PROVER_TEST_DELAY_MS by the prover-agent template.
  testDelayMs: 2000

4 changes: 4 additions & 0 deletions spartan/bootstrap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ case "$cmd" in
"test-kind-transfer-blob-with-sink")
OVERRIDES="blobSink.enabled=true" ./bootstrap.sh test-kind-transfer
;;
"test-kind-chaos-prover")
chaos-mesh/install.sh
OVERRIDES="proverAgent.testDelayMs=1000" NAMESPACE=chaos-prover FRESH_INSTALL=${FRESH_INSTALL:-true} INSTALL_METRICS=true ./scripts/test_kind.sh src/spartan/prover-node.test.ts ci.yaml
;;
"test-local")
# Isolate network stack in docker.
docker_isolate ../scripts/run_native_testnet.sh -i -val 3
Expand Down
7 changes: 5 additions & 2 deletions spartan/scripts/test_kind.sh
Original file line number Diff line number Diff line change
Expand Up @@ -86,15 +86,16 @@ if [ "$fresh_install" != "no-deploy" ]; then
OVERRIDES="$OVERRIDES" ./deploy_kind.sh $namespace $values_file $sepolia_run
fi

# Find 4 free ports between 9000 and 10000
free_ports="$(find_ports 5)"
# Find 6 free ports between 9000 and 10000
free_ports="$(find_ports 6)"

# Extract the free ports from the list
forwarded_pxe_port=$(echo $free_ports | awk '{print $1}')
forwarded_anvil_port=$(echo $free_ports | awk '{print $2}')
forwarded_metrics_port=$(echo $free_ports | awk '{print $3}')
forwarded_node_port=$(echo $free_ports | awk '{print $4}')
forwarded_sequencer_port=$(echo $free_ports | awk '{print $5}')
forwarded_prover_node_port=$(echo $free_ports | awk '{print $6}')

if [ "$install_metrics" = "true" ]; then
grafana_password=$(kubectl get secrets -n metrics metrics-grafana -o jsonpath='{.data.admin-password}' | base64 --decode)
Expand Down Expand Up @@ -125,6 +126,8 @@ export HOST_NODE_PORT="$forwarded_node_port"
export CONTAINER_NODE_PORT="8080"
export HOST_SEQUENCER_PORT=$forwarded_sequencer_port
export CONTAINER_SEQUENCER_PORT="8080"
export HOST_PROVER_NODE_PORT=$forwarded_prover_node_port
export CONTAINER_PROVER_NODE_PORT="8080"
export HOST_METRICS_PORT="$forwarded_metrics_port"
export CONTAINER_METRICS_PORT="80"
export GRAFANA_PASSWORD="$grafana_password"
Expand Down
31 changes: 31 additions & 0 deletions yarn-project/bb-prover/src/test/delay_values.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import { ProvingRequestType } from '@aztec/circuit-types/interfaces/server';

/**
 * Delay table in milliseconds, keyed by proving request type.
 *
 * Typed as a `Record` over `ProvingRequestType`, so every member of that type
 * must have an entry here. Several entries are `0`, and one is still marked TBD.
 *
 * NOTE(review): the name suggests these are simulated witness-generation
 * durations for test provers — confirm against the code that consumes this table.
 */
export const WITGEN_DELAY_MS: Record<ProvingRequestType, number> = {
  [ProvingRequestType.BASE_PARITY]: 60,
  [ProvingRequestType.BLOCK_MERGE_ROLLUP]: 650,
  [ProvingRequestType.BLOCK_ROOT_ROLLUP]: 60_000,
  [ProvingRequestType.EMPTY_BLOCK_ROOT_ROLLUP]: 0,
  [ProvingRequestType.MERGE_ROLLUP]: 0,
  [ProvingRequestType.PRIVATE_BASE_ROLLUP]: 400_000,
  [ProvingRequestType.SINGLE_TX_BLOCK_ROOT_ROLLUP]: 0, // TBD
  [ProvingRequestType.PUBLIC_BASE_ROLLUP]: 470_000,
  [ProvingRequestType.ROOT_PARITY]: 100,
  [ProvingRequestType.ROOT_ROLLUP]: 650,
  [ProvingRequestType.TUBE_PROOF]: 0,
  [ProvingRequestType.PUBLIC_VM]: 0,
};

/**
 * Delay table in milliseconds, keyed by proving request type.
 *
 * Typed as a `Record` over `ProvingRequestType`, so every member of that type
 * must have an entry here. Several entries are `0`, and one is still marked TBD.
 *
 * NOTE(review): the name suggests these are simulated proof-generation
 * durations for test provers — confirm against the code that consumes this table.
 */
export const PROOF_DELAY_MS: Record<ProvingRequestType, number> = {
  [ProvingRequestType.BASE_PARITY]: 3_000,
  [ProvingRequestType.BLOCK_MERGE_ROLLUP]: 15_000,
  [ProvingRequestType.BLOCK_ROOT_ROLLUP]: 55_000,
  [ProvingRequestType.EMPTY_BLOCK_ROOT_ROLLUP]: 0,
  [ProvingRequestType.MERGE_ROLLUP]: 0,
  [ProvingRequestType.PRIVATE_BASE_ROLLUP]: 145_000,
  [ProvingRequestType.SINGLE_TX_BLOCK_ROOT_ROLLUP]: 0, // TBD
  [ProvingRequestType.PUBLIC_BASE_ROLLUP]: 160_000,
  [ProvingRequestType.ROOT_PARITY]: 30_000,
  [ProvingRequestType.ROOT_ROLLUP]: 15_000,
  [ProvingRequestType.TUBE_PROOF]: 30_000,
  [ProvingRequestType.PUBLIC_VM]: 0,
};
Loading