From 526c81b138220dbff9c847508d3db32e33145bdd Mon Sep 17 00:00:00 2001
From: Andrei Eres
Date: Tue, 19 Dec 2023 12:14:22 +0100
Subject: [PATCH] subsystem benchmarks: add cpu profiling (#2734)

Ready-to-merge version of https://github.com/paritytech/polkadot-sdk/pull/2601

- Added optional CPU profiling
- Updated instructions on how to set up Prometheus, Pyroscope and Grafana
- Added a flamegraph dashboard image

---------

Co-authored-by: ordian
---
 Cargo.lock                                    |  2 +
 polkadot/node/subsystem-bench/Cargo.toml      |  2 +
 polkadot/node/subsystem-bench/README.md       | 87 ++++++++++++-------
 .../subsystem-bench/docker/docker-compose.yml | 35 ++++++++
 .../docker/prometheus/prometheus.yml          | 11 +++
 .../grafana/cpu-profiling.json                | 70 +++++++++++++++
 .../subsystem-bench/src/subsystem-bench.rs    | 29 +++++++
 7 files changed, 206 insertions(+), 30 deletions(-)
 create mode 100644 polkadot/node/subsystem-bench/docker/docker-compose.yml
 create mode 100644 polkadot/node/subsystem-bench/docker/prometheus/prometheus.yml
 create mode 100644 polkadot/node/subsystem-bench/grafana/cpu-profiling.json

diff --git a/Cargo.lock b/Cargo.lock
index 32e68779fd1d..b65d6fc1b711 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -13322,6 +13322,8 @@ dependencies = [
 "polkadot-primitives",
 "polkadot-primitives-test-helpers",
 "prometheus",
+ "pyroscope",
+ "pyroscope_pprofrs",
 "rand 0.8.5",
 "sc-keystore",
 "sc-network",
diff --git a/polkadot/node/subsystem-bench/Cargo.toml b/polkadot/node/subsystem-bench/Cargo.toml
index cf9fd8822dde..6504c8f714de 100644
--- a/polkadot/node/subsystem-bench/Cargo.toml
+++ b/polkadot/node/subsystem-bench/Cargo.toml
@@ -56,6 +56,8 @@ serde = "1.0.192"
 serde_yaml = "0.9"
 paste = "1.0.14"
 orchestra = { version = "0.3.3", default-features = false, features = ["futures_channel"] }
+pyroscope = "0.5.7"
+pyroscope_pprofrs = "0.2.7"

 [features]
 default = []
diff --git a/polkadot/node/subsystem-bench/README.md b/polkadot/node/subsystem-bench/README.md
index 21844853334b..b1476db27548 100644
--- a/polkadot/node/subsystem-bench/README.md
+++ b/polkadot/node/subsystem-bench/README.md
@@ -1,6 +1,6 @@
 # Subsystem benchmark client

 Run parachain consensus stress and performance tests on your development machine.

 ## Motivation
@@ -26,17 +26,26 @@ The output binary will be placed in `target/testnet/subsystem-bench`.

 ### Test metrics

 Subsystem, CPU usage and network metrics are exposed via a prometheus endpoint during the test execution.
 A small subset of these collected metrics is displayed in the CLI, but for an in-depth analysis of the test results,
 a local Grafana/Prometheus stack is needed.

+### Run Prometheus, Pyroscope and Grafana in Docker
+
+If Docker is not an option, follow the next sections to install Prometheus, Pyroscope and Grafana
+manually on your machine.
+
+```bash
+cd polkadot/node/subsystem-bench/docker
+docker compose up
+```
+
 ### Install Prometheus

 Please follow the [official installation guide](https://prometheus.io/docs/prometheus/latest/installation/) for your
 platform/OS.

 After successfully installing and starting up Prometheus, we need to alter its configuration such that it
 will scrape the benchmark prometheus endpoint `127.0.0.1:9999`.
Please check the official Prometheus documentation
regarding the location of `prometheus.yml`. On macOS, for example, the full path is `/opt/homebrew/etc/prometheus.yml`.

prometheus.yml:
@@ -57,13 +66,29 @@ scrape_configs:

To complete this step, restart the Prometheus server so that it picks up the new configuration.

-### Install and setup Grafana
+### Install Pyroscope
+
+To collect CPU profiling data, you must be running the Pyroscope server.
+Follow the [installation guide](https://grafana.com/docs/pyroscope/latest/get-started/)
+relevant to your operating system.
+
+### Install Grafana

Follow the [installation guide](https://grafana.com/docs/grafana/latest/setup-grafana/installation/) relevant to
your operating system.

-Once you have the installation up and running, configure the local Prometheus as a data source by following
-[this guide](https://grafana.com/docs/grafana/latest/datasources/prometheus/configure-prometheus-data-source/)
+### Setup Grafana
+
+Once you have the installation up and running, configure the local Prometheus and Pyroscope (if needed)
+as data sources by following these guides:
+
+- [Prometheus](https://grafana.com/docs/grafana/latest/datasources/prometheus/configure-prometheus-data-source/)
+- [Pyroscope](https://grafana.com/docs/grafana/latest/datasources/grafana-pyroscope/)
+
+If you are running the servers in Docker, use the following URLs:
+
+- Prometheus `http://prometheus:9090/`
+- Pyroscope `http://pyroscope:4040/`

#### Import dashboards

@@ -86,26 +111,29 @@ Commands:
```

Note: `test-sequence` is a special test objective that wraps up an arbitrary number of test objectives. It is typically
- used to run a suite of tests defined in a `yaml` file like in this [example](examples/availability_read.yaml).
+used to run a suite of tests defined in a `yaml` file like in this [example](examples/availability_read.yaml).
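Both styles of invocation are shown below; the same two commands also appear in the worked examples later in this README:

```bash
# Run a single test objective and override one of the standard options
target/testnet/subsystem-bench --n-cores 10 data-availability-read

# Run a suite of test objectives described in a YAML file
target/testnet/subsystem-bench test-sequence --path polkadot/node/subsystem-bench/examples/availability_read.yaml
```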
### Standard test options
-
+
```
Options:
-      --network                   The type of network to be emulated [default: ideal] [possible values:
-                                  ideal, healthy, degraded]
-      --n-cores                   Number of cores to fetch availability for [default: 100]
-      --n-validators              Number of validators to fetch chunks from [default: 500]
-      --min-pov-size              The minimum pov size in KiB [default: 5120]
-      --max-pov-size              The maximum pov size bytes [default: 5120]
-  -n, --num-blocks                The number of blocks the test is going to run [default: 1]
-  -p, --peer-bandwidth            The bandwidth of simulated remote peers in KiB
-  -b, --bandwidth                 The bandwidth of our simulated node in KiB
-      --peer-error                Simulated conection error ratio [0-100]
-      --peer-min-latency          Minimum remote peer latency in milliseconds [0-5000]
-      --peer-max-latency          Maximum remote peer latency in milliseconds [0-5000]
-  -h, --help                      Print help
-  -V, --version                   Print version
+      --network                   The type of network to be emulated [default: ideal] [possible values:
+                                  ideal, healthy, degraded]
+      --n-cores                   Number of cores to fetch availability for [default: 100]
+      --n-validators              Number of validators to fetch chunks from [default: 500]
+      --min-pov-size              The minimum pov size in KiB [default: 5120]
+      --max-pov-size              The maximum pov size bytes [default: 5120]
+  -n, --num-blocks                The number of blocks the test is going to run [default: 1]
+  -p, --peer-bandwidth            The bandwidth of simulated remote peers in KiB
+  -b, --bandwidth                 The bandwidth of our simulated node in KiB
+      --peer-error                Simulated connection error ratio [0-100]
+      --peer-min-latency          Minimum remote peer latency in milliseconds [0-5000]
+      --peer-max-latency          Maximum remote peer latency in milliseconds [0-5000]
+      --profile                   Enable CPU Profiling with Pyroscope
+      --pyroscope-url             Pyroscope Server URL [default: http://localhost:4040]
+      --pyroscope-sample-rate     Pyroscope Sample Rate [default: 113]
+  -h, --help                      Print help
+  -V, --version                   Print version
```

These apply to all test objectives, except `test-sequence`, which relies on the values being specified in a file.
@@ -123,8 +151,8 @@ Benchmark availability recovery strategies
Usage: subsystem-bench data-availability-read [OPTIONS]

Options:
-  -f, --fetch-from-backers    Turbo boost AD Read by fetching the full availability datafrom backers first. Saves CPU
-                              as we don't need to re-construct from chunks. Tipically this is only faster if nodes
+  -f, --fetch-from-backers    Turbo boost AD Read by fetching the full availability data from backers first. Saves CPU
+                              as we don't need to re-construct from chunks. Typically this is only faster if nodes
                               have enough bandwidth
  -h, --help                   Print help
```
@@ -152,8 +180,8 @@ Let's run an availability read test which will recover availability for 10 cores
node validator network.

```
- target/testnet/subsystem-bench --n-cores 10 data-availability-read
-[2023-11-28T09:01:59Z INFO subsystem_bench::core::display] n_validators = 500, n_cores = 10, pov_size = 5120 - 5120,
+ target/testnet/subsystem-bench --n-cores 10 data-availability-read
+[2023-11-28T09:01:59Z INFO subsystem_bench::core::display] n_validators = 500, n_cores = 10, pov_size = 5120 - 5120,
 error = 0, latency = None
[2023-11-28T09:01:59Z INFO subsystem-bench::availability] Generating template candidate index=0 pov_size=5242880
[2023-11-28T09:01:59Z INFO subsystem-bench::availability] Created test environment.
[2023-11-28T09:02:07Z INFO subsystem_bench::availability] All blocks processed in 6001ms [2023-11-28T09:02:07Z INFO subsystem_bench::availability] Throughput: 51200 KiB/block [2023-11-28T09:02:07Z INFO subsystem_bench::availability] Block time: 6001 ms -[2023-11-28T09:02:07Z INFO subsystem_bench::availability] - +[2023-11-28T09:02:07Z INFO subsystem_bench::availability] + Total received from network: 66 MiB Total sent to network: 58 KiB Total subsystem CPU usage 4.16s @@ -192,8 +220,7 @@ view the test progress in real time by accessing [this link](http://localhost:30 Now run `target/testnet/subsystem-bench test-sequence --path polkadot/node/subsystem-bench/examples/availability_read.yaml` -and view the metrics in real time and spot differences between different `n_valiator` values. - +and view the metrics in real time and spot differences between different `n_validators` values. ## Create new test objectives This tool is intended to make it easy to write new test objectives that focus individual subsystems, diff --git a/polkadot/node/subsystem-bench/docker/docker-compose.yml b/polkadot/node/subsystem-bench/docker/docker-compose.yml new file mode 100644 index 000000000000..fc5eb1f634e6 --- /dev/null +++ b/polkadot/node/subsystem-bench/docker/docker-compose.yml @@ -0,0 +1,35 @@ +services: + grafana: + image: grafana/grafana-enterprise:latest + container_name: grafana + restart: always + networks: + - subsystem-bench + ports: + - "3000:3000" + + prometheus: + image: prom/prometheus:latest + container_name: prometheus + restart: always + networks: + - subsystem-bench + volumes: + - ./prometheus:/etc/prometheus + extra_hosts: + - "host.docker.internal:host-gateway" + ports: + - "9090:9090" + - "9999:9999" + + pyroscope: + container_name: pyroscope + image: grafana/pyroscope:latest + restart: always + networks: + - subsystem-bench + ports: + - "4040:4040" + +networks: + subsystem-bench: diff --git a/polkadot/node/subsystem-bench/docker/prometheus/prometheus.yml b/polkadot/node/subsystem-bench/docker/prometheus/prometheus.yml new file mode 100644 index 000000000000..0bb25cfcb36c --- /dev/null +++ b/polkadot/node/subsystem-bench/docker/prometheus/prometheus.yml @@ -0,0 +1,11 @@ +global: + scrape_interval: 5s + +scrape_configs: + - job_name: "prometheus" + static_configs: + - targets: ["localhost:9090"] + - job_name: "subsystem-bench" + scrape_interval: 0s500ms + static_configs: + - targets: ['host.docker.internal:9999'] diff --git a/polkadot/node/subsystem-bench/grafana/cpu-profiling.json b/polkadot/node/subsystem-bench/grafana/cpu-profiling.json new file mode 100644 index 000000000000..0d53a1b93657 --- /dev/null +++ b/polkadot/node/subsystem-bench/grafana/cpu-profiling.json @@ -0,0 +1,70 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 1, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "grafana-pyroscope-datasource", + "uid": "bc3bc04f-85f9-464b-8ae3-fbe0949063f6" + }, + "gridPos": { + "h": 18, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "targets": [ + { + "datasource": { + "type": "grafana-pyroscope-datasource", + "uid": "bc3bc04f-85f9-464b-8ae3-fbe0949063f6" + }, + "groupBy": [], + "labelSelector": "{service_name=\"subsystem-bench\"}", + "profileTypeId": 
"process_cpu:cpu:nanoseconds:cpu:nanoseconds", + "queryType": "profile", + "refId": "A" + } + ], + "title": "CPU Profiling", + "type": "flamegraph" + } + ], + "refresh": "", + "schemaVersion": 38, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "CPU Profiling", + "uid": "c31191d5-fe2b-49e2-8b1c-1451f31d1628", + "version": 1, + "weekStart": "" + } diff --git a/polkadot/node/subsystem-bench/src/subsystem-bench.rs b/polkadot/node/subsystem-bench/src/subsystem-bench.rs index da7e5441f748..29b62b27855a 100644 --- a/polkadot/node/subsystem-bench/src/subsystem-bench.rs +++ b/polkadot/node/subsystem-bench/src/subsystem-bench.rs @@ -18,6 +18,8 @@ //! CI regression testing. use clap::Parser; use color_eyre::eyre; +use pyroscope::PyroscopeAgent; +use pyroscope_pprofrs::{pprof_backend, PprofConfig}; use colored::Colorize; use std::{path::Path, time::Duration}; @@ -76,12 +78,34 @@ struct BenchCli { /// Maximum remote peer latency in milliseconds [0-5000]. pub peer_max_latency: Option, + #[clap(long, default_value_t = false)] + /// Enable CPU Profiling with Pyroscope + pub profile: bool, + + #[clap(long, requires = "profile", default_value_t = String::from("http://localhost:4040"))] + /// Pyroscope Server URL + pub pyroscope_url: String, + + #[clap(long, requires = "profile", default_value_t = 113)] + /// Pyroscope Sample Rate + pub pyroscope_sample_rate: u32, + #[command(subcommand)] pub objective: cli::TestObjective, } impl BenchCli { fn launch(self) -> eyre::Result<()> { + let agent_running = if self.profile { + let agent = PyroscopeAgent::builder(self.pyroscope_url.as_str(), "subsystem-bench") + .backend(pprof_backend(PprofConfig::new().sample_rate(self.pyroscope_sample_rate))) + .build()?; + + Some(agent.start()?) + } else { + None + }; + let configuration = self.standard_configuration; let mut test_config = match self.objective { TestObjective::TestSequence(options) => { @@ -165,6 +189,11 @@ impl BenchCli { env.runtime() .block_on(availability::benchmark_availability_read(&mut env, state)); + if let Some(agent_running) = agent_running { + let agent_ready = agent_running.stop()?; + agent_ready.shutdown(); + } + Ok(()) } }