Skip to content

Commit

Permalink
Add dedicated prometheus metrics endpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
paulhauner committed Sep 14, 2020
1 parent 528ffa1 commit 404d681
Show file tree
Hide file tree
Showing 14 changed files with 501 additions and 3 deletions.
23 changes: 23 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ members = [
"beacon_node/eth1",
"beacon_node/eth2_libp2p",
"beacon_node/http_api",
"beacon_node/http_metrics",
"beacon_node/network",
"beacon_node/rest_api",
"beacon_node/store",
Expand Down
1 change: 1 addition & 0 deletions beacon_node/client/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,4 @@ lighthouse_metrics = { path = "../../common/lighthouse_metrics" }
time = "0.2.16"
bus = "2.2.3"
http_api = { path = "../http_api" }
http_metrics = { path = "../http_metrics" }
48 changes: 46 additions & 2 deletions beacon_node/client/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ use eth2_libp2p::NetworkGlobals;
use genesis::{interop_genesis_state, Eth1GenesisService};
use network::{NetworkConfig, NetworkMessage, NetworkService};
use parking_lot::Mutex;
use slog::info;
use slog::{debug, info};
use ssz::Decode;
use std::net::SocketAddr;
use std::path::Path;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Duration;
use timer::spawn_timer;
Expand Down Expand Up @@ -60,7 +60,10 @@ pub struct ClientBuilder<T: BeaconChainTypes> {
event_handler: Option<T::EventHandler>,
network_globals: Option<Arc<NetworkGlobals<T::EthSpec>>>,
network_send: Option<UnboundedSender<NetworkMessage<T::EthSpec>>>,
db_path: Option<PathBuf>,
freezer_db_path: Option<PathBuf>,
http_api_config: http_api::Config,
http_metrics_config: http_metrics::Config,
websocket_listen_addr: Option<SocketAddr>,
eth_spec_instance: T::EthSpec,
}
Expand Down Expand Up @@ -102,7 +105,10 @@ where
event_handler: None,
network_globals: None,
network_send: None,
db_path: None,
freezer_db_path: None,
http_api_config: <_>::default(),
http_metrics_config: <_>::default(),
websocket_listen_addr: None,
eth_spec_instance,
}
Expand Down Expand Up @@ -285,6 +291,12 @@ where
self
}

/// Provides configuration for the HTTP server that serves Prometheus metrics.
pub fn http_metrics_config(mut self, config: http_metrics::Config) -> Self {
self.http_metrics_config = config;
self
}

/// Immediately starts the service that periodically logs information each slot.
pub fn notifier(self) -> Result<Self, String> {
let context = self
Expand Down Expand Up @@ -354,6 +366,7 @@ where
.unwrap(); // TODO

self.runtime_context
.clone()
.unwrap()
.executor
.spawn_without_exit(async move { server.await }, "http-api");
Expand All @@ -364,10 +377,38 @@ where
None
};

let http_metrics_listen_addr = if self.http_metrics_config.enabled {
let ctx = Arc::new(http_metrics::Context {
config: self.http_metrics_config.clone(),
chain: self.beacon_chain.clone(),
db_path: self.db_path.clone(),
freezer_db_path: self.freezer_db_path.clone(),
log: log.clone(),
});

// TODO
let exit = self.runtime_context.as_ref().unwrap().executor.exit();

let (listen_addr, server) = http_metrics::serve(ctx, exit)
.map_err(|e| format!("Unable to start HTTP API server: {:?}", e))
.unwrap(); // TODO

self.runtime_context
.unwrap()
.executor
.spawn_without_exit(async move { server.await }, "http-api");

Some(listen_addr)
} else {
debug!(log, "Metrics server is disabled");
None
};

Client {
beacon_chain: self.beacon_chain,
network_globals: self.network_globals,
http_api_listen_addr,
http_metrics_listen_addr,
websocket_listen_addr: self.websocket_listen_addr,
}
}
Expand Down Expand Up @@ -504,6 +545,9 @@ where
.clone()
.ok_or_else(|| "disk_store requires a chain spec".to_string())?;

self.db_path = Some(hot_path.into());
self.freezer_db_path = Some(cold_path.into());

let store = HotColdDB::open(hot_path, cold_path, config, spec, context.log().clone())
.map_err(|e| format!("Unable to open database: {:?}", e))?;
self.store = Some(Arc::new(store));
Expand Down
2 changes: 2 additions & 0 deletions beacon_node/client/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ pub struct Config {
pub websocket_server: websocket_server::Config,
pub eth1: eth1::Config,
pub http_api: http_api::Config,
pub http_metrics: http_metrics::Config,
}

impl Default for Config {
Expand All @@ -88,6 +89,7 @@ impl Default for Config {
disabled_forks: Vec::new(),
graffiti: Graffiti::default(),
http_api: <_>::default(),
http_metrics: <_>::default(),
}
}
}
Expand Down
7 changes: 7 additions & 0 deletions beacon_node/client/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ pub struct Client<T: BeaconChainTypes> {
network_globals: Option<Arc<NetworkGlobals<T::EthSpec>>>,
/// Listen address for the standard eth2.0 API, if the service was started.
http_api_listen_addr: Option<SocketAddr>,
/// Listen address for the HTTP server which serves Prometheus metrics.
http_metrics_listen_addr: Option<SocketAddr>,
websocket_listen_addr: Option<SocketAddr>,
}

Expand All @@ -39,6 +41,11 @@ impl<T: BeaconChainTypes> Client<T> {
self.http_api_listen_addr
}

/// Returns the address of the client's HTTP Prometheus metrics server, if it was started.
pub fn http_metrics_listen_addr(&self) -> Option<SocketAddr> {
self.http_metrics_listen_addr
}

/// Returns the address of the client's WebSocket API server, if it was started.
pub fn websocket_listen_addr(&self) -> Option<SocketAddr> {
self.websocket_listen_addr
Expand Down
29 changes: 29 additions & 0 deletions beacon_node/http_metrics/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
[package]
name = "http_metrics"
version = "0.1.0"
authors = ["Paul Hauner <paul@paulhauner.com>"]
edition = "2018"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
prometheus = "0.9.0"
warp = "0.2.5"
serde = { version = "1.0.110", features = ["derive"] }
slog = "2.5.2"
beacon_chain = { path = "../beacon_chain" }
store = { path = "../store" }
eth2_libp2p = { path = "../eth2_libp2p" }
slot_clock = { path = "../../common/slot_clock" }
lighthouse_metrics = { path = "../../common/lighthouse_metrics" }
lazy_static = "1.4.0"

[dev-dependencies]
tokio = { version = "0.2.21", features = ["sync"] }
reqwest = { version = "0.10.8", features = ["json"] }
environment = { path = "../../lighthouse/environment" }
types = { path = "../../consensus/types" }

[target.'cfg(target_os = "linux")'.dependencies]
psutil = "3.1.0"
procinfo = "0.4.2"
72 changes: 72 additions & 0 deletions beacon_node/http_metrics/src/health.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
use serde::{Deserialize, Serialize};

#[cfg(target_os = "linux")]
use {procinfo::pid, psutil::process::Process};

#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
/// Reports on the health of the Lighthouse instance.
pub struct Health {
/// The pid of this process.
pub pid: u32,
/// The number of threads used by this pid.
pub pid_num_threads: i32,
/// The total resident memory used by this pid.
pub pid_mem_resident_set_size: u64,
/// The total virtual memory used by this pid.
pub pid_mem_virtual_memory_size: u64,
/// Total virtual memory on the system
pub sys_virt_mem_total: u64,
/// Total virtual memory available for new processes.
pub sys_virt_mem_available: u64,
/// Total virtual memory used on the system
pub sys_virt_mem_used: u64,
/// Total virtual memory not used on the system
pub sys_virt_mem_free: u64,
/// Percentage of virtual memory used on the system
pub sys_virt_mem_percent: f32,
/// System load average over 1 minute.
pub sys_loadavg_1: f64,
/// System load average over 5 minutes.
pub sys_loadavg_5: f64,
/// System load average over 15 minutes.
pub sys_loadavg_15: f64,
}

impl Health {
#[cfg(not(target_os = "linux"))]
pub fn observe() -> Result<Self, String> {
Err("Health is only available on Linux".into())
}

#[cfg(target_os = "linux")]
pub fn observe() -> Result<Self, String> {
let process =
Process::current().map_err(|e| format!("Unable to get current process: {:?}", e))?;

let process_mem = process
.memory_info()
.map_err(|e| format!("Unable to get process memory info: {:?}", e))?;

let stat = pid::stat_self().map_err(|e| format!("Unable to get stat: {:?}", e))?;

let vm = psutil::memory::virtual_memory()
.map_err(|e| format!("Unable to get virtual memory: {:?}", e))?;
let loadavg =
psutil::host::loadavg().map_err(|e| format!("Unable to get loadavg: {:?}", e))?;

Ok(Self {
pid: process.pid(),
pid_num_threads: stat.num_threads,
pid_mem_resident_set_size: process_mem.rss(),
pid_mem_virtual_memory_size: process_mem.vms(),
sys_virt_mem_total: vm.total(),
sys_virt_mem_available: vm.available(),
sys_virt_mem_used: vm.used(),
sys_virt_mem_free: vm.free(),
sys_virt_mem_percent: vm.percent(),
sys_loadavg_1: loadavg.one,
sys_loadavg_5: loadavg.five,
sys_loadavg_15: loadavg.fifteen,
})
}
}
Loading

0 comments on commit 404d681

Please sign in to comment.