Skip to content
This repository has been archived by the owner on Nov 15, 2023. It is now read-only.

Commit

Permalink
overseer: draft of prometheus metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
ordian committed Aug 4, 2020
1 parent 5f5ee13 commit b6e221d
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 2 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions node/overseer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ polkadot-primitives = { path = "../../primitives" }
client = { package = "sc-client-api", git = "https://github.com/paritytech/substrate", branch = "master" }
polkadot-subsystem = { package = "polkadot-node-subsystem", path = "../subsystem" }
polkadot-node-primitives = { package = "polkadot-node-primitives", path = "../primitives" }
substrate-prometheus-endpoint = { git = "https://github.com/paritytech/substrate", branch = "master" }
async-trait = "0.1"

[dev-dependencies]
Expand Down
1 change: 1 addition & 0 deletions node/overseer/examples/minimal-example.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ fn main() {
let (overseer, _handler) = Overseer::new(
vec![],
all_subsystems,
None,
spawner,
).unwrap();
let overseer_fut = overseer.run().fuse();
Expand Down
85 changes: 84 additions & 1 deletion node/overseer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,16 @@ pub use polkadot_subsystem::{
SpawnedSubsystem, ActiveLeavesUpdate,
};
use polkadot_node_primitives::SpawnNamed;
use substrate_prometheus_endpoint as prometheus;


/// A capacity of bounded channels inside the overseer.
const CHANNEL_CAPACITY: usize = 1024;
/// A graceful `Overseer` teardown time delay.
// NOTE(review): the unit is not visible here (presumably seconds) — confirm at the use site.
const STOP_DELAY: u64 = 1;
/// Target for logs.
// `const` items are implicitly `'static`, so the lifetime is not spelled out.
const LOG_TARGET: &str = "overseer";


/// A type of messages that are sent from [`Subsystem`] to [`Overseer`].
///
Expand Down Expand Up @@ -380,6 +384,11 @@ pub struct Overseer<S: SpawnNamed> {

/// The set of the "active leaves".
active_leaves: HashSet<(Hash, BlockNumber)>,

/// Various prometheus metrics.
/// `None` if `Metrics::try_register` fails for some reason
/// or if no registry was provided.
metrics: Option<Metrics>,
}

/// This struct is passed as an argument to create a new instance of an [`Overseer`].
Expand Down Expand Up @@ -420,6 +429,60 @@ pub struct AllSubsystems<CV, CB, CS, SD, AD, BS, BD, P, PoVD, RA, AS, NB> {
pub network_bridge: NB,
}

/// Various prometheus metrics.
///
/// Instances are built by `Metrics::try_register`, which creates each metric
/// below and registers it in the supplied prometheus registry.
struct Metrics {
/// Gauge registered under the name `active_heads_count`
/// (help text: "Number of active heads.").
active_heads_count: prometheus::Gauge<prometheus::U64>,
// TODO do these metrics live here or in subsystems?
// TODO should this be a CounterVec?
/// Counter registered under the name `validation_requests_served_total`
/// (help text: "Total number of validation requests served.").
validation_requests_served_total: prometheus::Counter<prometheus::U64>,
/// Counter registered under the name `validation_requests_succeeded_total`
/// (help text: "Total number of validation requests succeeded.").
validation_requests_succeeded_total: prometheus::Counter<prometheus::U64>,
// TODO can we derive this from served - succeeded?
// should this count _internal_ errors instead?
/// Counter registered under the name `validation_requests_failed_total`
/// (help text: "Total number of validation requests failed.").
validation_requests_failed_total: prometheus::Counter<prometheus::U64>,
// Not yet represented by fields in this struct (presumably planned metrics):
// Number of statements signed
// Number of bitfields signed
// Number of availability chunks received
// Number of candidates seconded
// Number of collations generated
// Number of Runtime API errors encountered
}

impl Metrics {
    /// Create every overseer metric and register it in `registry`.
    ///
    /// Metrics are created and registered one at a time, in field order.
    /// The first failure (either constructing a metric or registering it)
    /// is returned to the caller and the remaining metrics are not attempted.
    fn try_register(registry: &prometheus::Registry) -> Result<Self, prometheus::PrometheusError> {
        // TODO: should the `Gauge::new` error be an `.expect(PROOF)` instead of `?`?
        let heads_gauge = prometheus::Gauge::new(
            "active_heads_count",
            "Number of active heads."
        )?;
        let active_heads_count = prometheus::register(heads_gauge, registry)?;

        let served_counter = prometheus::Counter::new(
            "validation_requests_served_total",
            "Total number of validation requests served.",
        )?;
        let validation_requests_served_total = prometheus::register(served_counter, registry)?;

        let succeeded_counter = prometheus::Counter::new(
            "validation_requests_succeeded_total",
            "Total number of validation requests succeeded.",
        )?;
        let validation_requests_succeeded_total =
            prometheus::register(succeeded_counter, registry)?;

        let failed_counter = prometheus::Counter::new(
            "validation_requests_failed_total",
            "Total number of validation requests failed.",
        )?;
        let validation_requests_failed_total = prometheus::register(failed_counter, registry)?;

        Ok(Self {
            active_heads_count,
            validation_requests_served_total,
            validation_requests_succeeded_total,
            validation_requests_failed_total,
        })
    }
}

impl<S> Overseer<S>
where
S: SpawnNamed,
Expand Down Expand Up @@ -503,6 +566,7 @@ where
/// let (overseer, _handler) = Overseer::new(
/// vec![],
/// all_subsystems,
/// None,
/// spawner,
/// ).unwrap();
///
Expand All @@ -522,6 +586,7 @@ where
pub fn new<CV, CB, CS, SD, AD, BS, BD, P, PoVD, RA, AS, NB>(
leaves: impl IntoIterator<Item = BlockInfo>,
all_subsystems: AllSubsystems<CV, CB, CS, SD, AD, BS, BD, P, PoVD, RA, AS, NB>,
prometheus_registry: Option<&prometheus::Registry>,
mut s: S,
) -> SubsystemResult<(Self, OverseerHandler)>
where
Expand Down Expand Up @@ -638,6 +703,19 @@ where
.map(|BlockInfo { hash, parent_hash: _, number }| (hash, number))
.collect();

let metrics = match prometheus_registry {
Some(registry) => {
match Metrics::try_register(registry) {
Ok(metrics) => Some(metrics),
Err(e) => {
log::warn!(target: LOG_TARGET, "Failed to register metrics: {:?}", e);
None
},
}
},
None => None,
};

let this = Self {
candidate_validation_subsystem,
candidate_backing_subsystem,
Expand All @@ -657,6 +735,7 @@ where
events_rx,
leaves,
active_leaves,
metrics,
};

Ok((this, handler))
Expand Down Expand Up @@ -767,7 +846,7 @@ where

// Some subsystem exited? It's time to panic.
if let Poll::Ready(Some(finished)) = poll!(self.running_subsystems.next()) {
log::error!("Subsystem finished unexpectedly {:?}", finished);
log::error!(target: LOG_TARGET, "Subsystem finished unexpectedly {:?}", finished);
self.stop().await;
return Err(SubsystemError);
}
Expand Down Expand Up @@ -1088,6 +1167,7 @@ mod tests {
let (overseer, mut handler) = Overseer::new(
vec![],
all_subsystems,
None,
spawner,
).unwrap();
let overseer_fut = overseer.run().fuse();
Expand Down Expand Up @@ -1151,6 +1231,7 @@ mod tests {
let (overseer, _handle) = Overseer::new(
vec![],
all_subsystems,
None,
spawner,
).unwrap();
let overseer_fut = overseer.run().fuse();
Expand Down Expand Up @@ -1267,6 +1348,7 @@ mod tests {
let (overseer, mut handler) = Overseer::new(
vec![first_block],
all_subsystems,
None,
spawner,
).unwrap();

Expand Down Expand Up @@ -1369,6 +1451,7 @@ mod tests {
let (overseer, mut handler) = Overseer::new(
vec![first_block, second_block],
all_subsystems,
None,
spawner,
).unwrap();

Expand Down
4 changes: 3 additions & 1 deletion node/service/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ fn new_partial<RuntimeApi, Executor>(config: &mut Configuration) -> Result<

fn real_overseer<S: SpawnNamed>(
leaves: impl IntoIterator<Item = BlockInfo>,
prometheus_registry: Option<&Registry>,
s: S,
) -> Result<(Overseer<S>, OverseerHandler), ServiceError> {
let all_subsystems = AllSubsystems {
Expand All @@ -291,6 +292,7 @@ fn real_overseer<S: SpawnNamed>(
Overseer::new(
leaves,
all_subsystems,
prometheus_registry,
s,
).map_err(|e| ServiceError::Other(format!("Failed to create an Overseer: {:?}", e)))
}
Expand Down Expand Up @@ -392,7 +394,7 @@ fn new_full<RuntimeApi, Executor>(
})
.collect();

let (overseer, handler) = real_overseer(leaves, spawner)?;
let (overseer, handler) = real_overseer(leaves, prometheus_registry.as_ref(), spawner)?;
let handler_clone = handler.clone();

task_manager.spawn_essential_handle().spawn_blocking("overseer", Box::pin(async move {
Expand Down

0 comments on commit b6e221d

Please sign in to comment.