Skip to content

Commit

Permalink
metrics: Expose process_cpu_seconds_total as a float (#754)
Browse files Browse the repository at this point in the history
Prometheus handles all values as `f64`, but we only expose values as
whole integers. This means that the `process_cpu_seconds_total` metric
only exposes whole-second values, while Linux exposes process time in
10ms increments (with the usual `CLK_TCK` of 100).

This change modifies the `Counter` metric type to carry an additional
marker type parameter that provides a strategy for converting the stored
`u64` value to `f64` for export. This strategy is employed so that we can
continue to use `AtomicU64` to back counters and only use floats at
export-time. By default, the unit type (`()`) is used, which converts
counters as before, but an alternate `MillisAsSeconds` strategy is used
to expose fractional seconds from a millisecond counter.

This necessitates changing the histogram buckets to floats as well.
While this change doesn't modify the bucket values, it sets up future
changes to latency metrics.
  • Loading branch information
olix0r authored Nov 30, 2020
1 parent 1e9a001 commit 876ae02
Show file tree
Hide file tree
Showing 7 changed files with 240 additions and 179 deletions.
41 changes: 26 additions & 15 deletions linkerd/app/core/src/telemetry/process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,14 @@ impl FmtMetrics for Report {
#[cfg(target_os = "linux")]
mod system {
use libc::{self, pid_t};
use linkerd2_metrics::{metrics, Counter, FmtMetrics, Gauge};
use linkerd2_metrics::{metrics, Counter, FmtMetrics, Gauge, MillisAsSeconds};
use procinfo::pid;
use std::fmt;
use std::{fs, io};
use tracing::{error, warn};

metrics! {
process_cpu_seconds_total: Counter {
process_cpu_seconds_total: Counter<MillisAsSeconds> {
"Total user and system CPU time spent in seconds."
},
process_open_fds: Gauge { "Number of open file descriptors." },
Expand All @@ -77,16 +77,28 @@ mod system {
#[derive(Clone, Debug)]
pub(super) struct System {
page_size: u64,
clock_ticks_per_sec: u64,
ms_per_tick: u64,
}

impl System {
pub fn new() -> io::Result<Self> {
let page_size = Self::sysconf(libc::_SC_PAGESIZE, "page size")?;

// On Linux, CLK_TCK is ~always `100`, so pure integer division
// works. This is probably not suitable if we encounter other
// values.
let clock_ticks_per_sec = Self::sysconf(libc::_SC_CLK_TCK, "clock ticks per second")?;
let ms_per_tick = 1_000 / clock_ticks_per_sec;
if clock_ticks_per_sec != 100 {
warn!(
clock_ticks_per_sec,
ms_per_tick, "Unexpected value; process_cpu_seconds_total may be inaccurate."
);
}

Ok(Self {
page_size,
clock_ticks_per_sec,
ms_per_tick,
})
}

Expand Down Expand Up @@ -130,9 +142,16 @@ mod system {
};

let clock_ticks = stat.utime as u64 + stat.stime as u64;
let cpu = Counter::from(clock_ticks / self.clock_ticks_per_sec);
let cpu_ms = clock_ticks * self.ms_per_tick;
process_cpu_seconds_total.fmt_help(f)?;
process_cpu_seconds_total.fmt_metric(f, &cpu)?;
process_cpu_seconds_total.fmt_metric(f, &Counter::from(cpu_ms))?;

process_virtual_memory_bytes.fmt_help(f)?;
process_virtual_memory_bytes.fmt_metric(f, &Gauge::from(stat.vsize as u64))?;

process_resident_memory_bytes.fmt_help(f)?;
process_resident_memory_bytes
.fmt_metric(f, &Gauge::from(stat.rss as u64 * self.page_size))?;

match Self::open_fds(stat.pid) {
Ok(open_fds) => {
Expand All @@ -141,7 +160,6 @@ mod system {
}
Err(err) => {
warn!("could not determine process_open_fds: {}", err);
return Ok(());
}
}

Expand All @@ -153,17 +171,10 @@ mod system {
}
Err(err) => {
warn!("could not determine process_max_fds: {}", err);
return Ok(());
}
}

process_virtual_memory_bytes.fmt_help(f)?;
let vsz = Gauge::from(stat.vsize as u64);
process_virtual_memory_bytes.fmt_metric(f, &vsz)?;

process_resident_memory_bytes.fmt_help(f)?;
let rss = Gauge::from(stat.rss as u64 * self.page_size);
process_resident_memory_bytes.fmt_metric(f, &rss)
Ok(())
}
}
}
Expand Down
106 changes: 71 additions & 35 deletions linkerd/metrics/src/counter.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
use super::prom::{FmtLabels, FmtMetric, MAX_PRECISE_VALUE};
use super::{
prom::{FmtLabels, FmtMetric},
Factor,
};
use std::fmt::{self, Display};
use std::sync::atomic::{AtomicU64, Ordering};

Expand All @@ -14,41 +17,58 @@ use std::sync::atomic::{AtomicU64, Ordering};
/// [`rate()`]: https://prometheus.io/docs/prometheus/latest/querying/functions/#rate()
/// [`irate()`]: https://prometheus.io/docs/prometheus/latest/querying/functions/#irate()
/// [`resets()`]: https://prometheus.io/docs/prometheus/latest/querying/functions/#resets
#[derive(Debug, Default)]
pub struct Counter(AtomicU64);
#[derive(Debug)]
pub struct Counter<F = ()>(AtomicU64, std::marker::PhantomData<F>);

// ===== impl Counter =====

impl Counter {
impl<F> Default for Counter<F> {
fn default() -> Self {
Self(AtomicU64::default(), std::marker::PhantomData)
}
}

impl<F> Counter<F> {
pub fn new() -> Self {
Self::default()
}

pub fn incr(&self) {
self.add(1)
}

pub fn add(&self, n: u64) {
self.0.fetch_add(n, Ordering::Release);
}
}

impl<F: Factor> Counter<F> {
/// Return current counter value, wrapped to be safe for use with Prometheus.
pub fn value(&self) -> u64 {
self.0
.load(Ordering::Acquire)
.wrapping_rem(MAX_PRECISE_VALUE + 1)
pub fn value(&self) -> f64 {
let n = self.0.load(Ordering::Acquire);
F::factor(n)
}
}

impl Into<u64> for Counter {
fn into(self) -> u64 {
impl<F: Factor> Into<f64> for &Counter<F> {
fn into(self) -> f64 {
self.value()
}
}

impl From<u64> for Counter {
impl<F> Into<u64> for &Counter<F> {
fn into(self) -> u64 {
self.0.load(Ordering::Acquire)
}
}

impl<F> From<u64> for Counter<F> {
fn from(value: u64) -> Self {
Counter(value.into())
Counter(value.into(), std::marker::PhantomData)
}
}

impl FmtMetric for Counter {
impl<F: Factor> FmtMetric for Counter<F> {
const KIND: &'static str = "counter";

fn fmt_metric<N: Display>(&self, f: &mut fmt::Formatter<'_>, name: N) -> fmt::Result {
Expand All @@ -74,34 +94,50 @@ impl FmtMetric for Counter {
#[cfg(test)]
mod tests {
use super::*;
use crate::{MillisAsSeconds, MAX_PRECISE_UINT64};

#[test]
fn count_simple() {
let cnt = Counter::from(0);
assert_eq!(cnt.value(), 0);
cnt.incr();
assert_eq!(cnt.value(), 1);
cnt.add(41);
assert_eq!(cnt.value(), 42);
cnt.add(0);
assert_eq!(cnt.value(), 42);
let c = Counter::<()>::default();
assert_eq!(c.value(), 0.0);
c.incr();
assert_eq!(c.value(), 1.0);
c.add(41);
assert_eq!(c.value(), 42.0);
c.add(0);
assert_eq!(c.value(), 42.0);
}

#[test]
fn count_wrapping() {
let cnt = Counter::from(MAX_PRECISE_VALUE - 1);
assert_eq!(cnt.value(), MAX_PRECISE_VALUE - 1);
cnt.incr();
assert_eq!(cnt.value(), MAX_PRECISE_VALUE);
cnt.incr();
assert_eq!(cnt.value(), 0);
cnt.incr();
assert_eq!(cnt.value(), 1);

let max = Counter::from(MAX_PRECISE_VALUE);
assert_eq!(max.value(), MAX_PRECISE_VALUE);

let over = Counter::from(MAX_PRECISE_VALUE + 1);
assert_eq!(over.value(), 0);
let c = Counter::<()>::from(MAX_PRECISE_UINT64 - 1);
assert_eq!(c.value(), (MAX_PRECISE_UINT64 - 1) as f64);
c.incr();
assert_eq!(c.value(), MAX_PRECISE_UINT64 as f64);
c.incr();
assert_eq!(c.value(), 0.0);
c.incr();
assert_eq!(c.value(), 1.0);

let max = Counter::<()>::from(MAX_PRECISE_UINT64);
assert_eq!(max.value(), MAX_PRECISE_UINT64 as f64);
}

#[test]
fn millis_as_seconds() {
let c = Counter::<MillisAsSeconds>::from(1);
assert_eq!(c.value(), 0.001);

let c = Counter::<MillisAsSeconds>::from((MAX_PRECISE_UINT64 - 1) * 1000);
assert_eq!(c.value(), (MAX_PRECISE_UINT64 - 1) as f64);
c.add(1000);
assert_eq!(c.value(), MAX_PRECISE_UINT64 as f64);
c.add(1000);
assert_eq!(c.value(), 0.0);
c.add(1000);
assert_eq!(c.value(), 1.0);

let max = Counter::<MillisAsSeconds>::from(MAX_PRECISE_UINT64 * 1000);
assert_eq!(max.value(), MAX_PRECISE_UINT64 as f64);
}
}
4 changes: 2 additions & 2 deletions linkerd/metrics/src/gauge.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use super::prom::{FmtLabels, FmtMetric, MAX_PRECISE_VALUE};
use super::prom::{FmtLabels, FmtMetric};
use std::fmt::{self, Display};
use std::sync::atomic::{AtomicU64, Ordering};

Expand All @@ -20,7 +20,7 @@ impl Gauge {
pub fn value(&self) -> u64 {
self.0
.load(Ordering::Acquire)
.wrapping_rem(MAX_PRECISE_VALUE + 1)
.wrapping_rem(crate::MAX_PRECISE_UINT64 + 1)
}
}

Expand Down
Loading

0 comments on commit 876ae02

Please sign in to comment.