Skip to content

Commit

Permalink
feat: implement advanced event loop monitoring (#278)
Browse files Browse the repository at this point in the history
* feat: implement advanced event loop monitoring

* Update lib/metrics/eventLoopLag.js

Co-Authored-By: Sam Roberts <vieuxtech@gmail.com>

* Update lib/metrics/eventLoopLag.js

Co-Authored-By: Sam Roberts <vieuxtech@gmail.com>

* Update lib/metrics/eventLoopLag.js

Co-Authored-By: Sam Roberts <vieuxtech@gmail.com>

* fix: remove timestamp support, fix tests

* Update lib/metrics/eventLoopLag.js

Co-Authored-By: Sam Roberts <vieuxtech@gmail.com>

* Update lib/metrics/eventLoopLag.js

Co-Authored-By: Sam Roberts <vieuxtech@gmail.com>

* Update lib/metrics/eventLoopLag.js

Co-Authored-By: Sam Roberts <vieuxtech@gmail.com>

Co-authored-by: Sam Roberts <vieuxtech@gmail.com>
  • Loading branch information
yvasiyarov and sam-github authored Feb 12, 2020
1 parent fc0497b commit c63689b
Show file tree
Hide file tree
Showing 6 changed files with 134 additions and 11 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ project adheres to [Semantic Versioning](http://semver.org/).
### Added

- feat: implement GC metrics collection without native(C++) modules.
- faet: implement advanced event loop monitoring

## [11.5.3] - 2019-06-27

Expand Down
13 changes: 8 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,14 @@ In addition, some Node-specific metrics are included, such as event loop lag,
active handles, GC and Node.js version. See what metrics there are in
[lib/metrics](lib/metrics).

`collectDefaultMetrics` takes 1 options object with up to 4 entries, a timeout for how
often the probe should be fired, an optional prefix for metric names,
a registry to which metrics should be registered and
`gcDurationBuckets` with custom buckets for GC duration histogram.
Default buckets of GC duration histogram are `[0.001, 0.01, 0.1, 1, 2, 5]` (in seconds).
`collectDefaultMetrics` takes 1 options object with following entries:

- `timeout` for how often the probe should be fired. Default: 10 seconds.
- `prefix` an optional prefix for metric names.
- `registry` to which metrics should be registered.
- `gcDurationBuckets` with custom buckets for GC duration histogram. Default buckets of GC duration histogram are `[0.001, 0.01, 0.1, 1, 2, 5]` (in seconds).
- `eventLoopMonitoringPrecision` with sampling rate in milliseconds. Must be greater than zero. Default: 10.

By default probes are launched every 10 seconds, but this can be modified like this:

```js
Expand Down
1 change: 1 addition & 0 deletions index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,7 @@ export interface DefaultMetricsCollectorConfiguration {
register?: Registry;
prefix?: string;
gcDurationBuckets?: number[];
eventLoopMonitoringPrecision?: number;
}

/**
Expand Down
1 change: 1 addition & 0 deletions lib/defaultMetrics.js
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ module.exports = function startDefaultMetrics(config) {
config = Object.assign(
{
timestamps: true,
eventLoopMonitoringPrecision: 10,
timeout: 10000
},
config
Expand Down
89 changes: 84 additions & 5 deletions lib/metrics/eventLoopLag.js
Original file line number Diff line number Diff line change
@@ -1,31 +1,110 @@
'use strict';

const Gauge = require('../gauge');

// Check if perf_hooks module is available
let perf_hooks;
try {
/* eslint-disable node/no-unsupported-features/node-builtins */
perf_hooks = require('perf_hooks');
} catch (e) {
// node version is too old
}

const NODEJS_EVENTLOOP_LAG = 'nodejs_eventloop_lag_seconds';
const NODEJS_EVENTLOOP_LAG_MIN = 'nodejs_eventloop_lag_min_seconds';
const NODEJS_EVENTLOOP_LAG_MAX = 'nodejs_eventloop_lag_max_seconds';
const NODEJS_EVENTLOOP_LAG_MEAN = 'nodejs_eventloop_lag_mean_seconds';
const NODEJS_EVENTLOOP_LAG_STDDEV = 'nodejs_eventloop_lag_stddev_seconds';
const NODEJS_EVENTLOOP_LAG_P50 = 'nodejs_eventloop_lag_p50_seconds';
const NODEJS_EVENTLOOP_LAG_P90 = 'nodejs_eventloop_lag_p90_seconds';
const NODEJS_EVENTLOOP_LAG_P99 = 'nodejs_eventloop_lag_p99_seconds';

function reportEventloopLag(start, gauge) {
const delta = process.hrtime(start);
const nanosec = delta[0] * 1e9 + delta[1];
const seconds = nanosec / 1e9;

gauge.set(seconds, Date.now());
gauge.set(seconds);
}

module.exports = (registry, config = {}) => {
const namePrefix = config.prefix ? config.prefix : '';

const gauge = new Gauge({
const lag = new Gauge({
name: namePrefix + NODEJS_EVENTLOOP_LAG,
help: 'Lag of event loop in seconds.',
registers: registry ? [registry] : undefined,
aggregator: 'average'
});
const lagMin = new Gauge({
name: namePrefix + NODEJS_EVENTLOOP_LAG_MIN,
help: 'The minimum recorded event loop delay.',
registers: registry ? [registry] : undefined
});
const lagMax = new Gauge({
name: namePrefix + NODEJS_EVENTLOOP_LAG_MAX,
help: 'The maximum recorded event loop delay.',
registers: registry ? [registry] : undefined
});
const lagMean = new Gauge({
name: namePrefix + NODEJS_EVENTLOOP_LAG_MEAN,
help: 'The mean of the recorded event loop delays.',
registers: registry ? [registry] : undefined
});
const lagStddev = new Gauge({
name: namePrefix + NODEJS_EVENTLOOP_LAG_STDDEV,
help: 'The standard deviation of the recorded event loop delays.',
registers: registry ? [registry] : undefined
});
const lagP50 = new Gauge({
name: namePrefix + NODEJS_EVENTLOOP_LAG_P50,
help: 'The 50th percentile of the recorded event loop delays.',
registers: registry ? [registry] : undefined
});
const lagP90 = new Gauge({
name: namePrefix + NODEJS_EVENTLOOP_LAG_P90,
help: 'The 90th percentile of the recorded event loop delays.',
registers: registry ? [registry] : undefined
});
const lagP99 = new Gauge({
name: namePrefix + NODEJS_EVENTLOOP_LAG_P99,
help: 'The 99th percentile of the recorded event loop delays.',
registers: registry ? [registry] : undefined
});

if (!perf_hooks || !perf_hooks.monitorEventLoopDelay) {
return () => {
const start = process.hrtime();
setImmediate(reportEventloopLag, start, lag);
};
}

const histogram = perf_hooks.monitorEventLoopDelay({
resolution: config.eventLoopMonitoringPrecision
});
histogram.enable();

return () => {
const start = process.hrtime();
setImmediate(reportEventloopLag, start, gauge);
setImmediate(reportEventloopLag, start, lag);

lagMin.set(histogram.min / 1e9);
lagMax.set(histogram.max / 1e9);
lagMean.set(histogram.mean / 1e9);
lagStddev.set(histogram.stddev / 1e9);
lagP50.set(histogram.percentile(50) / 1e9);
lagP90.set(histogram.percentile(90) / 1e9);
lagP99.set(histogram.percentile(99) / 1e9);
};
};

module.exports.metricNames = [NODEJS_EVENTLOOP_LAG];
module.exports.metricNames = [
NODEJS_EVENTLOOP_LAG,
NODEJS_EVENTLOOP_LAG_MIN,
NODEJS_EVENTLOOP_LAG_MAX,
NODEJS_EVENTLOOP_LAG_MEAN,
NODEJS_EVENTLOOP_LAG_STDDEV,
NODEJS_EVENTLOOP_LAG_P50,
NODEJS_EVENTLOOP_LAG_P90,
NODEJS_EVENTLOOP_LAG_P99
];
40 changes: 39 additions & 1 deletion test/metrics/eventLoopLagTest.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,50 @@ describe('eventLoopLag', () => {

setTimeout(() => {
const metrics = register.getMetricsAsJSON();
expect(metrics).toHaveLength(1);
expect(metrics).toHaveLength(8);

expect(metrics[0].help).toEqual('Lag of event loop in seconds.');
expect(metrics[0].type).toEqual('gauge');
expect(metrics[0].name).toEqual('nodejs_eventloop_lag_seconds');

expect(metrics[1].help).toEqual('The minimum recorded event loop delay.');
expect(metrics[1].type).toEqual('gauge');
expect(metrics[1].name).toEqual('nodejs_eventloop_lag_min_seconds');

expect(metrics[2].help).toEqual('The maximum recorded event loop delay.');
expect(metrics[2].type).toEqual('gauge');
expect(metrics[2].name).toEqual('nodejs_eventloop_lag_max_seconds');

expect(metrics[3].help).toEqual(
'The mean of the recorded event loop delays.'
);
expect(metrics[3].type).toEqual('gauge');
expect(metrics[3].name).toEqual('nodejs_eventloop_lag_mean_seconds');

expect(metrics[4].help).toEqual(
'The standard deviation of the recorded event loop delays.'
);
expect(metrics[4].type).toEqual('gauge');
expect(metrics[4].name).toEqual('nodejs_eventloop_lag_stddev_seconds');

expect(metrics[5].help).toEqual(
'The 50th percentile of the recorded event loop delays.'
);
expect(metrics[5].type).toEqual('gauge');
expect(metrics[5].name).toEqual('nodejs_eventloop_lag_p50_seconds');

expect(metrics[6].help).toEqual(
'The 90th percentile of the recorded event loop delays.'
);
expect(metrics[6].type).toEqual('gauge');
expect(metrics[6].name).toEqual('nodejs_eventloop_lag_p90_seconds');

expect(metrics[7].help).toEqual(
'The 99th percentile of the recorded event loop delays.'
);
expect(metrics[7].type).toEqual('gauge');
expect(metrics[7].name).toEqual('nodejs_eventloop_lag_p99_seconds');

done();
}, 5);
});
Expand Down

0 comments on commit c63689b

Please sign in to comment.