Skip to content

Commit

Permalink
feat: more metrics (#7769)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexghr authored Aug 7, 2024
1 parent f7f04e8 commit 048a085
Show file tree
Hide file tree
Showing 19 changed files with 244 additions and 104 deletions.
8 changes: 4 additions & 4 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ services:
P2P_ENABLED: true
PEER_ID_PRIVATE_KEY:
AZTEC_PORT: 8999
TEL_COLLECTOR_BASE_URL: ${TEL_COLLECTOR_BASE_URL:-http://otel-collector:4318}
OTEL_EXPORTER_OTLP_ENDPOINT: ${OTEL_EXPORTER_OTLP_ENDPOINT:-http://otel-collector:4318}
secrets:
- ethereum-host
- p2p-boot-node
Expand All @@ -77,13 +77,13 @@ services:
# if the stack is started with --profile metrics --profile node, give the collector a chance to start before the node
i=0
max=3
while ! curl --head --silent $$TEL_COLLECTOR_BASE_URL > /dev/null; do
while ! curl --head --silent $$OTEL_EXPORTER_OTLP_ENDPOINT > /dev/null; do
echo "OpenTelemetry collector not up. Retrying after 1s";
sleep 1;
i=$$((i+1));
if [ $$i -eq $$max ]; then
echo "OpenTelemetry collector at $$TEL_COLLECTOR_BASE_URL not up after $${max}s. Running without metrics";
unset TEL_COLLECTOR_BASE_URL;
echo "OpenTelemetry collector at $$OTEL_EXPORTER_OTLP_ENDPOINT not up after $${max}s. Running without metrics";
unset OTEL_EXPORTER_OTLP_ENDPOINT;
break
fi;
done;
Expand Down
7 changes: 6 additions & 1 deletion yarn-project/archiver/src/archiver/archiver.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import { type EthAddress } from '@aztec/foundation/eth-address';
import { Fr } from '@aztec/foundation/fields';
import { type DebugLogger, createDebugLogger } from '@aztec/foundation/log';
import { RunningPromise } from '@aztec/foundation/running-promise';
import { Timer } from '@aztec/foundation/timer';
import { ClassRegistererAddress } from '@aztec/protocol-contracts/class-registerer';
import { type TelemetryClient } from '@aztec/telemetry-client';
import {
Expand Down Expand Up @@ -291,8 +292,12 @@ export class Archiver implements ArchiveSource {
);

if (retrievedBlocks.retrievedData.length > 0) {
const timer = new Timer();
await this.store.addBlocks(retrievedBlocks);
this.instrumentation.processNewBlocks(retrievedBlocks.retrievedData);
// The sync duration histogram is declared with an integer value type, so the
// per-block average (elapsed ms divided by the number of blocks) is rounded up
// to a whole millisecond before it is recorded.
this.instrumentation.processNewBlocks(
  Math.ceil(timer.ms() / retrievedBlocks.retrievedData.length),
  retrievedBlocks.retrievedData,
);
const lastL2BlockNumber = retrievedBlocks.retrievedData[retrievedBlocks.retrievedData.length - 1].number;
this.log.verbose(`Processed ${retrievedBlocks.retrievedData.length} new L2 blocks up to ${lastL2BlockNumber}`);
}
Expand Down
27 changes: 21 additions & 6 deletions yarn-project/archiver/src/archiver/instrumentation.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,17 @@
import { type L2Block } from '@aztec/circuit-types';
import { type Gauge, type Histogram, Metrics, type TelemetryClient, ValueType } from '@aztec/telemetry-client';
import {
type Gauge,
type Histogram,
Metrics,
type TelemetryClient,
ValueType,
exponentialBuckets,
} from '@aztec/telemetry-client';

export class ArchiverInstrumentation {
private blockHeight: Gauge;
private blockSize: Histogram;
private blockSize: Gauge;
private syncDuration: Histogram;

constructor(telemetry: TelemetryClient) {
const meter = telemetry.getMeter('Archiver');
Expand All @@ -12,16 +20,23 @@ export class ArchiverInstrumentation {
valueType: ValueType.INT,
});

this.blockSize = meter.createHistogram(Metrics.ARCHIVER_BLOCK_SIZE, {
description: 'The number of transactions processed per block',
this.blockSize = meter.createGauge(Metrics.ARCHIVER_BLOCK_SIZE, {
description: 'The number of transactions in a block',
valueType: ValueType.INT,
});

this.syncDuration = meter.createHistogram(Metrics.ARCHIVER_SYNC_DURATION, {
unit: 'ms',
description: 'Duration to sync a block',
valueType: ValueType.INT,
advice: {
explicitBucketBoundaries: [2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192],
explicitBucketBoundaries: exponentialBuckets(1, 16),
},
});
}

public processNewBlocks(blocks: L2Block[]) {
public processNewBlocks(syncTimePerBlock: number, blocks: L2Block[]) {
this.syncDuration.record(syncTimePerBlock);
this.blockHeight.record(Math.max(...blocks.map(b => b.number)));
for (const block of blocks) {
this.blockSize.record(block.body.txEffects.length);
Expand Down
6 changes: 3 additions & 3 deletions yarn-project/aztec/terraform/node/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -337,11 +337,11 @@ resource "aws_ecs_task_definition" "aztec-node" {
value = tostring(var.PROVING_ENABLED)
},
{
name = "TEL_COLLECTOR_BASE_URL"
name = "OTEL_EXPORTER_OTLP_ENDPOINT"
value = "http://aztec-otel.local:4318"
},
{
name = "TEL_SERVICE_NAME"
name = "OTEL_SERVICE_NAME"
value = "${var.DEPLOY_TAG}-aztec-node-${count.index + 1}"
},
{
Expand All @@ -357,7 +357,7 @@ resource "aws_ecs_task_definition" "aztec-node" {
value = "info"
},
{
name = "TEL_NETWORK_ID",
name = "NETWORK_NAME",
value = "${var.DEPLOY_TAG}"
}
]
Expand Down
6 changes: 3 additions & 3 deletions yarn-project/aztec/terraform/prover/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -270,15 +270,15 @@ resource "aws_ecs_task_definition" "aztec-proving-agent" {
"value": "${var.PROVING_ENABLED}"
},
{
"name": "TEL_COLLECTOR_BASE_URL",
"name": "OTEL_EXPORTER_OTLP_ENDPOINT",
"value": "http://aztec-otel.local:4318"
},
{
"name": "TEL_SERVICE_NAME",
"name": "OTEL_SERVICE_NAME",
"value": "${var.DEPLOY_TAG}-aztec-proving-agent-group-${count.index + 1}"
},
{
"name": "TEL_NETWORK_ID",
"name": "NETWORK_NAME",
"value": "${var.DEPLOY_TAG}"
}
],
Expand Down
2 changes: 1 addition & 1 deletion yarn-project/bb-prover/src/prover/bb_prover.ts
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ export class BBNativeRollupProver implements ServerCircuitProver {
this.instrumentation.recordSize('witGenInputSize', circuitName, input.toBuffer().length);
this.instrumentation.recordSize('witGenOutputSize', circuitName, output.toBuffer().length);

logger.debug(`Generated witness`, {
logger.info(`Generated witness`, {
circuitName,
duration: timer.ms(),
inputSize: input.toBuffer().length,
Expand Down
7 changes: 3 additions & 4 deletions yarn-project/foundation/src/config/env_var.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,9 @@ export type EnvVar =
| 'P2P_QUERY_FOR_IP'
| 'P2P_TX_POOL_KEEP_PROVEN_FOR'
| 'TELEMETRY'
| 'TEL_COLLECTOR_BASE_URL'
| 'TEL_SERVICE_NAME'
| 'TEL_SERVICE_VERSION'
| 'TEL_NETWORK_ID'
| 'OTEL_EXPORTER_OTLP_ENDPOINT'
| 'OTEL_SERVICE_NAME'
| 'NETWORK_NAME'
| 'NETWORK'
| 'API_KEY'
| 'AZTEC_NODE_URL'
Expand Down
8 changes: 6 additions & 2 deletions yarn-project/p2p/src/tx_pool/aztec_kv_tx_pool.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,16 @@ export class AztecKVTxPool implements TxPool {

public markAsMined(txHashes: TxHash[]): Promise<void> {
return this.#store.transaction(() => {
let deleted = 0;
for (const hash of txHashes) {
const key = hash.toString();
void this.#minedTxs.add(key);
void this.#pendingTxs.delete(key);
if (this.#pendingTxs.has(key)) {
deleted++;
void this.#pendingTxs.delete(key);
}
}
this.#metrics.recordRemovedTxs('pending', txHashes.length);
this.#metrics.recordRemovedTxs('pending', deleted);
this.#metrics.recordAddedTxs('mined', txHashes.length);
});
}
Expand Down
57 changes: 41 additions & 16 deletions yarn-project/sequencer-client/src/sequencer/metrics.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import {
Attributes,
type Gauge,
type Histogram,
Metrics,
type TelemetryClient,
Expand All @@ -9,20 +10,21 @@ import {
millisecondBuckets,
} from '@aztec/telemetry-client';

type SequencerStateCallback = () => number;

export class SequencerMetrics {
public readonly tracer: Tracer;

private cancelledBlockCounter: UpDownCounter;
private blocksBuiltCounter: UpDownCounter;
private blockCounter: UpDownCounter;
private blockBuildDuration: Histogram;
private blockTxCount: Histogram;
private currentBlockNumber: Gauge;
private currentBlockSize: Gauge;

constructor(client: TelemetryClient, name = 'Sequencer') {
constructor(client: TelemetryClient, getState: SequencerStateCallback, name = 'Sequencer') {
const meter = client.getMeter(name);
this.tracer = client.getTracer(name);

this.cancelledBlockCounter = meter.createUpDownCounter(Metrics.SEQUENCER_BLOCK_BUILD_CANCELLED_COUNT);
this.blocksBuiltCounter = meter.createUpDownCounter(Metrics.SEQUENCER_BLOCK_BUILD_COUNT);
this.blockCounter = meter.createUpDownCounter(Metrics.SEQUENCER_BLOCK_COUNT);
this.blockBuildDuration = meter.createHistogram(Metrics.SEQUENCER_BLOCK_BUILD_DURATION, {
unit: 'ms',
description: 'Duration to build a block',
Expand All @@ -32,30 +34,53 @@ export class SequencerMetrics {
},
});

this.blockTxCount = meter.createHistogram(Metrics.SEQUENCER_BLOCK_BUILD_TX_COUNT, {
description: 'Number of transactions in a block',
valueType: ValueType.INT,
const currentState = meter.createObservableGauge(Metrics.SEQUENCER_CURRENT_STATE, {
description: 'Current state of the sequencer',
});

currentState.addCallback(observer => {
observer.observe(getState());
});

this.currentBlockNumber = meter.createGauge(Metrics.SEQUENCER_CURRENT_BLOCK_NUMBER, {
description: 'Current block number',
});

// Gauge holding the transaction count of the block currently being built;
// it is written together with the block number by setCurrentBlock().
this.currentBlockSize = meter.createGauge(Metrics.SEQUENCER_CURRENT_BLOCK_SIZE, {
  description: 'Number of transactions in the current block',
});

this.setCurrentBlock(0, 0);
}

recordCancelledBlock() {
this.cancelledBlockCounter.add(1);
this.blockCounter.add(1, {
[Attributes.STATUS]: 'cancelled',
});
this.setCurrentBlock(0, 0);
}

recordPublishedBlock(buildDurationMs: number) {
this.blocksBuiltCounter.add(1, {
[Attributes.OK]: true,
this.blockCounter.add(1, {
[Attributes.STATUS]: 'published',
});
this.blockBuildDuration.record(Math.ceil(buildDurationMs));
this.setCurrentBlock(0, 0);
}

recordFailedBlock() {
this.blocksBuiltCounter.add(1, {
[Attributes.OK]: false,
this.blockCounter.add(1, {
[Attributes.STATUS]: 'failed',
});
this.setCurrentBlock(0, 0);
}

recordNewBlock(blockNumber: number, txCount: number) {
this.setCurrentBlock(blockNumber, txCount);
}

recordNewBlock(txCount: number) {
this.blockTxCount.record(txCount);
private setCurrentBlock(blockNumber: number, txCount: number) {
this.currentBlockNumber.record(blockNumber);
this.currentBlockSize.record(txCount);
}
}
8 changes: 5 additions & 3 deletions yarn-project/sequencer-client/src/sequencer/sequencer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ export class Sequencer {
private log = createDebugLogger('aztec:sequencer'),
) {
this.updateConfig(config);
this.metrics = new SequencerMetrics(telemetry, 'Sequencer');
this.metrics = new SequencerMetrics(telemetry, () => this.state, 'Sequencer');
this.log.verbose(`Initialized sequencer with ${this.minTxsPerBLock}-${this.maxTxsPerBlock} txs per block.`);
}

Expand Down Expand Up @@ -284,7 +284,7 @@ export class Sequencer {
historicalHeader: Header | undefined,
elapsedSinceLastBlock: number,
): Promise<void> {
this.metrics.recordNewBlock(validTxs.length);
this.metrics.recordNewBlock(newGlobalVariables.blockNumber.toNumber(), validTxs.length);
const workTimer = new Timer();
this.state = SequencerState.CREATING_BLOCK;
this.log.info(`Building block ${newGlobalVariables.blockNumber.toNumber()} with ${validTxs.length} transactions`);
Expand Down Expand Up @@ -368,7 +368,9 @@ export class Sequencer {
try {
await this.publishL2Block(block);
this.metrics.recordPublishedBlock(workDuration);
this.log.info(`Submitted rollup block ${block.number} with ${processedTxs.length} transactions`);
this.log.info(
`Submitted rollup block ${block.number} with ${processedTxs.length} transactions duration=${workDuration}ms`,
);
} catch (err) {
this.metrics.recordFailedBlock();
throw err;
Expand Down
1 change: 1 addition & 0 deletions yarn-project/telemetry-client/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
"@opentelemetry/exporter-metrics-otlp-http": "^0.52.0",
"@opentelemetry/exporter-trace-otlp-http": "^0.52.0",
"@opentelemetry/host-metrics": "^0.35.2",
"@opentelemetry/resource-detector-aws": "^1.5.2",
"@opentelemetry/resources": "^1.25.0",
"@opentelemetry/sdk-metrics": "^1.25.0",
"@opentelemetry/sdk-trace-node": "^1.25.0",
Expand Down
2 changes: 1 addition & 1 deletion yarn-project/telemetry-client/src/attributes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
*/

/** The Aztec network identifier */
export const NETWORK_ID = 'aztec.network.id';
export const NETWORK_NAME = 'aztec.network_name';

/**
* The name of the protocol circuit being run (e.g. public-kernel-setup or base-rollup)
Expand Down
19 changes: 19 additions & 0 deletions yarn-project/telemetry-client/src/aztec_resource_detector.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import { type DetectorSync, type IResource, Resource } from '@opentelemetry/resources';

import { NETWORK_NAME } from './attributes.js';
import { getConfigEnvVars } from './config.js';

/**
 * Resource detector that contributes Aztec-specific attributes to the
 * telemetry resource.
 */
class AztecDetector implements DetectorSync {
  /**
   * Builds a resource carrying the configured network name.
   * @returns A resource whose only attribute is NETWORK_NAME.
   */
  detect(): IResource {
    const { networkName } = getConfigEnvVars();
    const attributes = { [NETWORK_NAME]: networkName };

    return new Resource(attributes);
  }
}

/** Shared AztecDetector instance exported for use by other modules. */
export const aztecDetector = new AztecDetector();
29 changes: 29 additions & 0 deletions yarn-project/telemetry-client/src/config.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import { type ConfigMappingsType, getConfigFromMappings } from '@aztec/foundation/config';

/** Settings for the telemetry client; populated from environment variables via telemetryClientConfigMappings. */
export interface TelemetryClientConfig {
/** URL of the telemetry collector (OTEL_EXPORTER_OTLP_ENDPOINT). Optional; no default is defined. */
collectorBaseUrl?: URL;
/** Value of OTEL_SERVICE_NAME; defaults to 'aztec'. */
serviceName: string;
/** Value of NETWORK_NAME; defaults to 'local'. */
networkName: string;
}

/**
 * Describes how each TelemetryClientConfig field is sourced: the environment
 * variable it is read from, a human-readable description, and (where given)
 * a parser or a default value.
 */
export const telemetryClientConfigMappings: ConfigMappingsType<TelemetryClientConfig> = {
  collectorBaseUrl: {
    env: 'OTEL_EXPORTER_OTLP_ENDPOINT',
    description: 'The URL of the telemetry collector',
    // The URL constructor throws on malformed input, so a bad value is not silently accepted.
    parseEnv: (val: string) => new URL(val),
  },
  serviceName: {
    env: 'OTEL_SERVICE_NAME',
    description: 'The name of the service reporting telemetry',
    default: 'aztec',
  },
  networkName: {
    env: 'NETWORK_NAME',
    description: 'The name of the Aztec network this service belongs to',
    default: 'local',
  },
};

/**
 * Resolves the telemetry client configuration by passing
 * telemetryClientConfigMappings to getConfigFromMappings.
 * @returns The resolved telemetry client configuration.
 */
export function getConfigEnvVars(): TelemetryClientConfig {
  const resolved = getConfigFromMappings<TelemetryClientConfig>(telemetryClientConfigMappings);
  return resolved;
}
10 changes: 6 additions & 4 deletions yarn-project/telemetry-client/src/metrics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,18 @@ export const CIRCUIT_SIZE = 'aztec.circuit.size';
export const MEMPOOL_TX_COUNT = 'aztec.mempool.tx_count';
export const MEMPOOL_TX_SIZE = 'aztec.mempool.tx_size';

export const ARCHIVER_SYNC_DURATION = 'aztec.archiver.sync_duration';
export const ARCHIVER_BLOCK_HEIGHT = 'aztec.archiver.block_height';
export const ARCHIVER_BLOCK_SIZE = 'aztec.archiver.block_size';

export const NODE_RECEIVE_TX_DURATION = 'aztec.node.receive_tx.duration';
export const NODE_RECEIVE_TX_COUNT = 'aztec.node.receive_tx.count';

export const SEQUENCER_BLOCK_BUILD_DURATION = 'aztec.sequencer.block_build.duration';
export const SEQUENCER_BLOCK_BUILD_COUNT = 'aztec.sequencer.block_build.ok_count';
export const SEQUENCER_BLOCK_BUILD_CANCELLED_COUNT = 'aztec.sequencer.block_build.cancelled_count';
export const SEQUENCER_BLOCK_BUILD_TX_COUNT = 'aztec.sequencer.block_build.tx_count';
export const SEQUENCER_BLOCK_BUILD_DURATION = 'aztec.sequencer.block.build_duration';
export const SEQUENCER_BLOCK_COUNT = 'aztec.sequencer.block.count';
export const SEQUENCER_CURRENT_STATE = 'aztec.sequencer.current.state';
export const SEQUENCER_CURRENT_BLOCK_NUMBER = 'aztec.sequencer.current.block_number';
export const SEQUENCER_CURRENT_BLOCK_SIZE = 'aztec.sequencer.current.block_size';

export const L1_PUBLISHER_GAS_PRICE = 'aztec.l1_publisher.gas_price';
export const L1_PUBLISHER_TX_COUNT = 'aztec.l1_publisher.tx_count';
Expand Down
Loading

0 comments on commit 048a085

Please sign in to comment.