Skip to content

Commit

Permalink
fix: add more metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
michaelfig committed Feb 15, 2021
1 parent dd63a41 commit e3223fb
Show file tree
Hide file tree
Showing 7 changed files with 131 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,14 @@ package swingset

import (
"encoding/json"
"time"

"github.com/cosmos/cosmos-sdk/telemetry"
sdk "github.com/cosmos/cosmos-sdk/types"
sdkerrors "github.com/cosmos/cosmos-sdk/types/errors"
abci "github.com/tendermint/tendermint/abci/types"

"github.com/Agoric/agoric-sdk/golang/cosmos/x/swingset/types"
)

type beginBlockAction struct {
Expand All @@ -30,6 +34,8 @@ type commitBlockAction struct {
}

func BeginBlock(ctx sdk.Context, req abci.RequestBeginBlock, keeper Keeper) error {
defer telemetry.ModuleMeasureSince(types.ModuleName, time.Now(), telemetry.MetricKeyBeginBlocker)

action := &beginBlockAction{
Type: "BEGIN_BLOCK",
StoragePort: GetPort("storage"),
Expand All @@ -52,6 +58,7 @@ var endBlockHeight int64
var endBlockTime int64

func EndBlock(ctx sdk.Context, req abci.RequestEndBlock, keeper Keeper) ([]abci.ValidatorUpdate, error) {
defer telemetry.ModuleMeasureSince(types.ModuleName, time.Now(), telemetry.MetricKeyEndBlocker)
action := &endBlockAction{
Type: "END_BLOCK",
BlockHeight: ctx.BlockHeight(),
Expand Down Expand Up @@ -80,6 +87,8 @@ func EndBlock(ctx sdk.Context, req abci.RequestEndBlock, keeper Keeper) ([]abci.
}

func CommitBlock(keeper Keeper) error {
defer telemetry.ModuleMeasureSince(types.ModuleName, time.Now(), "commit_blocker")

action := &commitBlockAction{
Type: "COMMIT_BLOCK",
BlockHeight: endBlockHeight,
Expand Down
26 changes: 24 additions & 2 deletions packages/agoric-cli/lib/chain-config.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,15 @@ export const ORIG_SIGNED_BLOCKS_WINDOW = 100;

export const DEFAULT_GRPC_PORT = 9090;
export const DEFAULT_RPC_PORT = 26657;
export const DEFAULT_PROM_PORT = 26660;
export const DEFAULT_API_PORT = 1317;

// Rewrite the app.toml.
export function finishCosmosApp({ appToml, portNum = `${DEFAULT_RPC_PORT}` }) {
export function finishCosmosApp({
appToml,
exportMetrics,
portNum = `${DEFAULT_RPC_PORT}`,
}) {
const rpcPort = Number(portNum);
const app = TOML.parse(appToml);

Expand All @@ -32,12 +38,22 @@ export function finishCosmosApp({ appToml, portNum = `${DEFAULT_RPC_PORT}` }) {
app['pruning-keep-recent'] = '10000';
app['pruning-keep-every'] = '50000';
app['pruning-interval'] = '1000';

const apiPort = DEFAULT_API_PORT + (rpcPort - DEFAULT_RPC_PORT) / 100;
if (exportMetrics) {
app.api.laddr = `tcp://0.0.0.0:${apiPort}`;
app.api.enable = true;
app.telemetry.enabled = true;
app.telemetry['prometheus-retention-time'] = 60;
}

return TOML.stringify(app);
}

// Rewrite the config.toml.
export function finishCosmosConfig({
export function finishTendermintConfig({
configToml,
exportMetrics,
portNum = `${DEFAULT_RPC_PORT}`,
persistentPeers = '',
}) {
Expand All @@ -57,6 +73,12 @@ export function finishCosmosConfig({
config.rpc.laddr = `tcp://0.0.0.0:${rpcPort}`;
config.rpc.max_body_bytes = 15 * 10 ** 6;

if (exportMetrics) {
const promPort = rpcPort - DEFAULT_RPC_PORT + DEFAULT_PROM_PORT;
config.instrumentation.prometheus = true;
config.instrumentation.prometheus_listen_addr = `:${promPort}`;
}

// Needed for IBC.
config.tx_index.index_all_keys = true;

Expand Down
5 changes: 5 additions & 0 deletions packages/agoric-cli/lib/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,11 @@ const main = async (progname, rawArgs, powers) => {
'set the config.toml p2p.persistent_peers value',
'',
)
.option(
'--export-metrics',
'open ports to export Prometheus metrics',
false,
)
.action(async (prog, configDir, cmd) => {
const opts = { ...program.opts(), ...cmd.opts() };
return subMain(setDefaultsMain, ['set-defaults', prog, configDir], opts);
Expand Down
8 changes: 6 additions & 2 deletions packages/agoric-cli/lib/set-defaults.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { basename } from 'path';
import { assert, details as X } from '@agoric/assert';
import {
finishCosmosApp,
finishCosmosConfig,
finishTendermintConfig,
finishCosmosGenesis,
} from './chain-config';

Expand All @@ -17,6 +17,8 @@ export default async function setDefaultsMain(progname, rawArgs, powers, opts) {
X`<prog> must currently be 'ag-chain-cosmos'`,
);

const { exportMetrics } = opts;

let appFile;
let configFile;
let genesisFile;
Expand Down Expand Up @@ -49,6 +51,7 @@ export default async function setDefaultsMain(progname, rawArgs, powers, opts) {

const newAppToml = finishCosmosApp({
appToml,
exportMetrics,
});
await create(appFile, newAppToml);
}
Expand All @@ -58,9 +61,10 @@ export default async function setDefaultsMain(progname, rawArgs, powers, opts) {
const { persistentPeers } = opts;
const configToml = await fs.readFile(configFile, 'utf-8');

const newConfigToml = finishCosmosConfig({
const newConfigToml = finishTendermintConfig({
configToml,
persistentPeers,
exportMetrics,
});
await create(configFile, newConfigToml);
}
Expand Down
4 changes: 2 additions & 2 deletions packages/agoric-cli/lib/start.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { createHash } from 'crypto';
import {
STAKING_DENOM,
MINT_DENOM,
finishCosmosConfig,
finishTendermintConfig,
finishCosmosGenesis,
finishCosmosApp,
} from './chain-config';
Expand Down Expand Up @@ -314,7 +314,7 @@ export default async function startMain(progname, rawArgs, powers, opts) {
const newGenesisJson = finishCosmosGenesis({
genesisJson,
});
const newConfigToml = finishCosmosConfig({
const newConfigToml = finishTendermintConfig({
configToml,
portNum,
});
Expand Down
5 changes: 3 additions & 2 deletions packages/cosmic-swingset/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,13 @@ scenario2-setup-nobuild:
$(AGC) --home=t1/n0 --keyring-dir=t1/bootstrap gentx --keyring-backend=test bootstrap 1000000uagstake --chain-id=$(CHAIN_ID)
$(AGC) --home=t1/n0 collect-gentxs
$(AGC) --home=t1/n0 validate-genesis
../agoric-cli/bin/agoric set-defaults ag-chain-cosmos t1/n0/config
../agoric-cli/bin/agoric set-defaults --export-metrics ag-chain-cosmos t1/n0/config
# Set the chain address in all the ag-solos.
$(MAKE) set-local-gci-ingress

scenario2-run-chain:
$(AGC) `$(BREAK_CHAIN) && echo --inspect-brk` --home=t1/n0 start --log_level=warn
OTEL_EXPORTER_PROMETHEUS_PORT=9464 \
$(AGC) `$(BREAK_CHAIN) && echo --inspect-brk` --home=t1/n0 start --log_level=warn

# Provision and start a client.
scenario2-run-client: t1-provision-one-with-powers t1-start-ag-solo
Expand Down
82 changes: 82 additions & 0 deletions packages/cosmic-swingset/README-telemetry.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Cosmic SwingSet Telemetry

The Cosmic SwingSet chain node (`ag-chain-cosmos`) is instrumented to export
useful metrics via [Prometheus](https://prometheus.io/) endpoints on different
inbound TCP ports.

These metrics apply both to validators and regular full nodes.

The following sections explain how to enable each of these metric exporters.
You can enable telemetry just by setting the configuration and then restarting
your node, which will quickly catch up to where it was. Enabling or disabling
telemetry does not affect the correctness of your node.

## Caveats

**NOTE:** the exposed metric ports may need additional firewall rules to accept
TCP connections from your Prometheus host's IP address.

## Agoric VM (SwingSet) metrics

SwingSet is responsible for the chain's JavaScript execution. It is
instrumented with the [OpenTelemetry](https://opentelemetry.io/) (*OTEL*)
system.

To enable the Prometheus exporter, set the desired listening TCP port number in
the `$OTEL_EXPORTER_PROMETHEUS_PORT` environment variable before running the
node. To listen on http://0.0.0.0:9464/metrics use:

```sh
OTEL_EXPORTER_PROMETHEUS_PORT=9464 ag-chain-cosmos start ...
```

You can choose a different host than `0.0.0.0` by setting the
`$OTEL_EXPORTER_PROMETHEUS_HOST` environment variable.

## Cosmos SDK metrics

The [Cosmos SDK](https://docs.cosmos.network/) layer of the system is
responsible for transaction processing, as well as forwarding to the SwingSet layer for dispatching and processing.

To enable the exporting of Cosmos metrics, you need to change the contents of
your `~/.ag-chain-cosmos/config/app.toml` (**not** `config.toml`) in the
`[telemetry]` section:

```toml
[telemetry]
enabled = true
prometheus-retention-time = 60

[api]
# Note: this key is "enable" (without a "d", not "enabled")
enable = true
address = "tcp://0.0.0.0:1317"
```

If the API server is enabled, then this will export metrics at
http://0.0.0.0:1317/metrics?format=prometheus (at your enabled API server port).
The metrics will also be exported at the Tendermint Prometheus port if enabled
in the next section.

The exported metrics are listed at:
https://docs.cosmos.network/v0.40/core/telemetry.html

## Tendermint metrics

The [Tendermint Core](https://tendermint.com/core/) layer of the system is
responsible for the basic blockchain functionality (BFT consensus and validator
sets). It forwards transactions to the Cosmos SDK.

To enable the exporting of Tendermint metrics, you need to change the contents
of your `~/.ag-chain-cosmos/config/config.toml` (**not** `app.toml`) in the
`[instrumentation]` section:

```toml
[instrumentation]
prometheus = true
prometheus_listen_addr = ":26660"
```

This will export Tendermint metrics at http://0.0.0.0:26660/metrics. The metrics
will also be exported at the Cosmos SDK API server port if enabled in the
previous section.

0 comments on commit e3223fb

Please sign in to comment.