From e387185f41d1767122790f3f0a0303fb6216ae71 Mon Sep 17 00:00:00 2001 From: Botond Szirtes Date: Mon, 29 Jul 2024 14:29:14 +0200 Subject: [PATCH] Add an option to expose Prometheus metrics via http/s in the forwarder-vpp Signed-off-by: Botond Szirtes --- README.md | 58 ++++++++++++++++--------------- internal/config/config.go | 28 ++++++++------- internal/imports/imports_linux.go | 1 + main.go | 8 +++++ 4 files changed, 54 insertions(+), 41 deletions(-) diff --git a/README.md b/README.md index 97fd5035..1bd929b1 100644 --- a/README.md +++ b/README.md @@ -20,34 +20,36 @@ docker build . ## Environment config -* `NSM_NAME` - Name of Endpoint -* `NSM_LABELS` - Labels related to this forwarder-vpp instance -* `NSM_NSNAME` - Name of Network Service to Register with Registry -* `NSM_CONNECT_TO` - url to connect to -* `NSM_LISTEN_ON` - url to listen on -* `NSM_MAX_TOKEN_LIFETIME` - maximum lifetime of tokens -* `NSM_REGISTRY_CLIENT_POLICIES` - paths to files and directories that contain registry client policies -* `NSM_LOG_LEVEL` - Log level -* `NSM_DIAL_TIMEOUT` - Timeout for the dial the next endpoint -* `NSM_OPEN_TELEMETRY_ENDPOINT` - OpenTelemetry Collector Endpoint -* `NSM_METRICS_EXPORT_INTERVAL` - interval between mertics exports -* `NSM_PPROF_ENABLED` - is pprof enabled (default: "false") -* `NSM_PPROF_LISTEN_ON` - pprof URL to ListenAndServe (default: "localhost:6060") -* `NSM_TUNNEL_IP` - IP to use for tunnels -* `NSM_VXLAN_PORT` - VXLAN port to use -* `NSM_VPP_API_SOCKET` - filename of socket to connect to existing VPP instance. -* `NSM_VPP_INIT` - type of VPP initialization.
Must be AF_XDP, AF_PACKET or NONE -* `NSM_VPP_INIT_PARAMS` - Configuration file path containing VPP API parameters for initialization -* `NSM_RESOURCE_POLL_TIMEOUT` - device plugin polling timeout -* `NSM_DEVICE_PLUGIN_PATH` - path to the device plugin directory -* `NSM_POD_RESOURCES_PATH` - path to the pod resources directory -* `NSM_DEVICE_SELECTOR_FILE` - config file for device name to label matching -* `NSM_SRIOV_CONFIG_FILE` - PCI resources config path -* `NSM_PCI_DEVICES_PATH` - path to the PCI devices directory -* `NSM_PCI_DRIVERS_PATH` - path to the PCI drivers directory -* `NSM_CGROUP_PATH` - path to the host cgroup directory -* `NSM_VFIO_PATH` - path to the host VFIO directory -* `NSM_MECHANISM_PRIORITY` - sets priorities for mechanisms +* `NSM_NAME` - Name of Endpoint +* `NSM_LABELS` - Labels related to this forwarder-vpp instance +* `NSM_NSNAME` - Name of Network Service to Register with Registry +* `NSM_CONNECT_TO` - url to connect to +* `NSM_LISTEN_ON` - url to listen on +* `NSM_MAX_TOKEN_LIFETIME` - maximum lifetime of tokens +* `NSM_REGISTRY_CLIENT_POLICIES` - paths to files and directories that contain registry client policies +* `NSM_LOG_LEVEL` - Log level +* `NSM_DIAL_TIMEOUT` - Timeout for the dial the next endpoint +* `NSM_OPEN_TELEMETRY_ENDPOINT` - OpenTelemetry Collector Endpoint +* `NSM_METRICS_EXPORT_INTERVAL` - interval between mertics exports +* `NSM_PPROF_ENABLED` - is pprof enabled (default: "false") +* `NSM_PPROF_LISTEN_ON` - pprof URL to ListenAndServe (default: "localhost:6060") +* `NSM_TUNNEL_IP` - IP to use for tunnels +* `NSM_VXLAN_PORT` - VXLAN port to use +* `NSM_VPP_API_SOCKET` - filename of socket to connect to existing VPP instance. +* `NSM_VPP_INIT` - type of VPP initialization.
Must be AF_XDP, AF_PACKET or NONE +* `NSM_VPP_INIT_PARAMS` - Configuration file path containing VPP API parameters for initialization +* `NSM_RESOURCE_POLL_TIMEOUT` - device plugin polling timeout +* `NSM_DEVICE_PLUGIN_PATH` - path to the device plugin directory +* `NSM_POD_RESOURCES_PATH` - path to the pod resources directory +* `NSM_DEVICE_SELECTOR_FILE` - config file for device name to label matching +* `NSM_SRIOV_CONFIG_FILE` - PCI resources config path +* `NSM_PCI_DEVICES_PATH` - path to the PCI devices directory +* `NSM_PCI_DRIVERS_PATH` - path to the PCI drivers directory +* `NSM_CGROUP_PATH` - path to the host cgroup directory +* `NSM_VFIO_PATH` - path to the host VFIO directory +* `NSM_MECHANISM_PRIORITY` - sets priorities for mechanisms +* `NSM_PROMETHEUS_LISTEN_ON` - Prometheus URL to ListenAndServe (default: ":8081") +* `NSM_PROMETHEUS_SERVER_HEADER_TIMEOUT` - Timeout for how long the Prometheus server waits for complete request headers from the client (default: "5s") # Testing diff --git a/internal/config/config.go b/internal/config/config.go index cbf439bd..32e6cb22 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -36,19 +36,21 @@ import ( // Config - configuration for cmd-forwarder-vpp type Config struct { - Name string `default:"forwarder" desc:"Name of Endpoint"` - Labels map[string]string `default:"p2p:true" desc:"Labels related to this forwarder-vpp instance"` - NSName string `default:"forwarder" desc:"Name of Network Service to Register with Registry"` - ConnectTo url.URL `default:"unix:///connect.to.socket" desc:"url to connect to" split_words:"true"` - ListenOn url.URL `default:"unix:///listen.on.socket" desc:"url to listen on" split_words:"true"` - MaxTokenLifetime time.Duration `default:"10m" desc:"maximum lifetime of tokens" split_words:"true"` - RegistryClientPolicies []string `default:"etc/nsm/opa/common/.*.rego,etc/nsm/opa/registry/.*.rego,etc/nsm/opa/client/.*.rego"
desc:"paths to files and directories that contain registry client policies" split_words:"true"` - LogLevel string `default:"INFO" desc:"Log level" split_words:"true"` - DialTimeout time.Duration `default:"750ms" desc:"Timeout for the dial the next endpoint" split_words:"true"` - OpenTelemetryEndpoint string `default:"otel-collector.observability.svc.cluster.local:4317" desc:"OpenTelemetry Collector Endpoint" split_words:"true"` - MetricsExportInterval time.Duration `default:"10s" desc:"interval between mertics exports" split_words:"true"` - PprofEnabled bool `default:"false" desc:"is pprof enabled" split_words:"true"` - PprofListenOn string `default:"localhost:6060" desc:"pprof URL to ListenAndServe" split_words:"true"` + Name string `default:"forwarder" desc:"Name of Endpoint"` + Labels map[string]string `default:"p2p:true" desc:"Labels related to this forwarder-vpp instance"` + NSName string `default:"forwarder" desc:"Name of Network Service to Register with Registry"` + ConnectTo url.URL `default:"unix:///connect.to.socket" desc:"url to connect to" split_words:"true"` + ListenOn url.URL `default:"unix:///listen.on.socket" desc:"url to listen on" split_words:"true"` + MaxTokenLifetime time.Duration `default:"10m" desc:"maximum lifetime of tokens" split_words:"true"` + RegistryClientPolicies []string `default:"etc/nsm/opa/common/.*.rego,etc/nsm/opa/registry/.*.rego,etc/nsm/opa/client/.*.rego" desc:"paths to files and directories that contain registry client policies" split_words:"true"` + LogLevel string `default:"INFO" desc:"Log level" split_words:"true"` + DialTimeout time.Duration `default:"750ms" desc:"Timeout for the dial the next endpoint" split_words:"true"` + OpenTelemetryEndpoint string `default:"otel-collector.observability.svc.cluster.local:4317" desc:"OpenTelemetry Collector Endpoint" split_words:"true"` + MetricsExportInterval time.Duration `default:"10s" desc:"interval between mertics exports" split_words:"true"` + PprofEnabled bool `default:"false" 
desc:"is pprof enabled" split_words:"true"` + PprofListenOn string `default:"localhost:6060" desc:"pprof URL to ListenAndServe" split_words:"true"` + PrometheusListenOn string `default:":8081" desc:"Prometheus URL to ListenAndServe" split_words:"true"` + PrometheusServerHeaderTimeout time.Duration `default:"5s" desc:"Timeout for how long the Prometheus server waits for complete request headers from the client" split_words:"true"` TunnelIP net.IP `desc:"IP to use for tunnels" split_words:"true"` VxlanPort uint16 `default:"0" desc:"VXLAN port to use" split_words:"true"` diff --git a/internal/imports/imports_linux.go b/internal/imports/imports_linux.go index eaeedc76..6467636e 100644 --- a/internal/imports/imports_linux.go +++ b/internal/imports/imports_linux.go @@ -86,6 +86,7 @@ import ( _ "github.com/networkservicemesh/sdk/pkg/tools/monitorconnection/authorize" _ "github.com/networkservicemesh/sdk/pkg/tools/opentelemetry" _ "github.com/networkservicemesh/sdk/pkg/tools/pprofutils" + _ "github.com/networkservicemesh/sdk/pkg/tools/prometheus" _ "github.com/networkservicemesh/sdk/pkg/tools/spiffejwt" _ "github.com/networkservicemesh/sdk/pkg/tools/spire" _ "github.com/networkservicemesh/sdk/pkg/tools/token" diff --git a/main.go b/main.go index 68e6a2fa..54de9f33 100644 --- a/main.go +++ b/main.go @@ -60,6 +60,7 @@ import ( authmonitor "github.com/networkservicemesh/sdk/pkg/tools/monitorconnection/authorize" "github.com/networkservicemesh/sdk/pkg/tools/opentelemetry" "github.com/networkservicemesh/sdk/pkg/tools/pprofutils" + "github.com/networkservicemesh/sdk/pkg/tools/prometheus" "github.com/networkservicemesh/sdk/pkg/tools/spiffejwt" "github.com/networkservicemesh/sdk/pkg/tools/token" "github.com/networkservicemesh/sdk/pkg/tools/tracing" @@ -150,6 +151,13 @@ func main() { }() } + // ******************************************************************************** + // Configure Prometheus + // 
******************************************************************************** + if prometheus.IsEnabled() { + go prometheus.ListenAndServe(ctx, cfg.PrometheusListenOn, cfg.PrometheusServerHeaderTimeout, cancel) + } + // ******************************************************************************** // Configure pprof // ********************************************************************************