Skip to content

Commit

Permalink
feat: Allow custom prometheus info metric (#83)
Browse files Browse the repository at this point in the history
#### Motivation
Allow the generation of an "info" metric from environment variables; it is captured as a `Gauge` metric that is set up during container startup.

#### Modifications
- Added constant `MMESH_CUSTOM_ENV_VAR` (naming the environment variable `MM_INFO_METRICS`) in `ModelMeshEnvVars`
- Added map `infoMetricParams` and logic to parse and pass the info to prometheus in `ModelMesh`
- Added a `Gauge` using the parsed label names and values in `Metrics`
- Added sample variables to `pom.xml` for testing and related test in `ModelMeshMetricsTest`

#### Result
- Support for passing information via environment variables, parsed as `<metricname>[;label1=envVarWithValueforLabel1,label2=envVarWithValueforLabel2,...,labelN=envVarWithValueforLabelN,]` if provided.



Signed-off-by: Rafael Vasquez <raf.vasquez@ibm.com>
  • Loading branch information
rafvasq authored Mar 14, 2023
1 parent eb384db commit d1d0156
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 12 deletions.
7 changes: 7 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,13 @@
</argLine>
<!-- required to workaround issue with openjdk 8u181-b13-2 -->
<useSystemClassLoader>false</useSystemClassLoader>
<environmentVariables>
<MM_INFO_METRICS>assistant_deployment_info:relabel;deployment=DEPLOYMENT_NAME,slot=SLOT_NAME,component=COMPONENT_NAME,group=GROUP_NAME</MM_INFO_METRICS>
<DEPLOYMENT_NAME>ga-tf-mm</DEPLOYMENT_NAME>
<SLOT_NAME>ga</SLOT_NAME>
<COMPONENT_NAME>tf-mm</COMPONENT_NAME>
<GROUP_NAME>clu</GROUP_NAME>
</environmentVariables>
</configuration>
</plugin>

Expand Down
33 changes: 26 additions & 7 deletions src/main/java/com/ibm/watson/modelmesh/Metrics.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,21 +36,20 @@
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.lang.reflect.Array;
import java.net.SocketAddress;
import java.nio.channels.DatagramChannel;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.*;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.stream.Stream;

import static com.ibm.watson.modelmesh.Metric.*;
import static com.ibm.watson.modelmesh.ModelMesh.M;
import static com.ibm.watson.modelmesh.ModelMeshEnvVars.MMESH_CUSTOM_ENV_VAR;
import static com.ibm.watson.modelmesh.ModelMeshEnvVars.MMESH_METRICS_ENV_VAR;
import static java.util.concurrent.TimeUnit.*;

/**
Expand Down Expand Up @@ -150,12 +149,14 @@ final class PrometheusMetrics implements Metrics {
5000, 10000, 20000, 60000, 120000, 300000
};

private static final int INFO_METRICS_MAX = 5;

private final CollectorRegistry registry;
private final NettyServer metricServer;
private final boolean shortNames;
private final EnumMap<Metric, Collector> metricsMap = new EnumMap<>(Metric.class);

public PrometheusMetrics(Map<String, String> params) throws Exception {
public PrometheusMetrics(Map<String, String> params, Map<String, String> infoMetricParams) throws Exception {
int port = 2112;
boolean shortNames = true;
boolean https = true;
Expand Down Expand Up @@ -230,6 +231,24 @@ public PrometheusMetrics(Map<String, String> params) throws Exception {
}
}

if (infoMetricParams != null && !infoMetricParams.isEmpty()){
if (infoMetricParams.size() > INFO_METRICS_MAX) {
throw new Exception("Too many info metrics provided in env var " + MMESH_CUSTOM_ENV_VAR + ": \""
+ infoMetricParams+ "\". The max is " + INFO_METRICS_MAX);
}

String metric_name = infoMetricParams.remove("metric_name");
String[] labelNames = infoMetricParams.keySet().toArray(String[]::new);
String[] labelValues = Stream.of(labelNames).map(infoMetricParams::get).toArray(String[]::new);
Gauge infoMetricsGauge = Gauge.build()
.name(metric_name)
.help("Info Metrics")
.labelNames(labelNames)
.create();
infoMetricsGauge.labels(labelValues).set(1.0);
registry.register(infoMetricsGauge);
}

this.metricServer = new NettyServer(registry, port, https);
this.shortNames = shortNames;

Expand Down
32 changes: 30 additions & 2 deletions src/main/java/com/ibm/watson/modelmesh/ModelMesh.java
Original file line number Diff line number Diff line change
Expand Up @@ -925,7 +925,9 @@ protected final TProcessor initialize() throws Exception {
// }

// "type" or "type:p1=v1;p2=v2;...;pn=vn"
private static final Pattern METRICS_CONFIG_PATT = Pattern.compile("([a-z]+)(:\\w+=[^;]+(?:;\\w+=[^;]+)*)?");
private static final Pattern METRICS_CONFIG_PATT = Pattern.compile("([a-z;]+)(:\\w+=[^;]+(?:;\\w+=[^;]+)*)?");
// "metric_name" or "metric:name;l1=v1,l2=v2,...,ln=vn,"
private static final Pattern CUSTOM_METRIC_CONFIG_PATT = Pattern.compile("([a-z_:]+);(\\w+=[^;]+(?:;\\w+=[^,]+)*)?");

private static Metrics setUpMetrics() throws Exception {
if (System.getenv("MM_METRICS_STATSD_PORT") != null || System.getenv("MM_METRICS_PROMETHEUS_PORT") != null) {
Expand Down Expand Up @@ -958,12 +960,38 @@ private static Metrics setUpMetrics() throws Exception {
params.put(kv[0], kv[1]);
}
}
String infoMetricConfig = getStringParameter(MMESH_CUSTOM_ENV_VAR, null);
Map<String, String> infoMetricParams;
if (infoMetricConfig == null) {
logger.info("{} returned null", MMESH_CUSTOM_ENV_VAR);
infoMetricParams = null;
} else {
logger.info("{} set to \"{}\"", MMESH_CUSTOM_ENV_VAR, infoMetricConfig);
Matcher infoMetricMatcher = CUSTOM_METRIC_CONFIG_PATT.matcher(infoMetricConfig);
if (!infoMetricMatcher.matches()) {
throw new Exception("Invalid metrics configuration provided in env var " + MMESH_CUSTOM_ENV_VAR + ": \""
+ infoMetricConfig + "\"");
}
String infoMetricName = infoMetricMatcher.group(1);
String infoMetricParamString = infoMetricMatcher.group(2);
infoMetricParams = new HashMap<>();
infoMetricParams.put("metric_name", infoMetricName);
for (String infoMetricParam : infoMetricParamString.substring(0).split(",")) {
String[] kv = infoMetricParam.split("=");
String value = System.getenv(kv[1]);
if (value == null) {
throw new Exception("Env var " + kv[1] + " is unresolved in " + MMESH_CUSTOM_ENV_VAR + ": \""
+ infoMetricConfig + "\"");
}
infoMetricParams.put(kv[0], value);
}
}
try {
switch (type.toLowerCase()) {
case "statsd":
return new Metrics.StatsDMetrics(params);
case "prometheus":
return new Metrics.PrometheusMetrics(params);
return new Metrics.PrometheusMetrics(params, infoMetricParams);
case "disabled":
logger.info("Metrics publishing is disabled (env var {}={})", MMESH_METRICS_ENV_VAR, metricsConfig);
return Metrics.NO_OP_METRICS;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ private ModelMeshEnvVars() {}
public static final String LOAD_FAILURE_EXPIRY_ENV_VAR = "MM_LOAD_FAILURE_EXPIRY_TIME_MS";

public static final String MMESH_METRICS_ENV_VAR = "MM_METRICS";
public static final String MMESH_CUSTOM_ENV_VAR = "MM_INFO_METRICS";

public static final String LOG_EACH_INVOKE_ENV_VAR = "MM_LOG_EACH_INVOKE";
public static final String SEND_DEST_ID_ENV_VAR = "MM_SEND_DEST_ID";
Expand Down
16 changes: 13 additions & 3 deletions src/test/java/com/ibm/watson/modelmesh/ModelMeshMetricsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,12 @@ protected int requestCount() {

static final String SCHEME = "https"; // or http

static final String METRIC_NAME = "assistant_deployment_info:relabel";
static final String DEPLOYMENT_NAME = "ga-tf-mm";
static final String SLOT_NAME = "ga";
static final String COMPONENT_NAME = "tf-mm";
static final String GROUP_NAME = "clu";

@Override
protected Map<String, String> extraEnvVars() {
return ImmutableMap.of("MM_METRICS", "prometheus:port=" + METRICS_PORT + ";scheme=" + SCHEME);
Expand All @@ -84,7 +90,7 @@ public void metricsTest() throws Exception {

// verify not found status
ModelStatusInfo status = manageModels.getModelStatus(GetStatusRequest.newBuilder()
.setModelId("i don't exist").build());
.setModelId("I don't exist").build());

assertEquals(ModelStatus.NOT_FOUND, status.getStatus());
assertEquals(0, status.getErrorsCount());
Expand Down Expand Up @@ -166,7 +172,6 @@ public void verifyMetrics() throws Exception {
.filter(Matcher::matches)
.collect(Collectors.toMap(m -> m.group(1), m -> Double.parseDouble(m.group(2))));


System.out.println(metrics.size() + " metrics scraped");

// Spot check some expected metrics and values
Expand Down Expand Up @@ -198,5 +203,10 @@ public void verifyMetrics() throws Exception {
assertEquals(0.0, metrics.get("jvm_buffer_pool_used_buffers{pool=\"mapped\",}")); // mmapped memory not used
assertTrue(metrics.containsKey("jvm_gc_collection_seconds_sum{gc=\"G1 Young Generation\",}"));
assertTrue(metrics.containsKey("jvm_memory_bytes_committed{area=\"heap\",}"));

// Info metrics
assertEquals(1.0, metrics.get(METRIC_NAME + "{component=\"" + COMPONENT_NAME
+ "\",slot=\"" + SLOT_NAME + "\",deployment=\"" + DEPLOYMENT_NAME + "\",group=\"" + GROUP_NAME + "\",}"));
}
}

}

0 comments on commit d1d0156

Please sign in to comment.