Skip to content

Commit

Permalink
Merge pull request #311 from cliveseldon/custom_metric_tags
Browse files Browse the repository at this point in the history
WIP: Custom metric tags
  • Loading branch information
ukclivecox authored Nov 25, 2018
2 parents 1756b9b + 707193f commit eae1624
Show file tree
Hide file tree
Showing 40 changed files with 451 additions and 243 deletions.
9 changes: 5 additions & 4 deletions api-frontend/src/main/resources/application.properties
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@ io.seldon.apife.engine-container-port=8000
io.seldon.apife.engine-grpc-container-port=5001
io.seldon.apife.namespace=${SELDON_CLUSTER_MANAGER_POD_NAMESPACE}

spring.metrics.web.server.requestsMetricName=seldon.api.ingress.server.requests
spring.metrics.web.client.requestsMetricName=seldon.api.ingress.client.requests
spring.metrics.web.server.recordRequestPercentiles=true
spring.metrics.web.client.recordRequestPercentiles=true

management.metrics.web.server.requests-metric-name=seldon.api.ingress.server.requests
management.metrics.web.client.requests-metric-name=seldon.api.ingress.client.requests
management.metrics.distribution.percentiles.all=0.5, 0.75, 0.95, 0.98, 0.99
management.metrics.distribution.percentiles-histogram.all=true
endpoints.prometheus.sensitive=false

7 changes: 6 additions & 1 deletion docs/custom_metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ Seldon Core exposes basic metrics via Prometheus endpoints on its service orches
"type": "COUNTER",
"key": "mycounter",
"value": 1.0
"tags": {"mytag":"mytagvalue"}
},
{
"type": "GAUGE",
Expand Down Expand Up @@ -52,14 +53,18 @@ message Metric {
string key = 1;
MetricType type = 2;
float value = 3;
map<string,string> tags = 4;
}
```


As we expose the metrics via Prometheus, if ```tags``` are added they must appear in every metric response and always have the same set of keys as Prometheus does not allow metrics to have varying numbers of tags. This condition is enforced by the [micrometer](https://micrometer.io/) library we use to expose the metrics and exceptions will happen if violated.

At present the following Seldon Core wrappers provide integrations with custom metrics:

* [Python Wrapper](./wrappers/python.md#custom-metrics)


# example
# Example

There is an [example notebook illustrating a model with custom metrics in python](../examples/models/template_model_with_metrics/modelWithMetrics.ipynb).
2 changes: 1 addition & 1 deletion engine/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<java.version>1.8</java.version>
<grpc.version>1.14.0</grpc.version>
<micrometer.version>1.0.0-rc.1</micrometer.version>
<micrometer.version>1.1.0</micrometer.version>
<start-class>io.seldon.engine.App</start-class>
</properties>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,13 @@

import com.google.common.util.concurrent.AtomicDouble;

import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.Meter;
import io.micrometer.core.instrument.Meter.Type;
import io.micrometer.core.instrument.Metrics;
import io.micrometer.core.instrument.Tag;
import io.micrometer.core.instrument.Tags;
import io.micrometer.core.instrument.Timer;
import io.seldon.protos.PredictionProtos.Metric;

/**
Expand All @@ -24,20 +28,56 @@ public class CustomMetricsManager {

private final static Logger logger = LoggerFactory.getLogger(CustomMetricsManager.class);
private ConcurrentHashMap<Meter.Id,AtomicDouble> gauges = new ConcurrentHashMap<>();


public AtomicDouble get(Iterable<Tag> tags,Metric metric)
public AtomicDouble getGaugeValue(Iterable<Tag> tags,Metric metric)
{
Meter.Id id = Metrics.globalRegistry.createId(metric.getKey(), tags, "");
Tags tagsObj = Tags.concat(tags);
Meter.Id id = new Meter.Id(metric.getKey(), tagsObj, "", "", Type.GAUGE);
if (gauges.containsKey(id))
return gauges.get(id);
else
{
logger.info("Creating new metric Id for {}",metric.toString());
AtomicDouble d = new AtomicDouble();
gauges.put(id, d);
Metrics.globalRegistry.gauge(metric.getKey(), tags, d);
return d;
try
{
AtomicDouble d = new AtomicDouble();
gauges.put(id, d);
Metrics.gauge(metric.getKey(), tags, d);
return d;
}
catch (IllegalArgumentException e)
{
logger.warn("Can't create gauge Metric. Probably same name exists with different number of tags. Not allowed in Prometheus Registry. Key {}",metric.getKey(),e);
throw e;
}
}
}

public Counter getCounter(Iterable<Tag> tags,Metric metric)
{
try
{
Counter counter = Metrics.counter(metric.getKey(), tags);
return counter;
}
catch (IllegalArgumentException e)
{
logger.warn("Can't create counter Metric. Probably same name exists with different number of tags. Not allowed in Prometheus Registry. Key {}",metric.getKey(),e);
throw e;
}
}

public Timer getTimer(Iterable<Tag> tags,Metric metric)
{
try
{
Timer timer = Metrics.timer(metric.getKey(), tags);
return timer;
}
catch (IllegalArgumentException e)
{
logger.warn("Can't create Timer Metric. Probably same name exists with different number of tags. Not allowed in Prometheus Registry. Key: {}",metric.getKey(),e);
throw e;
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
*******************************************************************************/
package io.seldon.engine.metrics;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Map;

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.HttpRequest;
Expand Down Expand Up @@ -61,6 +63,21 @@ public Iterable<Tag> getTags(String urlTemplate, HttpRequest request, ClientHttp
predictorVersion());
}

public Iterable<Tag> getModelMetrics(PredictiveUnitState state, Map<String,String> customTags)
{
ArrayList<Tag> customTagsList = new ArrayList<>();
if (customTags != null)
for(Map.Entry<String, String> e : customTags.entrySet())
{
customTagsList.add(Tag.of(e.getKey(), e.getValue()));
}
for(Tag t : getModelMetrics(state))
{
customTagsList.add(t);
}
return customTagsList;
}

public Iterable<Tag> getModelMetrics(PredictiveUnitState state)
{
return Arrays.asList(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,18 +35,17 @@

import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.Metrics;
import io.micrometer.core.instrument.Tag;
import io.micrometer.core.instrument.Timer;
import io.seldon.engine.exception.APIException;
import io.seldon.engine.metrics.CustomMetricsManager;
import io.seldon.engine.metrics.SeldonRestTemplateExchangeTagsProvider;
import io.seldon.engine.service.InternalPredictionService;
import io.seldon.protos.DeploymentProtos.PredictiveUnit.PredictiveUnitMethod;
import io.seldon.protos.PredictionProtos.DefaultData;
import io.seldon.protos.PredictionProtos.Feedback;
import io.seldon.protos.PredictionProtos.Meta;
import io.seldon.protos.PredictionProtos.Metric;
import io.seldon.protos.PredictionProtos.SeldonMessage;
import io.seldon.protos.PredictionProtos.Tensor;

@Component
public class PredictiveUnitBean extends PredictiveUnitImpl {
Expand Down Expand Up @@ -290,19 +289,22 @@ private void addCustomMetrics(List<Metric> metrics, PredictiveUnitState state)
logger.info("Add metrics");
for(Metric metric : metrics)
{
Iterable<Tag> tags = tagsProvider.getModelMetrics(state, metric.getTagsMap());
switch(metric.getType())
{
case COUNTER:
logger.info("Adding counter {} for {}",metric.getKey(),state.name);
Counter.builder(metric.getKey()).tags(tagsProvider.getModelMetrics(state)).register(Metrics.globalRegistry).increment(metric.getValue());
Counter counter = customMetricsManager.getCounter(tags, metric);
counter.increment(metric.getValue());
break;
case GAUGE:
logger.info("Adding gauge {} for {}",metric.getKey(),state.name);
customMetricsManager.get(tagsProvider.getModelMetrics(state), metric).set(metric.getValue());
logger.info("Adding gauge {} for {}",metric.getKey(),state.name);
customMetricsManager.getGaugeValue(tags, metric).set(metric.getValue());
break;
case TIMER:
logger.info("Adding timer {} for {}",metric.getKey(),state.name);
Timer.builder(metric.getKey()).tags(tagsProvider.getModelMetrics(state)).register(Metrics.globalRegistry).record((long) metric.getValue(), TimeUnit.MILLISECONDS);
logger.info("Adding timer {} for {}",metric.getKey(),state.name);
Timer timer = customMetricsManager.getTimer(tags, metric);
timer.record((long) metric.getValue(), TimeUnit.MILLISECONDS);
break;
case UNRECOGNIZED:
break;
Expand Down
8 changes: 4 additions & 4 deletions engine/src/main/resources/application.properties
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
server.port = 8081
server.additionalPorts=8082

spring.metrics.web.server.requestsMetricName=seldon.api.engine.server.requests
spring.metrics.web.client.requestsMetricName=seldon.api.engine.client.requests
spring.metrics.web.server.recordRequestPercentiles=true
spring.metrics.web.client.recordRequestPercentiles=true
management.metrics.web.server.requests-metric-name=seldon.api.engine.server.requests
management.metrics.web.client.requests-metric-name=seldon.api.engine.client.requests
management.metrics.distribution.percentiles.all=0.5, 0.75, 0.95, 0.98, 0.99
management.metrics.distribution.percentiles-histogram.all=true
endpoints.prometheus.sensitive=false

spring.jmx.enabled = false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@

@RunWith(SpringRunner.class)
@SpringBootTest(webEnvironment = WebEnvironment.RANDOM_PORT)
@AutoConfigureMockMvc
//@AutoConfigureMockMvc
public class TestRestClientController {

@Autowired
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import io.kubernetes.client.proto.Meta.Time;
import io.kubernetes.client.proto.Meta.Timestamp;
import io.kubernetes.client.proto.Resource.Quantity;
import io.micrometer.core.instrument.Metrics;
import io.seldon.engine.pb.IntOrStringUtils;
import io.seldon.engine.pb.JsonFormat;
import io.seldon.engine.pb.QuantityUtils;
Expand Down Expand Up @@ -90,6 +91,7 @@ private <T extends Message.Builder> void updateMessageBuilderFromJson(T messageB

@Before
public void setup() throws Exception {

mvc = MockMvcBuilders
.webAppContextSetup(context)
.build();
Expand All @@ -104,6 +106,7 @@ public void setup() throws Exception {
@Autowired
private InternalPredictionService internalPredictionService;




@Test
Expand Down Expand Up @@ -166,8 +169,9 @@ public ResponseEntity<String> answer(InvocationOnMock invocation) {
MvcResult res2 = mvc.perform(MockMvcRequestBuilders.get("/prometheus")).andReturn();
Assert.assertEquals(200, res2.getResponse().getStatus());
response = res2.getResponse().getContentAsString();
Assert.assertTrue(response.indexOf("mycounter_total{deployment_name=\"None\",model_image=\"seldonio/mean_classifier\",model_name=\"mean-classifier\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
Assert.assertTrue(response.indexOf("mytimer_duration_seconds_count{deployment_name=\"None\",model_image=\"seldonio/mean_classifier\",model_name=\"mean-classifier\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
System.out.println("response is ["+response+"]");
Assert.assertTrue(response.indexOf("mycounter_total{deployment_name=\"None\",model_image=\"seldonio/mean_classifier\",model_name=\"mean-classifier\",model_version=\"0.6\",mytag1=\"mytagval1\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
Assert.assertTrue(response.indexOf("mytimer_seconds_count{deployment_name=\"None\",model_image=\"seldonio/mean_classifier\",model_name=\"mean-classifier\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
Assert.assertTrue(response.indexOf("mygauge{deployment_name=\"None\",model_image=\"seldonio/mean_classifier\",model_name=\"mean-classifier\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 22.0")>-1);
System.out.println(response);

Expand Down Expand Up @@ -200,8 +204,8 @@ public ResponseEntity<String> answer(InvocationOnMock invocation) {
res2 = mvc.perform(MockMvcRequestBuilders.get("/prometheus")).andReturn();
Assert.assertEquals(200, res2.getResponse().getStatus());
response = res2.getResponse().getContentAsString();
Assert.assertTrue(response.indexOf("mycounter_total{deployment_name=\"None\",model_image=\"seldonio/mean_classifier\",model_name=\"mean-classifier\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 2.0")>-1);
Assert.assertTrue(response.indexOf("mytimer_duration_seconds_count{deployment_name=\"None\",model_image=\"seldonio/mean_classifier\",model_name=\"mean-classifier\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 2.0")>-1);
Assert.assertTrue(response.indexOf("mycounter_total{deployment_name=\"None\",model_image=\"seldonio/mean_classifier\",model_name=\"mean-classifier\",model_version=\"0.6\",mytag1=\"mytagval1\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 2.0")>-1);
Assert.assertTrue(response.indexOf("mytimer_seconds_count{deployment_name=\"None\",model_image=\"seldonio/mean_classifier\",model_name=\"mean-classifier\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 2.0")>-1);
Assert.assertTrue(response.indexOf("mygauge{deployment_name=\"None\",model_image=\"seldonio/mean_classifier\",model_name=\"mean-classifier\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 100.0")>-1);
System.out.println(response);
}
Expand Down Expand Up @@ -256,8 +260,8 @@ public void testInputTransformInputMetrics() throws Exception
MvcResult res2 = mvc.perform(MockMvcRequestBuilders.get("/prometheus")).andReturn();
Assert.assertEquals(200, res2.getResponse().getStatus());
response = res2.getResponse().getContentAsString();
Assert.assertTrue(response.indexOf("mycounter_total{deployment_name=\"None\",model_image=\"seldonio/transformer\",model_name=\"transformer\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
Assert.assertTrue(response.indexOf("mytimer_duration_seconds_count{deployment_name=\"None\",model_image=\"seldonio/transformer\",model_name=\"transformer\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
Assert.assertTrue(response.indexOf("mycounter_total{deployment_name=\"None\",model_image=\"seldonio/transformer\",model_name=\"transformer\",model_version=\"0.6\",mytag1=\"mytagval1\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
Assert.assertTrue(response.indexOf("mytimer_seconds_count{deployment_name=\"None\",model_image=\"seldonio/transformer\",model_name=\"transformer\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
System.out.println(response);
}

Expand Down Expand Up @@ -312,8 +316,8 @@ public void testTransformOutputMetrics() throws Exception
Assert.assertEquals(200, res2.getResponse().getStatus());
response = res2.getResponse().getContentAsString();
System.out.println(response);
Assert.assertTrue(response.indexOf("mycounter_total{deployment_name=\"None\",model_image=\"seldonio/transformer\",model_name=\"transform_output\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
Assert.assertTrue(response.indexOf("mytimer_duration_seconds_count{deployment_name=\"None\",model_image=\"seldonio/transformer\",model_name=\"transform_output\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
Assert.assertTrue(response.indexOf("mycounter_total{deployment_name=\"None\",model_image=\"seldonio/transformer\",model_name=\"transform_output\",model_version=\"0.6\",mytag1=\"mytagval1\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
Assert.assertTrue(response.indexOf("mytimer_seconds_count{deployment_name=\"None\",model_image=\"seldonio/transformer\",model_name=\"transform_output\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);

}

Expand Down Expand Up @@ -368,8 +372,8 @@ public void testRouterMetrics() throws Exception
Assert.assertEquals(200, res2.getResponse().getStatus());
response = res2.getResponse().getContentAsString();
System.out.println(response);
Assert.assertTrue(response.indexOf("mycounter_total{deployment_name=\"None\",model_image=\"seldonio/router\",model_name=\"router\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
Assert.assertTrue(response.indexOf("mytimer_duration_seconds_count{deployment_name=\"None\",model_image=\"seldonio/router\",model_name=\"router\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
Assert.assertTrue(response.indexOf("mycounter_total{deployment_name=\"None\",model_image=\"seldonio/router\",model_name=\"router\",model_version=\"0.6\",mytag1=\"mytagval1\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
Assert.assertTrue(response.indexOf("mytimer_seconds_count{deployment_name=\"None\",model_image=\"seldonio/router\",model_name=\"router\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);

}

Expand Down Expand Up @@ -423,9 +427,11 @@ public void testCombinerMetrics() throws Exception
MvcResult res2 = mvc.perform(MockMvcRequestBuilders.get("/prometheus")).andReturn();
Assert.assertEquals(200, res2.getResponse().getStatus());
response = res2.getResponse().getContentAsString();
System.out.println(response);
Assert.assertTrue(response.indexOf("mycounter_total{deployment_name=\"None\",model_image=\"seldonio/combiner\",model_name=\"combiner\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
Assert.assertTrue(response.indexOf("mytimer_duration_seconds_count{deployment_name=\"None\",model_image=\"seldonio/combiner\",model_name=\"combiner\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
System.out.println("----------------------------------------");
System.out.println("----------------------------------------");
System.out.println(response);
Assert.assertTrue(response.indexOf("mycounter_total{deployment_name=\"None\",model_image=\"seldonio/combiner\",model_name=\"combiner\",model_version=\"0.6\",mytag1=\"mytagval1\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
Assert.assertTrue(response.indexOf("mytimer_seconds_count{deployment_name=\"None\",model_image=\"seldonio/combiner\",model_name=\"combiner\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);

}

Expand Down
38 changes: 0 additions & 38 deletions engine/src/test/java/io/seldon/engine/grpc/MetricsTest.java

This file was deleted.

Loading

0 comments on commit eae1624

Please sign in to comment.