Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Custom metric tags #311

Merged
merged 8 commits into from
Nov 25, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions api-frontend/src/main/resources/application.properties
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@ io.seldon.apife.engine-container-port=8000
io.seldon.apife.engine-grpc-container-port=5001
io.seldon.apife.namespace=${SELDON_CLUSTER_MANAGER_POD_NAMESPACE}

spring.metrics.web.server.requestsMetricName=seldon.api.ingress.server.requests
spring.metrics.web.client.requestsMetricName=seldon.api.ingress.client.requests
spring.metrics.web.server.recordRequestPercentiles=true
spring.metrics.web.client.recordRequestPercentiles=true

management.metrics.web.server.requests-metric-name=seldon.api.ingress.server.requests
management.metrics.web.client.requests-metric-name=seldon.api.ingress.client.requests
management.metrics.distribution.percentiles.all=0.5, 0.75, 0.95, 0.98, 0.99
management.metrics.distribution.percentiles-histogram.all=true
endpoints.prometheus.sensitive=false

7 changes: 6 additions & 1 deletion docs/custom_metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ Seldon Core exposes basic metrics via Prometheus endpoints on its service orches
"type": "COUNTER",
"key": "mycounter",
"value": 1.0
"tags": {"mytag":"mytagvalue"}
},
{
"type": "GAUGE",
Expand Down Expand Up @@ -52,14 +53,18 @@ message Metric {
string key = 1;
MetricType type = 2;
float value = 3;
map<string,string> tags = 4;
}
```


As we expose the metrics via Prometheus, if ```tags``` are added they must appear in every metric response and always have the same set of keys as Prometheus does not allow metrics to have varying numbers of tags. This condition is enforced by the [micrometer](https://micrometer.io/) library we use to expose the metrics and exceptions will happen if violated.

At present the following Seldon Core wrappers provide integrations with custom metrics:

* [Python Wrapper](./wrappers/python.md#custom-metrics)


# example
# Example

There is an [example notebook illustrating a model with custom metrics in python](../examples/models/template_model_with_metrics/modelWithMetrics.ipynb).
2 changes: 1 addition & 1 deletion engine/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<java.version>1.8</java.version>
<grpc.version>1.14.0</grpc.version>
<micrometer.version>1.0.0-rc.1</micrometer.version>
<micrometer.version>1.1.0</micrometer.version>
<start-class>io.seldon.engine.App</start-class>
</properties>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,13 @@

import com.google.common.util.concurrent.AtomicDouble;

import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.Meter;
import io.micrometer.core.instrument.Meter.Type;
import io.micrometer.core.instrument.Metrics;
import io.micrometer.core.instrument.Tag;
import io.micrometer.core.instrument.Tags;
import io.micrometer.core.instrument.Timer;
import io.seldon.protos.PredictionProtos.Metric;

/**
Expand All @@ -24,20 +28,56 @@ public class CustomMetricsManager {

private final static Logger logger = LoggerFactory.getLogger(CustomMetricsManager.class);
private ConcurrentHashMap<Meter.Id,AtomicDouble> gauges = new ConcurrentHashMap<>();


public AtomicDouble get(Iterable<Tag> tags,Metric metric)
public AtomicDouble getGaugeValue(Iterable<Tag> tags,Metric metric)
{
Meter.Id id = Metrics.globalRegistry.createId(metric.getKey(), tags, "");
Tags tagsObj = Tags.concat(tags);
Meter.Id id = new Meter.Id(metric.getKey(), tagsObj, "", "", Type.GAUGE);
if (gauges.containsKey(id))
return gauges.get(id);
else
{
logger.info("Creating new metric Id for {}",metric.toString());
AtomicDouble d = new AtomicDouble();
gauges.put(id, d);
Metrics.globalRegistry.gauge(metric.getKey(), tags, d);
return d;
try
{
AtomicDouble d = new AtomicDouble();
gauges.put(id, d);
Metrics.gauge(metric.getKey(), tags, d);
return d;
}
catch (IllegalArgumentException e)
{
logger.warn("Can't create gauge Metric. Probably same name exists with different number of tags. Not allowed in Prometheus Registry. Key {}",metric.getKey(),e);
throw e;
}
}
}

public Counter getCounter(Iterable<Tag> tags,Metric metric)
{
try
{
Counter counter = Metrics.counter(metric.getKey(), tags);
return counter;
}
catch (IllegalArgumentException e)
{
logger.warn("Can't create counter Metric. Probably same name exists with different number of tags. Not allowed in Prometheus Registry. Key {}",metric.getKey(),e);
throw e;
}
}

public Timer getTimer(Iterable<Tag> tags,Metric metric)
{
try
{
Timer timer = Metrics.timer(metric.getKey(), tags);
return timer;
}
catch (IllegalArgumentException e)
{
logger.warn("Can't create Timer Metric. Probably same name exists with different number of tags. Not allowed in Prometheus Registry. Key: {}",metric.getKey(),e);
throw e;
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
*******************************************************************************/
package io.seldon.engine.metrics;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Map;

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.HttpRequest;
Expand Down Expand Up @@ -61,6 +63,21 @@ public Iterable<Tag> getTags(String urlTemplate, HttpRequest request, ClientHttp
predictorVersion());
}

public Iterable<Tag> getModelMetrics(PredictiveUnitState state, Map<String,String> customTags)
{
ArrayList<Tag> customTagsList = new ArrayList<>();
if (customTags != null)
for(Map.Entry<String, String> e : customTags.entrySet())
{
customTagsList.add(Tag.of(e.getKey(), e.getValue()));
}
for(Tag t : getModelMetrics(state))
{
customTagsList.add(t);
}
return customTagsList;
}

public Iterable<Tag> getModelMetrics(PredictiveUnitState state)
{
return Arrays.asList(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,18 +35,17 @@

import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.Metrics;
import io.micrometer.core.instrument.Tag;
import io.micrometer.core.instrument.Timer;
import io.seldon.engine.exception.APIException;
import io.seldon.engine.metrics.CustomMetricsManager;
import io.seldon.engine.metrics.SeldonRestTemplateExchangeTagsProvider;
import io.seldon.engine.service.InternalPredictionService;
import io.seldon.protos.DeploymentProtos.PredictiveUnit.PredictiveUnitMethod;
import io.seldon.protos.PredictionProtos.DefaultData;
import io.seldon.protos.PredictionProtos.Feedback;
import io.seldon.protos.PredictionProtos.Meta;
import io.seldon.protos.PredictionProtos.Metric;
import io.seldon.protos.PredictionProtos.SeldonMessage;
import io.seldon.protos.PredictionProtos.Tensor;

@Component
public class PredictiveUnitBean extends PredictiveUnitImpl {
Expand Down Expand Up @@ -290,19 +289,22 @@ private void addCustomMetrics(List<Metric> metrics, PredictiveUnitState state)
logger.info("Add metrics");
for(Metric metric : metrics)
{
Iterable<Tag> tags = tagsProvider.getModelMetrics(state, metric.getTagsMap());
switch(metric.getType())
{
case COUNTER:
logger.info("Adding counter {} for {}",metric.getKey(),state.name);
Counter.builder(metric.getKey()).tags(tagsProvider.getModelMetrics(state)).register(Metrics.globalRegistry).increment(metric.getValue());
Counter counter = customMetricsManager.getCounter(tags, metric);
counter.increment(metric.getValue());
break;
case GAUGE:
logger.info("Adding gauge {} for {}",metric.getKey(),state.name);
customMetricsManager.get(tagsProvider.getModelMetrics(state), metric).set(metric.getValue());
logger.info("Adding gauge {} for {}",metric.getKey(),state.name);
customMetricsManager.getGaugeValue(tags, metric).set(metric.getValue());
break;
case TIMER:
logger.info("Adding timer {} for {}",metric.getKey(),state.name);
Timer.builder(metric.getKey()).tags(tagsProvider.getModelMetrics(state)).register(Metrics.globalRegistry).record((long) metric.getValue(), TimeUnit.MILLISECONDS);
logger.info("Adding timer {} for {}",metric.getKey(),state.name);
Timer timer = customMetricsManager.getTimer(tags, metric);
timer.record((long) metric.getValue(), TimeUnit.MILLISECONDS);
break;
case UNRECOGNIZED:
break;
Expand Down
8 changes: 4 additions & 4 deletions engine/src/main/resources/application.properties
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
server.port = 8081
server.additionalPorts=8082

spring.metrics.web.server.requestsMetricName=seldon.api.engine.server.requests
spring.metrics.web.client.requestsMetricName=seldon.api.engine.client.requests
spring.metrics.web.server.recordRequestPercentiles=true
spring.metrics.web.client.recordRequestPercentiles=true
management.metrics.web.server.requests-metric-name=seldon.api.engine.server.requests
management.metrics.web.client.requests-metric-name=seldon.api.engine.client.requests
management.metrics.distribution.percentiles.all=0.5, 0.75, 0.95, 0.98, 0.99
management.metrics.distribution.percentiles-histogram.all=true
endpoints.prometheus.sensitive=false

spring.jmx.enabled = false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@

@RunWith(SpringRunner.class)
@SpringBootTest(webEnvironment = WebEnvironment.RANDOM_PORT)
@AutoConfigureMockMvc
//@AutoConfigureMockMvc
public class TestRestClientController {

@Autowired
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import io.kubernetes.client.proto.Meta.Time;
import io.kubernetes.client.proto.Meta.Timestamp;
import io.kubernetes.client.proto.Resource.Quantity;
import io.micrometer.core.instrument.Metrics;
import io.seldon.engine.pb.IntOrStringUtils;
import io.seldon.engine.pb.JsonFormat;
import io.seldon.engine.pb.QuantityUtils;
Expand Down Expand Up @@ -90,6 +91,7 @@ private <T extends Message.Builder> void updateMessageBuilderFromJson(T messageB

@Before
public void setup() throws Exception {

mvc = MockMvcBuilders
.webAppContextSetup(context)
.build();
Expand All @@ -104,6 +106,7 @@ public void setup() throws Exception {
@Autowired
private InternalPredictionService internalPredictionService;




@Test
Expand Down Expand Up @@ -166,8 +169,9 @@ public ResponseEntity<String> answer(InvocationOnMock invocation) {
MvcResult res2 = mvc.perform(MockMvcRequestBuilders.get("/prometheus")).andReturn();
Assert.assertEquals(200, res2.getResponse().getStatus());
response = res2.getResponse().getContentAsString();
Assert.assertTrue(response.indexOf("mycounter_total{deployment_name=\"None\",model_image=\"seldonio/mean_classifier\",model_name=\"mean-classifier\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
Assert.assertTrue(response.indexOf("mytimer_duration_seconds_count{deployment_name=\"None\",model_image=\"seldonio/mean_classifier\",model_name=\"mean-classifier\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
System.out.println("response is ["+response+"]");
Assert.assertTrue(response.indexOf("mycounter_total{deployment_name=\"None\",model_image=\"seldonio/mean_classifier\",model_name=\"mean-classifier\",model_version=\"0.6\",mytag1=\"mytagval1\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
Assert.assertTrue(response.indexOf("mytimer_seconds_count{deployment_name=\"None\",model_image=\"seldonio/mean_classifier\",model_name=\"mean-classifier\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
Assert.assertTrue(response.indexOf("mygauge{deployment_name=\"None\",model_image=\"seldonio/mean_classifier\",model_name=\"mean-classifier\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 22.0")>-1);
System.out.println(response);

Expand Down Expand Up @@ -200,8 +204,8 @@ public ResponseEntity<String> answer(InvocationOnMock invocation) {
res2 = mvc.perform(MockMvcRequestBuilders.get("/prometheus")).andReturn();
Assert.assertEquals(200, res2.getResponse().getStatus());
response = res2.getResponse().getContentAsString();
Assert.assertTrue(response.indexOf("mycounter_total{deployment_name=\"None\",model_image=\"seldonio/mean_classifier\",model_name=\"mean-classifier\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 2.0")>-1);
Assert.assertTrue(response.indexOf("mytimer_duration_seconds_count{deployment_name=\"None\",model_image=\"seldonio/mean_classifier\",model_name=\"mean-classifier\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 2.0")>-1);
Assert.assertTrue(response.indexOf("mycounter_total{deployment_name=\"None\",model_image=\"seldonio/mean_classifier\",model_name=\"mean-classifier\",model_version=\"0.6\",mytag1=\"mytagval1\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 2.0")>-1);
Assert.assertTrue(response.indexOf("mytimer_seconds_count{deployment_name=\"None\",model_image=\"seldonio/mean_classifier\",model_name=\"mean-classifier\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 2.0")>-1);
Assert.assertTrue(response.indexOf("mygauge{deployment_name=\"None\",model_image=\"seldonio/mean_classifier\",model_name=\"mean-classifier\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 100.0")>-1);
System.out.println(response);
}
Expand Down Expand Up @@ -256,8 +260,8 @@ public void testInputTransformInputMetrics() throws Exception
MvcResult res2 = mvc.perform(MockMvcRequestBuilders.get("/prometheus")).andReturn();
Assert.assertEquals(200, res2.getResponse().getStatus());
response = res2.getResponse().getContentAsString();
Assert.assertTrue(response.indexOf("mycounter_total{deployment_name=\"None\",model_image=\"seldonio/transformer\",model_name=\"transformer\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
Assert.assertTrue(response.indexOf("mytimer_duration_seconds_count{deployment_name=\"None\",model_image=\"seldonio/transformer\",model_name=\"transformer\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
Assert.assertTrue(response.indexOf("mycounter_total{deployment_name=\"None\",model_image=\"seldonio/transformer\",model_name=\"transformer\",model_version=\"0.6\",mytag1=\"mytagval1\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
Assert.assertTrue(response.indexOf("mytimer_seconds_count{deployment_name=\"None\",model_image=\"seldonio/transformer\",model_name=\"transformer\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
System.out.println(response);
}

Expand Down Expand Up @@ -312,8 +316,8 @@ public void testTransformOutputMetrics() throws Exception
Assert.assertEquals(200, res2.getResponse().getStatus());
response = res2.getResponse().getContentAsString();
System.out.println(response);
Assert.assertTrue(response.indexOf("mycounter_total{deployment_name=\"None\",model_image=\"seldonio/transformer\",model_name=\"transform_output\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
Assert.assertTrue(response.indexOf("mytimer_duration_seconds_count{deployment_name=\"None\",model_image=\"seldonio/transformer\",model_name=\"transform_output\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
Assert.assertTrue(response.indexOf("mycounter_total{deployment_name=\"None\",model_image=\"seldonio/transformer\",model_name=\"transform_output\",model_version=\"0.6\",mytag1=\"mytagval1\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
Assert.assertTrue(response.indexOf("mytimer_seconds_count{deployment_name=\"None\",model_image=\"seldonio/transformer\",model_name=\"transform_output\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);

}

Expand Down Expand Up @@ -368,8 +372,8 @@ public void testRouterMetrics() throws Exception
Assert.assertEquals(200, res2.getResponse().getStatus());
response = res2.getResponse().getContentAsString();
System.out.println(response);
Assert.assertTrue(response.indexOf("mycounter_total{deployment_name=\"None\",model_image=\"seldonio/router\",model_name=\"router\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
Assert.assertTrue(response.indexOf("mytimer_duration_seconds_count{deployment_name=\"None\",model_image=\"seldonio/router\",model_name=\"router\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
Assert.assertTrue(response.indexOf("mycounter_total{deployment_name=\"None\",model_image=\"seldonio/router\",model_name=\"router\",model_version=\"0.6\",mytag1=\"mytagval1\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
Assert.assertTrue(response.indexOf("mytimer_seconds_count{deployment_name=\"None\",model_image=\"seldonio/router\",model_name=\"router\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);

}

Expand Down Expand Up @@ -423,9 +427,11 @@ public void testCombinerMetrics() throws Exception
MvcResult res2 = mvc.perform(MockMvcRequestBuilders.get("/prometheus")).andReturn();
Assert.assertEquals(200, res2.getResponse().getStatus());
response = res2.getResponse().getContentAsString();
System.out.println(response);
Assert.assertTrue(response.indexOf("mycounter_total{deployment_name=\"None\",model_image=\"seldonio/combiner\",model_name=\"combiner\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
Assert.assertTrue(response.indexOf("mytimer_duration_seconds_count{deployment_name=\"None\",model_image=\"seldonio/combiner\",model_name=\"combiner\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
System.out.println("----------------------------------------");
System.out.println("----------------------------------------");
System.out.println(response);
Assert.assertTrue(response.indexOf("mycounter_total{deployment_name=\"None\",model_image=\"seldonio/combiner\",model_name=\"combiner\",model_version=\"0.6\",mytag1=\"mytagval1\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);
Assert.assertTrue(response.indexOf("mytimer_seconds_count{deployment_name=\"None\",model_image=\"seldonio/combiner\",model_name=\"combiner\",model_version=\"0.6\",predictor_name=\"fx-market-predictor\",predictor_version=\"unknown\",} 1.0")>-1);

}

Expand Down
38 changes: 0 additions & 38 deletions engine/src/test/java/io/seldon/engine/grpc/MetricsTest.java

This file was deleted.

Loading