Skip to content

Commit

Permalink
test: deflake metrics unit tests (googleapis#2253)
Browse files Browse the repository at this point in the history
* test: deflake metrics unit tests

Change-Id: I65774cd89908b986600bba8feff609090aa74dc3

* fixed off by one error & re-added checks for name collisions

Change-Id: Ib971f7ac7c117fde274b66ea8b2845cfbdd62b0f

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* remove unnecessary variables

Change-Id: If60e63dfd13edc85750de36f9f547d7ffc5abce8

* better diagnostic message

Change-Id: I796e1234e7cebd3408e227b9e876a19ddb3a6b6a

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
  • Loading branch information
igorbernstein2 and gcf-owl-bot[bot] authored Jun 11, 2024
1 parent da703db commit e62c969
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 58 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@
import io.opentelemetry.sdk.testing.exporter.InMemoryMetricReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
Expand Down Expand Up @@ -268,8 +267,6 @@ public void testBuiltinMetricsWithCustomOTEL() throws Exception {

ProjectName name = ProjectName.of(testEnvRule.env().getProjectId());

Collection<MetricData> fromMetricReader = metricReader.collectAllMetrics();

// Interval is set in the monarch request when query metric timestamps.
// Restrict it to before we send to request and 3 minute after we send the request. If
// it turns out to be still flaky we can increase the filter range.
Expand All @@ -285,7 +282,7 @@ public void testBuiltinMetricsWithCustomOTEL() throws Exception {
if (view.equals("application_blocking_latencies")) {
otelMetricName = "application_latencies";
}
MetricData dataFromReader = getMetricData(fromMetricReader, otelMetricName);
MetricData dataFromReader = getMetricData(metricReader, otelMetricName);

// Filter on instance and method name
// Verify that metrics are correct for MutateRows request
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,35 +16,63 @@
package com.google.cloud.bigtable.data.v2.stub.metrics;

import static com.google.common.truth.Truth.assertThat;
import static com.google.common.truth.Truth.assertWithMessage;

import com.google.api.core.InternalApi;
import com.google.common.truth.Correspondence;
import com.google.protobuf.Timestamp;
import com.google.protobuf.util.Timestamps;
import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.sdk.metrics.data.HistogramPointData;
import io.opentelemetry.sdk.metrics.data.LongPointData;
import io.opentelemetry.sdk.metrics.data.MetricData;
import io.opentelemetry.sdk.testing.exporter.InMemoryMetricReader;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import org.junit.Assert;

@InternalApi
public class BuiltinMetricsTestUtils {
private static final Correspondence<MetricData, String> METRIC_DATA_BY_NAME =
Correspondence.transforming(MetricData::getName, "MetricData name");

private BuiltinMetricsTestUtils() {}

public static MetricData getMetricData(Collection<MetricData> allMetricData, String metricName) {
List<MetricData> metricDataList =
allMetricData.stream()
.filter(md -> md.getName().equals(BuiltinMetricsConstants.METER_NAME + metricName))
.collect(Collectors.toList());
if (metricDataList.size() == 0) {
allMetricData.stream().forEach(md -> System.out.println(md.getName()));
public static MetricData getMetricData(InMemoryMetricReader reader, String metricName) {
String fullMetricName = BuiltinMetricsConstants.METER_NAME + metricName;
Collection<MetricData> allMetricData = Collections.emptyList();

// Fetch the MetricData with retries
for (int attemptsLeft = 10; attemptsLeft > 0; attemptsLeft--) {
allMetricData = reader.collectAllMetrics();
List<MetricData> matchingMetadata =
allMetricData.stream()
.filter(md -> METRIC_DATA_BY_NAME.compare(md, fullMetricName))
.collect(Collectors.toList());
assertWithMessage(
"Found multiple MetricData with the same name: %s, in: %s",
fullMetricName, matchingMetadata)
.that(matchingMetadata.size())
.isAtMost(1);

if (!matchingMetadata.isEmpty()) {
return matchingMetadata.get(0);
}

try {
Thread.sleep(100);
} catch (InterruptedException interruptedException) {
Thread.currentThread().interrupt();
throw new RuntimeException(interruptedException);
}
}
assertThat(metricDataList.size()).isEqualTo(1);

return metricDataList.get(0);
// MetricData was not found, assert on original collection to get a descriptive error message
assertThat(allMetricData).comparingElementsUsing(METRIC_DATA_BY_NAME).contains(fullMetricName);
throw new IllegalStateException(
"MetricData was missing then appeared, this should never happen");
}

public static long getAggregatedValue(MetricData metricData, Attributes attributes) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,6 @@
import io.opentelemetry.sdk.testing.exporter.InMemoryMetricReader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
Expand Down Expand Up @@ -298,9 +297,7 @@ public void testReadRowsOperationLatencies() {
.put(CLIENT_NAME_KEY, CLIENT_NAME)
.build();

Collection<MetricData> allMetricData = metricReader.collectAllMetrics();

MetricData metricData = getMetricData(allMetricData, OPERATION_LATENCIES_NAME);
MetricData metricData = getMetricData(metricReader, OPERATION_LATENCIES_NAME);

long value = getAggregatedValue(metricData, expectedAttributes);
assertThat(value).isIn(Range.closed(SERVER_LATENCY, elapsed));
Expand All @@ -326,9 +323,7 @@ public void testReadRowsOperationLatenciesOnAuthorizedView() {
.put(CLIENT_NAME_KEY, CLIENT_NAME)
.build();

Collection<MetricData> allMetricData = metricReader.collectAllMetrics();

MetricData metricData = getMetricData(allMetricData, OPERATION_LATENCIES_NAME);
MetricData metricData = getMetricData(metricReader, OPERATION_LATENCIES_NAME);
long value = getAggregatedValue(metricData, expectedAttributes);
assertThat(value).isIn(Range.closed(SERVER_LATENCY, elapsed));
}
Expand All @@ -348,15 +343,13 @@ public void testGfeMetrics() {
.put(METHOD_KEY, "Bigtable.ReadRows")
.build();

Collection<MetricData> allMetricData = metricReader.collectAllMetrics();

MetricData serverLatenciesMetricData = getMetricData(allMetricData, SERVER_LATENCIES_NAME);
MetricData serverLatenciesMetricData = getMetricData(metricReader, SERVER_LATENCIES_NAME);

long serverLatencies = getAggregatedValue(serverLatenciesMetricData, expectedAttributes);
assertThat(serverLatencies).isEqualTo(FAKE_SERVER_TIMING);

MetricData connectivityErrorCountMetricData =
getMetricData(allMetricData, CONNECTIVITY_ERROR_COUNT_NAME);
getMetricData(metricReader, CONNECTIVITY_ERROR_COUNT_NAME);
Attributes expected1 =
baseAttributes
.toBuilder()
Expand Down Expand Up @@ -420,9 +413,8 @@ public void onComplete() {

assertThat(counter.get()).isEqualTo(fakeService.getResponseCounter().get());

Collection<MetricData> allMetricData = metricReader.collectAllMetrics();
MetricData applicationLatency =
getMetricData(allMetricData, APPLICATION_BLOCKING_LATENCIES_NAME);
getMetricData(metricReader, APPLICATION_BLOCKING_LATENCIES_NAME);

Attributes expectedAttributes =
baseAttributes
Expand All @@ -437,7 +429,7 @@ public void onComplete() {

assertThat(value).isAtLeast((APPLICATION_LATENCY - SLEEP_VARIABILITY) * counter.get());

MetricData operationLatency = getMetricData(allMetricData, OPERATION_LATENCIES_NAME);
MetricData operationLatency = getMetricData(metricReader, OPERATION_LATENCIES_NAME);
long operationLatencyValue =
getAggregatedValue(
operationLatency,
Expand All @@ -457,9 +449,8 @@ public void testReadRowsApplicationLatencyWithManualFlowControl() throws Excepti
rows.next();
}

Collection<MetricData> allMetricData = metricReader.collectAllMetrics();
MetricData applicationLatency =
getMetricData(allMetricData, APPLICATION_BLOCKING_LATENCIES_NAME);
getMetricData(metricReader, APPLICATION_BLOCKING_LATENCIES_NAME);

Attributes expectedAttributes =
baseAttributes
Expand All @@ -477,7 +468,7 @@ public void testReadRowsApplicationLatencyWithManualFlowControl() throws Excepti
assertThat(counter).isEqualTo(fakeService.getResponseCounter().get());
assertThat(value).isAtLeast(APPLICATION_LATENCY * (counter - 1) - SERVER_LATENCY);

MetricData operationLatency = getMetricData(allMetricData, OPERATION_LATENCIES_NAME);
MetricData operationLatency = getMetricData(metricReader, OPERATION_LATENCIES_NAME);
long operationLatencyValue =
getAggregatedValue(
operationLatency,
Expand All @@ -490,8 +481,7 @@ public void testRetryCount() throws InterruptedException {
stub.mutateRowCallable()
.call(RowMutation.create(TABLE, "random-row").setCell("cf", "q", "value"));

Collection<MetricData> allMetricData = metricReader.collectAllMetrics();
MetricData metricData = getMetricData(allMetricData, RETRY_COUNT_NAME);
MetricData metricData = getMetricData(metricReader, RETRY_COUNT_NAME);
Attributes expectedAttributes =
baseAttributes
.toBuilder()
Expand All @@ -512,8 +502,7 @@ public void testMutateRowAttemptsTagValues() {
stub.mutateRowCallable()
.call(RowMutation.create(TABLE, "random-row").setCell("cf", "q", "value"));

Collection<MetricData> allMetricData = metricReader.collectAllMetrics();
MetricData metricData = getMetricData(allMetricData, ATTEMPT_LATENCIES_NAME);
MetricData metricData = getMetricData(metricReader, ATTEMPT_LATENCIES_NAME);

Attributes expected1 =
baseAttributes
Expand Down Expand Up @@ -554,8 +543,7 @@ public void testMutateRowsPartialError() throws InterruptedException {

Assert.assertThrows(BatchingException.class, batcher::close);

Collection<MetricData> allMetricData = metricReader.collectAllMetrics();
MetricData metricData = getMetricData(allMetricData, ATTEMPT_LATENCIES_NAME);
MetricData metricData = getMetricData(metricReader, ATTEMPT_LATENCIES_NAME);

Attributes expected =
baseAttributes
Expand Down Expand Up @@ -584,8 +572,7 @@ public void testMutateRowsRpcError() {

Assert.assertThrows(BatchingException.class, batcher::close);

Collection<MetricData> allMetricData = metricReader.collectAllMetrics();
MetricData metricData = getMetricData(allMetricData, ATTEMPT_LATENCIES_NAME);
MetricData metricData = getMetricData(metricReader, ATTEMPT_LATENCIES_NAME);

Attributes expected =
baseAttributes
Expand All @@ -606,8 +593,7 @@ public void testMutateRowsRpcError() {
public void testReadRowsAttemptsTagValues() {
Lists.newArrayList(stub.readRowsCallable().call(Query.create("fake-table")).iterator());

Collection<MetricData> allMetricData = metricReader.collectAllMetrics();
MetricData metricData = getMetricData(allMetricData, ATTEMPT_LATENCIES_NAME);
MetricData metricData = getMetricData(metricReader, ATTEMPT_LATENCIES_NAME);

Attributes expected1 =
baseAttributes
Expand Down Expand Up @@ -649,8 +635,7 @@ public void testBatchBlockingLatencies() throws InterruptedException {

int expectedNumRequests = 6 / batchElementCount;

Collection<MetricData> allMetricData = metricReader.collectAllMetrics();
MetricData applicationLatency = getMetricData(allMetricData, CLIENT_BLOCKING_LATENCIES_NAME);
MetricData applicationLatency = getMetricData(metricReader, CLIENT_BLOCKING_LATENCIES_NAME);

Attributes expectedAttributes =
baseAttributes
Expand All @@ -675,8 +660,7 @@ public void testBatchBlockingLatencies() throws InterruptedException {
public void testQueuedOnChannelServerStreamLatencies() {
stub.readRowsCallable().all().call(Query.create(TABLE));

Collection<MetricData> allMetricData = metricReader.collectAllMetrics();
MetricData clientLatency = getMetricData(allMetricData, CLIENT_BLOCKING_LATENCIES_NAME);
MetricData clientLatency = getMetricData(metricReader, CLIENT_BLOCKING_LATENCIES_NAME);

Attributes attributes =
baseAttributes
Expand All @@ -697,8 +681,7 @@ public void testQueuedOnChannelUnaryLatencies() {

stub.mutateRowCallable().call(RowMutation.create(TABLE, "a-key").setCell("f", "q", "v"));

Collection<MetricData> allMetricData = metricReader.collectAllMetrics();
MetricData clientLatency = getMetricData(allMetricData, CLIENT_BLOCKING_LATENCIES_NAME);
MetricData clientLatency = getMetricData(metricReader, CLIENT_BLOCKING_LATENCIES_NAME);

Attributes attributes =
baseAttributes
Expand All @@ -723,8 +706,7 @@ public void testPermanentFailure() {
} catch (NotFoundException e) {
}

Collection<MetricData> allMetricData = metricReader.collectAllMetrics();
MetricData attemptLatency = getMetricData(allMetricData, ATTEMPT_LATENCIES_NAME);
MetricData attemptLatency = getMetricData(metricReader, ATTEMPT_LATENCIES_NAME);

Attributes expected =
baseAttributes
Expand All @@ -740,7 +722,7 @@ public void testPermanentFailure() {

verifyAttributes(attemptLatency, expected);

MetricData opLatency = getMetricData(allMetricData, OPERATION_LATENCIES_NAME);
MetricData opLatency = getMetricData(metricReader, OPERATION_LATENCIES_NAME);
verifyAttributes(opLatency, expected);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
import io.opentelemetry.sdk.metrics.data.MetricData;
import io.opentelemetry.sdk.testing.exporter.InMemoryMetricReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ScheduledExecutorService;
Expand Down Expand Up @@ -138,10 +137,9 @@ public void readWithOneChannel() throws Exception {

runInterceptorTasksAndAssertCount();

Collection<MetricData> allMetrics = metricReader.collectAllMetrics();
MetricData metricData =
BuiltinMetricsTestUtils.getMetricData(
allMetrics, BuiltinMetricsConstants.PER_CONNECTION_ERROR_COUNT_NAME);
metricReader, BuiltinMetricsConstants.PER_CONNECTION_ERROR_COUNT_NAME);

// Make sure the correct bucket is updated with the correct number of data points
ArrayList<HistogramPointData> histogramPointData =
Expand Down Expand Up @@ -179,10 +177,9 @@ public void readWithTwoChannels() throws Exception {

long errorCountPerChannel = totalErrorCount / 2;

Collection<MetricData> allMetrics = metricReader.collectAllMetrics();
MetricData metricData =
BuiltinMetricsTestUtils.getMetricData(
allMetrics, BuiltinMetricsConstants.PER_CONNECTION_ERROR_COUNT_NAME);
metricReader, BuiltinMetricsConstants.PER_CONNECTION_ERROR_COUNT_NAME);

// The 2 channels should get equal amount of errors, so the totalErrorCount / 2 bucket is
// updated twice.
Expand Down Expand Up @@ -234,10 +231,9 @@ public void readOverTwoPeriods() throws Exception {

runInterceptorTasksAndAssertCount();

Collection<MetricData> allMetrics = metricReader.collectAllMetrics();
MetricData metricData =
BuiltinMetricsTestUtils.getMetricData(
allMetrics, BuiltinMetricsConstants.PER_CONNECTION_ERROR_COUNT_NAME);
metricReader, BuiltinMetricsConstants.PER_CONNECTION_ERROR_COUNT_NAME);

ArrayList<HistogramPointData> histogramPointData =
new ArrayList<>(metricData.getHistogramData().getPoints());
Expand All @@ -261,10 +257,9 @@ public void noFailedRequests() throws Exception {
}
}
runInterceptorTasksAndAssertCount();
Collection<MetricData> allMetrics = metricReader.collectAllMetrics();
MetricData metricData =
BuiltinMetricsTestUtils.getMetricData(
allMetrics, BuiltinMetricsConstants.PER_CONNECTION_ERROR_COUNT_NAME);
metricReader, BuiltinMetricsConstants.PER_CONNECTION_ERROR_COUNT_NAME);
long value = BuiltinMetricsTestUtils.getAggregatedValue(metricData, attributes);
assertThat(value).isEqualTo(0);
}
Expand Down

0 comments on commit e62c969

Please sign in to comment.