Skip to content

Commit

Permalink
Using System::nanoTime for latency calculations
Browse files Browse the repository at this point in the history
Signed-off-by: Harsh Garg <gkharsh@amazon.com>
  • Loading branch information
Harsh Garg committed Mar 18, 2024
1 parent 8d20632 commit 7d91a49
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
import org.opensearch.cluster.routing.allocation.command.AllocationCommands;
import org.opensearch.cluster.routing.allocation.decider.AllocationDeciders;
import org.opensearch.cluster.routing.allocation.decider.Decision;
import org.opensearch.common.unit.TimeValue;
import org.opensearch.gateway.GatewayAllocator;
import org.opensearch.gateway.PriorityComparator;
import org.opensearch.snapshots.SnapshotsInfoService;
Expand Down Expand Up @@ -547,12 +548,12 @@ private void reroute(RoutingAllocation allocation) {
assert AutoExpandReplicas.getAutoExpandReplicaChanges(allocation.metadata(), allocation).isEmpty()
: "auto-expand replicas out of sync with number of nodes in the cluster";
assert assertInitialized();
long rerouteStartTimeMS = System.currentTimeMillis();
long rerouteStartTimeNS = System.nanoTime();
removeDelayMarkers(allocation);

allocateExistingUnassignedShards(allocation); // try to allocate existing shard copies first
shardsAllocator.allocate(allocation);
this.rerouteHistogram.record((double) Math.max(0, System.currentTimeMillis() - rerouteStartTimeMS));
this.rerouteHistogram.record((double) Math.max(0, TimeValue.nsecToMSec(System.nanoTime() - rerouteStartTimeNS)));
assert RoutingNodes.assertShardStats(allocation.routingNodes());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,6 @@
public class ClusterApplierService extends AbstractLifecycleComponent implements ClusterApplier {
private static final Logger logger = LogManager.getLogger(ClusterApplierService.class);

private static final String LATENCY_METRIC_UNIT = "ms";

private static final String LATENCY_METRIC_OPERATION_TAG_KEY = "Operation";

public static final Setting<TimeValue> CLUSTER_SERVICE_SLOW_TASK_LOGGING_THRESHOLD_SETTING = Setting.positiveTimeSetting(
"cluster.service.slow_task_logging_threshold",
TimeValue.timeValueSeconds(30),
Expand Down Expand Up @@ -128,9 +124,9 @@ public class ClusterApplierService extends AbstractLifecycleComponent implements

private NodeConnectionsService nodeConnectionsService;

private final Histogram clusterStateAppliersHistogram;
private Histogram clusterStateAppliersHistogram;

private final Histogram clusterStateListenersHistogram;
private Histogram clusterStateListenersHistogram;

public ClusterApplierService(
String nodeName,
Expand All @@ -149,16 +145,19 @@ public ClusterApplierService(
CLUSTER_SERVICE_SLOW_TASK_LOGGING_THRESHOLD_SETTING,
this::setSlowTaskLoggingThreshold
);
initializeMetrics(metricsRegistry);
}

private void initializeMetrics(MetricsRegistry metricsRegistry) {
this.clusterStateAppliersHistogram = metricsRegistry.createHistogram(
"cluster.state.appliers.latency",
"Histogram for tracking the latency of cluster state appliers",
LATENCY_METRIC_UNIT
"ms"
);
this.clusterStateListenersHistogram = metricsRegistry.createHistogram(
"cluster.state.listeners.latency",
"Histogram for tracking the latency of cluster state listeners",
LATENCY_METRIC_UNIT
"ms"
);
}

Expand Down Expand Up @@ -633,12 +632,11 @@ private void callClusterStateAppliers(
for (ClusterStateApplier applier : clusterStateAppliers) {
logger.trace("calling [{}] with change to version [{}]", applier, clusterChangedEvent.state().version());
try (TimingHandle ignored = stopWatch.timing("running applier [" + applier + "]")) {
long applierStartTimeMS = System.currentTimeMillis();
long applierStartTimeNS = System.nanoTime();
applier.applyClusterState(clusterChangedEvent);
double applierExecutionTimeMS = (double) Math.max(0, System.currentTimeMillis() - applierStartTimeMS);
clusterStateAppliersHistogram.record(
applierExecutionTimeMS,
Tags.create().addTag(LATENCY_METRIC_OPERATION_TAG_KEY, applier.getClass().getSimpleName())
(double) Math.max(0, TimeValue.nsecToMSec(System.nanoTime() - applierStartTimeNS)),
Tags.create().addTag("Operation", applier.getClass().getSimpleName())
);
}
}
Expand All @@ -658,12 +656,11 @@ private void callClusterStateListener(
try {
logger.trace("calling [{}] with change to version [{}]", listener, clusterChangedEvent.state().version());
try (TimingHandle ignored = stopWatch.timing("notifying listener [" + listener + "]")) {
long listenerStartTimeMS = System.currentTimeMillis();
long listenerStartTimeNS = System.nanoTime();
listener.clusterChanged(clusterChangedEvent);
double listenerExecutionTimeMS = (double) Math.max(0, System.currentTimeMillis() - listenerStartTimeMS);
clusterStateListenersHistogram.record(
listenerExecutionTimeMS,
Tags.create().addTag(LATENCY_METRIC_OPERATION_TAG_KEY, listener.getClass().getSimpleName())
(double) Math.max(0, TimeValue.nsecToMSec(System.nanoTime() - listenerStartTimeNS)),
Tags.create().addTag("Operation", listener.getClass().getSimpleName())
);
}
} catch (Exception ex) {
Expand Down

0 comments on commit 7d91a49

Please sign in to comment.