Skip to content

Commit

Permalink
Allow value of RetainedHeapLimiter's threshold to change on each invo…
Browse files Browse the repository at this point in the history
…cation.

PiperOrigin-RevId: 245266254
  • Loading branch information
janakdr authored and copybara-github committed Apr 25, 2019
1 parent e5d4219 commit 5ef93ef
Show file tree
Hide file tree
Showing 4 changed files with 181 additions and 71 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@
import com.google.devtools.build.lib.vfs.PathFragment;
import com.google.devtools.common.options.OpaqueOptionsData;
import com.google.devtools.common.options.OptionsParser;
import com.google.devtools.common.options.OptionsParsingException;
import com.google.devtools.common.options.OptionsParsingResult;
import java.io.BufferedOutputStream;
import java.io.IOException;
Expand Down Expand Up @@ -382,18 +381,7 @@ private BlazeCommandResult execExclusively(
.getOptions(BlazeServerStartupOptions.class)
.oomMoreEagerlyThreshold;
}
if (oomMoreEagerlyThreshold < 0 || oomMoreEagerlyThreshold > 100) {
reporter.handle(Event.error("--oom_more_eagerly_threshold must be non-negative percent"));
return BlazeCommandResult.exitCode(ExitCode.COMMAND_LINE_ERROR);
}
if (oomMoreEagerlyThreshold != 100) {
try {
RetainedHeapLimiter.maybeInstallRetainedHeapLimiter(oomMoreEagerlyThreshold);
} catch (OptionsParsingException e) {
reporter.handle(Event.error(e.getMessage()));
return BlazeCommandResult.exitCode(ExitCode.COMMAND_LINE_ERROR);
}
}
runtime.getRetainedHeapLimiter().updateThreshold(oomMoreEagerlyThreshold);

// We register an ANSI-allowing handler associated with {@code handler} so that ANSI control
// codes can be re-introduced later even if blaze is invoked with --color=no. This is useful
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ public final class BlazeRuntime implements BugReport.BlazeRuntimeInterface {
private final BuildEventArtifactUploaderFactoryMap buildEventArtifactUploaderFactoryMap;
private final ActionKeyContext actionKeyContext;
private final ImmutableMap<String, AuthHeadersProvider> authHeadersProviderMap;
private final RetainedHeapLimiter retainedHeapLimiter = new RetainedHeapLimiter();

// Workspace state (currently exactly one workspace per server)
private BlazeWorkspace workspace;
Expand Down Expand Up @@ -490,6 +491,10 @@ public QueryRuntimeHelper.Factory getQueryRuntimeHelperFactory() {
return queryRuntimeHelperFactory;
}

RetainedHeapLimiter getRetainedHeapLimiter() {
return retainedHeapLimiter;
}

/**
* Hook method called by the BlazeCommandDispatcher prior to the dispatch of
* each command.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,104 +14,131 @@

package com.google.devtools.build.lib.runtime;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.flogger.GoogleLogger;
import com.google.devtools.build.lib.bugreport.BugReport;
import com.google.devtools.common.options.OptionsParsingException;
import com.google.devtools.build.lib.concurrent.ThreadSafety;
import com.google.devtools.build.lib.util.AbruptExitException;
import com.google.devtools.build.lib.util.ExitCode;
import com.sun.management.GarbageCollectionNotificationInfo;
import java.lang.management.GarbageCollectorMXBean;
import java.lang.management.ManagementFactory;
import java.lang.management.MemoryUsage;
import java.util.List;
import java.util.Map;
import java.util.OptionalInt;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.logging.Logger;
import java.util.concurrent.atomic.AtomicLong;
import javax.management.ListenerNotFoundException;
import javax.management.Notification;
import javax.management.NotificationEmitter;
import javax.management.NotificationListener;
import javax.management.openmbean.CompositeData;

/**
* Monitor the size of the retained heap and exit promptly if it grows too large. Specifically,
* check the size of the tenured space after each major GC; if it exceeds 90%, call
* {@code System.gc()} to trigger a stop-the-world collection; if it's still more than 90% full,
* exit with an {@link OutOfMemoryError}.
* Monitor the size of the retained heap and exit promptly if it grows too large. Specifically,
* check the size of the tenured space after each major GC; if it exceeds {@link
* #occupiedHeapPercentageThreshold}%, call {@code System.gc()} to trigger a stop-the-world
* collection; if it's still more than {@link #occupiedHeapPercentageThreshold}% full, exit with an
* {@link OutOfMemoryError}.
*/
class RetainedHeapLimiter implements NotificationListener {
private static final Logger logger = Logger.getLogger(RetainedHeapLimiter.class.getName());
private static final GoogleLogger logger = GoogleLogger.forEnclosingClass();
private static final long MIN_TIME_BETWEEN_TRIGGERED_GC_MILLISECONDS = 60000;

private static int registeredOccupiedHeapPercentageThreshold = -1;
private final AtomicBoolean throwingOom = new AtomicBoolean(false);
private final ImmutableList<NotificationEmitter> tenuredGcEmitters;
private OptionalInt occupiedHeapPercentageThreshold = OptionalInt.empty();
private AtomicLong lastTriggeredGcInMilliseconds = new AtomicLong();

static void maybeInstallRetainedHeapLimiter(int occupiedHeapPercentageThreshold)
throws OptionsParsingException {
if (registeredOccupiedHeapPercentageThreshold == -1) {
registeredOccupiedHeapPercentageThreshold = occupiedHeapPercentageThreshold;
new RetainedHeapLimiter(occupiedHeapPercentageThreshold).install();
}
if (registeredOccupiedHeapPercentageThreshold != occupiedHeapPercentageThreshold) {
throw new OptionsParsingException(
"Old threshold of "
+ registeredOccupiedHeapPercentageThreshold
+ " not equal to new threshold of "
+ occupiedHeapPercentageThreshold
+ ". To change the threshold, shut down the server and restart it with the desired "
+ "value");
}
RetainedHeapLimiter() {
this(ManagementFactory.getGarbageCollectorMXBeans());
}

private boolean installed = false;
private final AtomicBoolean throwingOom = new AtomicBoolean(false);
private long lastTriggeredGcInMilliseconds = 0;
private final int occupiedHeapPercentageThreshold;
@VisibleForTesting
RetainedHeapLimiter(List<GarbageCollectorMXBean> gcBeans) {
tenuredGcEmitters = findTenuredCollectorBeans(gcBeans);
Preconditions.checkState(
!tenuredGcEmitters.isEmpty(),
"Can't find tenured space; update this class for a new collector");
}

RetainedHeapLimiter(int occupiedHeapPercentageThreshold) {
this.occupiedHeapPercentageThreshold = occupiedHeapPercentageThreshold;
@ThreadSafety.ThreadCompatible // Can only be called on the logical main Bazel thread.
void updateThreshold(int occupiedHeapPercentageThreshold) throws AbruptExitException {
if (occupiedHeapPercentageThreshold < 0 || occupiedHeapPercentageThreshold > 100) {
throw new AbruptExitException(
"--experimental_oom_more_eagerly_threshold must be a percent between 0 and 100 but was "
+ occupiedHeapPercentageThreshold,
ExitCode.COMMAND_LINE_ERROR);
}
boolean alreadyInstalled = this.occupiedHeapPercentageThreshold.isPresent();
this.occupiedHeapPercentageThreshold =
occupiedHeapPercentageThreshold < 100
? OptionalInt.of(occupiedHeapPercentageThreshold)
: OptionalInt.empty();
boolean shouldBeInstalled = this.occupiedHeapPercentageThreshold.isPresent();
if (alreadyInstalled && !shouldBeInstalled) {
for (NotificationEmitter emitter : tenuredGcEmitters) {
try {
emitter.removeNotificationListener(this, null, null);
} catch (ListenerNotFoundException e) {
logger.atWarning().log("Couldn't remove self as listener from %s", emitter);
}
}
} else if (!alreadyInstalled && shouldBeInstalled) {
tenuredGcEmitters.forEach(e -> e.addNotificationListener(this, null, null));
}
}

void install() {
Preconditions.checkState(!installed, "RetainedHeapLimiter installed twice");
installed = true;
List<GarbageCollectorMXBean> gcbeans = ManagementFactory.getGarbageCollectorMXBeans();
boolean foundTenured = false;
@VisibleForTesting
static ImmutableList<NotificationEmitter> findTenuredCollectorBeans(
List<GarbageCollectorMXBean> gcBeans) {
ImmutableList.Builder<NotificationEmitter> builder = ImmutableList.builder();
// Examine all collectors and register for notifications from those which collect the tenured
// space. Normally there is one such collector.
for (GarbageCollectorMXBean gcbean : gcbeans) {
boolean collectsTenured = false;
for (String name : gcbean.getMemoryPoolNames()) {
collectsTenured |= isTenuredSpace(name);
}
if (collectsTenured) {
foundTenured = true;
NotificationEmitter emitter = (NotificationEmitter) gcbean;
emitter.addNotificationListener(this, null, null);
for (GarbageCollectorMXBean gcBean : gcBeans) {
for (String name : gcBean.getMemoryPoolNames()) {
if (isTenuredSpace(name)) {
builder.add((NotificationEmitter) gcBean);
}
}
}
if (!foundTenured) {
throw new IllegalStateException(
"Can't find tenured space; update this class for a new collector");
}
return builder.build();
}

// Can be called concurrently, handles concurrent calls with #updateThreshold gracefully.
@ThreadSafety.ThreadSafe
@Override
public void handleNotification(Notification notification, Object handback) {
if (!notification
.getType()
.equals(GarbageCollectionNotificationInfo.GARBAGE_COLLECTION_NOTIFICATION)) {
return;
}
// Get a local reference to guard against concurrent modifications.
OptionalInt occupiedHeapPercentageThreshold = this.occupiedHeapPercentageThreshold;
if (!occupiedHeapPercentageThreshold.isPresent()) {
// Presumably failure above to uninstall this listener, or a racy GC.
logger.atInfo().atMostEvery(1, TimeUnit.MINUTES).log(
"Got notification %s when should be disabled", notification);
return;
}
GarbageCollectionNotificationInfo info =
GarbageCollectionNotificationInfo.from((CompositeData) notification.getUserData());
Map<String, MemoryUsage> spaces = info.getGcInfo().getMemoryUsageAfterGc();
for (Map.Entry<String, MemoryUsage> entry : spaces.entrySet()) {
if (isTenuredSpace(entry.getKey())) {
MemoryUsage space = entry.getValue();
if (space.getMax() == 0) {
// The CMS collector sometimes passes us nonsense stats.
// The collector sometimes passes us nonsense stats.
continue;
}

long percentUsed = 100 * space.getUsed() / space.getMax();
if (percentUsed > occupiedHeapPercentageThreshold) {
if (percentUsed > occupiedHeapPercentageThreshold.getAsInt()) {
if (info.getGcCause().equals("System.gc()") && !throwingOom.getAndSet(true)) {
// Assume we got here from a GC initiated by the other branch.
String exitMsg =
Expand All @@ -122,20 +149,16 @@ public void handleNotification(Notification notification, Object handback) {
space.getMax(),
occupiedHeapPercentageThreshold);
System.err.println(exitMsg);
logger.info(exitMsg);
logger.atInfo().log(exitMsg);
// Exits the runtime.
BugReport.handleCrash(new OutOfMemoryError(exitMsg));
} else if (System.currentTimeMillis() - lastTriggeredGcInMilliseconds
} else if (System.currentTimeMillis() - lastTriggeredGcInMilliseconds.get()
> MIN_TIME_BETWEEN_TRIGGERED_GC_MILLISECONDS) {
logger.info(
"Triggering a full GC with "
+ space.getUsed()
+ " out of "
+ space.getMax()
+ " used");
logger.atInfo().log(
"Triggering a full GC with %s out of %s used", space.getUsed(), space.getMax());
// Force a full stop-the-world GC and see if it can get us below the threshold.
System.gc();
lastTriggeredGcInMilliseconds = System.currentTimeMillis();
lastTriggeredGcInMilliseconds.set(System.currentTimeMillis());
}
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
// Copyright 2019 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.devtools.build.lib.runtime;

import static com.google.common.truth.Truth.assertThat;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.withSettings;

import com.google.common.collect.ImmutableList;
import com.google.devtools.build.lib.util.AbruptExitException;
import java.lang.management.GarbageCollectorMXBean;
import javax.management.ListenerNotFoundException;
import javax.management.NotificationEmitter;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.mockito.Mockito;

/**
* Basic tests for {@link RetainedHeapLimiter}. More realistic tests are hard because {@link
* RetainedHeapLimiter} intentionally crashes the JVM.
*/
@RunWith(JUnit4.class)
public class RetainedHeapLimiterTest {
@Test
public void findBeans() {
GarbageCollectorMXBean mockUselessBean = Mockito.mock(GarbageCollectorMXBean.class);
String[] untenuredPoolNames = {"assistant", "adjunct"};
Mockito.when(mockUselessBean.getMemoryPoolNames()).thenReturn(untenuredPoolNames);
GarbageCollectorMXBean mockBean =
Mockito.mock(
GarbageCollectorMXBean.class,
withSettings().extraInterfaces(NotificationEmitter.class));
String[] poolNames = {"not tenured", "CMS Old Gen"};
Mockito.when(mockBean.getMemoryPoolNames()).thenReturn(poolNames);
assertThat(
RetainedHeapLimiter.findTenuredCollectorBeans(
ImmutableList.of(mockUselessBean, mockBean)))
.containsExactly(mockBean);
}

@Test
public void smoke() throws AbruptExitException, ListenerNotFoundException {
GarbageCollectorMXBean mockUselessBean = Mockito.mock(GarbageCollectorMXBean.class);
String[] untenuredPoolNames = {"assistant", "adjunct"};
Mockito.when(mockUselessBean.getMemoryPoolNames()).thenReturn(untenuredPoolNames);
GarbageCollectorMXBean mockBean =
Mockito.mock(
GarbageCollectorMXBean.class,
withSettings().extraInterfaces(NotificationEmitter.class));
String[] poolNames = {"not tenured", "CMS Old Gen"};
Mockito.when(mockBean.getMemoryPoolNames()).thenReturn(poolNames);

RetainedHeapLimiter underTest =
new RetainedHeapLimiter(ImmutableList.of(mockUselessBean, mockBean));
underTest.updateThreshold(100);
Mockito.verify((NotificationEmitter) mockBean, never())
.addNotificationListener(underTest, null, null);
Mockito.verify((NotificationEmitter) mockBean, never())
.removeNotificationListener(underTest, null, null);

underTest.updateThreshold(90);
Mockito.verify((NotificationEmitter) mockBean, times(1))
.addNotificationListener(underTest, null, null);
Mockito.verify((NotificationEmitter) mockBean, never())
.removeNotificationListener(underTest, null, null);

underTest.updateThreshold(80);
// No additional calls.
Mockito.verify((NotificationEmitter) mockBean, times(1))
.addNotificationListener(underTest, null, null);
Mockito.verify((NotificationEmitter) mockBean, never())
.removeNotificationListener(underTest, null, null);

underTest.updateThreshold(100);
Mockito.verify((NotificationEmitter) mockBean, times(1))
.addNotificationListener(underTest, null, null);
Mockito.verify((NotificationEmitter) mockBean, times(1))
.removeNotificationListener(underTest, null, null);
}
}

0 comments on commit 5ef93ef

Please sign in to comment.