Skip to content

Commit

Permalink
Add a genquery boolean parameter (compressed_output) to control whe…
Browse files Browse the repository at this point in the history
…ther the genquery output is compressed or not

Decompressed genquery output can make blaze post GC peak heap size spike due to huge local variables created. `compressed_output` genquery parameter is added in order to avoid blaze OOM due to these large local variables.

User can opt to add this parameter to their `genquery` method in bzl file when output is expected to be huge. Then the output can be decompressed downstream.

Some refactors are also done in `GenQueryOutputStreamTest` to reduce code duplication.

PiperOrigin-RevId: 547848995
Change-Id: Ia84df3b86f217421756a1886814b8425e5ddac5b
  • Loading branch information
yuyue730 authored and copybara-github committed Jul 13, 2023
1 parent 9fb98e3 commit 7f639c6
Show file tree
Hide file tree
Showing 8 changed files with 158 additions and 53 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -352,9 +352,11 @@ private static GenQueryResult doQuery(
throw new RuntimeException(e);
}

GenQueryOutputStream outputStream = new GenQueryOutputStream();
Set<Target> result = targets.getResult();
try {
boolean compressedOutputRequested =
ruleContext.attributes().get("compressed_output", Type.BOOLEAN);
GenQueryOutputStream outputStream = new GenQueryOutputStream(compressedOutputRequested);
Set<Target> result = targets.getResult();
QueryOutputUtils.output(
queryOptions,
queryResult,
Expand All @@ -366,13 +368,12 @@ private static GenQueryResult doQuery(
hashFunction,
queryEnvironment.getMainRepoMapping());
outputStream.close();
return outputStream.getResult();
} catch (ClosedByInterruptException e) {
throw new InterruptedException(e.getMessage());
} catch (IOException e) {
throw new RuntimeException(e);
}

return outputStream.getResult();
}

@Immutable // assuming no other reference to result
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,18 @@
* {@link OutputStream} implementation optimized for {@link GenQuery} by (optionally) compressing
* query results on the fly. Produces {@link GenQueryResult}s which are preferred for storing the
* output of {@link GenQuery}'s underlying queries.
*
* <p>The produced {@link GenQueryResult}s can also be in gzipped compressed format if the genquery
* definition explicitly sets {@code compressed_output} parameter to {@code True}.
*/
class GenQueryOutputStream extends OutputStream {

/**
* When compression is enabled, the threshold at which the stream will switch to compressing
* output. The value of this constant is arbitrary but effective.
*
* <p>If genquery definition explicitly sets {@code compressed_output} parameter to {@code True},
* the stream will be compressed regardless of whether its size reaches this threshold.
*/
private static final int COMPRESSION_THRESHOLD = 1 << 20;

Expand All @@ -48,6 +54,9 @@ interface GenQueryResult {
/**
* Adds the query output to the supplied {@link Fingerprint}. Equivalent to {@code
* fingerprint.addBytes(genQueryResult.getBytes())}, but potentially more efficient.
*
* <p>A boolean indicating whether the query output is compressed or not is added to the
* supplied {@link Fingerprint} first.
*/
void fingerprint(Fingerprint fingerprint);

Expand All @@ -63,11 +72,24 @@ interface GenQueryResult {
*/
void writeTo(OutputStream out) throws IOException;
}

private int bytesWritten = 0;
private boolean compressed = false;
private boolean outputWasCompressed;
private boolean closed = false;
private ByteString.Output bytesOut = ByteString.newOutput();
private OutputStream out = bytesOut;
private OutputStream out;
private final boolean compressedOutputRequested;

GenQueryOutputStream(boolean compressedOutputRequested) throws IOException {
this.compressedOutputRequested = compressedOutputRequested;
if (compressedOutputRequested) {
this.out = new GZIPOutputStream(bytesOut);
this.outputWasCompressed = true;
} else {
this.out = bytesOut;
this.outputWasCompressed = false;
}
}

@Override
public void write(int b) throws IOException {
Expand Down Expand Up @@ -101,17 +123,17 @@ public void close() throws IOException {

GenQueryResult getResult() {
Preconditions.checkState(closed, "Must be closed");
return compressed
? new CompressedResult(bytesOut.toByteString(), bytesWritten)
: new RegularResult(bytesOut.toByteString());
return !outputWasCompressed || compressedOutputRequested
? new SimpleResult(bytesOut.toByteString())
: new CompressedResultWithDecompressedOutput(bytesOut.toByteString(), bytesWritten);
}

private void maybeStartCompression(int additionalBytes) throws IOException {
if (compressed) {
if (outputWasCompressed) {
return;
}

if (bytesWritten + additionalBytes < COMPRESSION_THRESHOLD) {
if (!compressedOutputRequested && bytesWritten + additionalBytes < COMPRESSION_THRESHOLD) {
return;
}

Expand All @@ -120,14 +142,18 @@ private void maybeStartCompression(int additionalBytes) throws IOException {
bytesOut.writeTo(gzipOut);
bytesOut = compressedBytesOut;
out = gzipOut;
compressed = true;
outputWasCompressed = true;
}

/**
* Used when input and output GenQuery result data are in the same format, so no decompression or
* other data transformation is needed.
*/
@VisibleForTesting
static class RegularResult implements GenQueryResult {
static final class SimpleResult implements GenQueryResult {
private final ByteString data;

RegularResult(ByteString data) {
SimpleResult(ByteString data) {
this.data = data;
}

Expand All @@ -152,12 +178,13 @@ public void writeTo(OutputStream out) throws IOException {
}
}

/** Used when input GenQuery result is in compressed format and output should be decompressed. */
@VisibleForTesting
static class CompressedResult implements GenQueryResult {
static final class CompressedResultWithDecompressedOutput implements GenQueryResult {
private final ByteString compressedData;
private final int size;

CompressedResult(ByteString compressedData, int size) {
CompressedResultWithDecompressedOutput(ByteString compressedData, int size) {
this.compressedData = compressedData;
this.size = size;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,7 @@
import com.google.devtools.build.lib.analysis.RuleDefinitionEnvironment;
import com.google.devtools.build.lib.packages.RuleClass;

/**
* Rule definition for genquery the rule.
*/
/** Rule definition for genquery the rule. */
public final class GenQueryRule implements RuleDefinition {

/** Adds {@link GenQueryRule} and its dependencies to the provided builder. */
Expand Down Expand Up @@ -67,6 +65,15 @@ public RuleClass build(RuleClass.Builder builder, RuleDefinitionEnvironment env)
will have their default values just like on the command line of <code>bazel query</code>.
<!-- #END_BLAZE_RULE.ATTRIBUTE --> */
.add(attr("opts", STRING_LIST))
/* <!-- #BLAZE_RULE(genquery).ATTRIBUTE(compressed_output) -->
If <code>True</code>, query output is written in GZIP file format. This setting can be used
to avoid spikes in Bazel's memory use when the query output is expected to be large. Bazel
already internally compresses query outputs greater than 2<sup>20</sup> bytes regardless of
the value of this setting, so setting this to <code>True</code> may not reduce retained
heap. However, it allows Bazel to skip <em>decompression</em> when writing the output file,
which can be memory-intensive.
<!-- #END_BLAZE_RULE.ATTRIBUTE --> */
.add(attr("compressed_output", BOOLEAN).value(false))
.build();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ java_test(
"//third_party:guava",
"//third_party:junit4",
"//third_party:truth",
"//third_party/protobuf:protobuf_java",
"@com_google_testparameterinjector//:testparameterinjector",
],
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,26 @@

import static com.google.common.truth.Truth.assertThat;
import static com.google.devtools.build.lib.testutil.MoreAsserts.assertContainsEvent;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.junit.Assert.assertThrows;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.io.ByteStreams;
import com.google.devtools.build.lib.actions.Artifact;
import com.google.devtools.build.lib.actions.BuildFailedException;
import com.google.devtools.build.lib.analysis.ViewCreationFailedException;
import com.google.devtools.build.lib.buildtool.util.BuildIntegrationTestCase;
import com.google.devtools.build.lib.skyframe.TransitiveTargetKey;
import com.google.protobuf.ByteString;
import com.google.testing.junit.testparameterinjector.TestParameter;
import com.google.testing.junit.testparameterinjector.TestParameterInjector;
import java.io.ByteArrayOutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import org.junit.Test;
import org.junit.runner.RunWith;

Expand Down Expand Up @@ -595,6 +600,29 @@ public void testAspectDepChain() throws Exception {
"//start:startdep");
}

@Test
public void testGenQueryOutputCompressed() throws Exception {
write(
"fruits/BUILD",
"sh_library(name='melon', deps=[':papaya'])",
"sh_library(name='papaya')",
"genquery(name='q',",
" scope=[':melon'],",
" compressed_output=True,",
" expression='deps(//fruits:melon)')");

buildTarget("//fruits:q");
Artifact output = Iterables.getOnlyElement(getArtifacts("//fruits:q"));
ByteString compressedContent = readContentAsByteArray(output);

ByteArrayOutputStream decompressedOut = new ByteArrayOutputStream();
try (GZIPInputStream gzipIn = new GZIPInputStream(compressedContent.newInput())) {
ByteStreams.copy(gzipIn, decompressedOut);
}

assertThat(decompressedOut.toString(UTF_8)).isEqualTo("//fruits:melon\n//fruits:papaya\n");
}

private void assertQueryResult(String queryTarget, String... expected) throws Exception {
assertThat(getQueryResult(queryTarget).split("\n"))
.asList()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@
import com.google.devtools.common.options.OptionsParsingResult;
import com.google.errorprone.annotations.FormatMethod;
import com.google.errorprone.annotations.Keep;
import com.google.protobuf.ByteString;
import java.io.IOException;
import java.lang.Thread.UncaughtExceptionHandler;
import java.util.ArrayList;
Expand Down Expand Up @@ -947,6 +948,10 @@ protected String readContentAsLatin1String(Artifact artifact) throws IOException
return new String(FileSystemUtils.readContentAsLatin1(artifact.getPath()));
}

protected ByteString readContentAsByteArray(Artifact artifact) throws IOException {
return ByteString.copyFrom(FileSystemUtils.readContent(artifact.getPath()));
}

/**
* Given a collection of Artifacts, returns a corresponding set of strings of the form "<root>
* <relpath>", such as "bin x/libx.a". Such strings make assertions easier to write.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,6 @@ java_test(
"//third_party:junit4",
"//third_party:truth",
"//third_party/protobuf:protobuf_java",
"@com_google_testparameterinjector//:testparameterinjector",
],
)
Loading

0 comments on commit 7f639c6

Please sign in to comment.