Skip to content

Commit

Permalink
Reduce flakiness on Windows for BwoB tests
Browse files Browse the repository at this point in the history
`BuildWithoutTheBytesIntegrationTest` has been flaky on Windows. Example error leads to file system errors.

```
1) incrementalBuild_treeArtifacts_correctlyProducesNewTree(com.google.devtools.build.lib.remote.BuildWithoutTheBytesIntegrationTest)
java.io.IOException: C:/b/iahsqj7l/execroot/io_bazel/_tmp/b474bcb9956744b234827924ec01952d/remote.pid_file (Permission denied)
	at com.google.devtools.build.lib.windows.WindowsFileSystem.delete(WindowsFileSystem.java:60)
	at com.google.devtools.build.lib.vfs.Path.delete(Path.java:587)
	at com.google.devtools.build.lib.buildtool.util.BuildIntegrationTestCase.bestEffortDeleteTreesBelow(BuildIntegrationTestCase.java:387)
	at com.google.devtools.build.lib.buildtool.util.BuildIntegrationTestCase.cleanUp(BuildIntegrationTestCase.java:364)
	... 26 trimmed
Caused by: java.nio.file.AccessDeniedException: C:/b/iahsqj7l/execroot/io_bazel/_tmp/b474bcb9956744b234827924ec01952d/remote.pid_file
	at com.google.devtools.build.lib.windows.WindowsFileOperations.deletePath(WindowsFileOperations.java:279)
	at com.google.devtools.build.lib.windows.WindowsFileSystem.delete(WindowsFileSystem.java:56)
	... 30 more
2) downloadToplevel_treeArtifacts(com.google.devtools.build.lib.remote.BuildWithoutTheBytesIntegrationTest)
java.io.IOException: C:/b/iahsqj7l/execroot/io_bazel/_tmp/b474bcb9956744b234827924ec01952d/remote.work_path already exists
	at com.google.devtools.build.lib.remote.util.IntegrationTestUtils.ensureMkdir(IntegrationTestUtils.java:116)
	at com.google.devtools.build.lib.remote.util.IntegrationTestUtils.startWorker(IntegrationTestUtils.java:87)
	at com.google.devtools.build.lib.remote.util.IntegrationTestUtils.startWorker(IntegrationTestUtils.java:77)
	at com.google.devtools.build.lib.remote.BuildWithoutTheBytesIntegrationTest.setupOptions(BuildWithoutTheBytesIntegrationTest.java:49)
	at com.google.devtools.build.lib.buildtool.util.BuildIntegrationTestCase.createRuntimeWrapper(BuildIntegrationTestCase.java:317)
	at com.google.devtools.build.lib.buildtool.util.BuildIntegrationTestCase.createFilesAndMocks(BuildIntegrationTestCase.java:234)
```

This PR fixes 1) by not using pid but checking port status directly to wait for the remote worker to start up and 2) by cleaning up resources used by remote worker after each test case.

Closes bazelbuild#17479.

PiperOrigin-RevId: 509549826
Change-Id: Icef2be46d5f2a6025cad0cefcf766ae16b4552d8
  • Loading branch information
coeuvre authored and fmeum committed Feb 16, 2023
1 parent 102275d commit 9ea258e
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 26 deletions.
2 changes: 2 additions & 0 deletions src/test/java/com/google/devtools/build/lib/remote/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ java_test(
size = "large",
srcs = ["BuildWithoutTheBytesIntegrationTest.java"],
shard_count = 5,
tags = ["requires-network"],
runtime_deps = [
"//third_party/grpc-java:grpc-jar",
],
Expand All @@ -171,6 +172,7 @@ java_test(
java_test(
name = "DiskCacheIntegrationTest",
srcs = ["DiskCacheIntegrationTest.java"],
tags = ["requires-network"],
runtime_deps = [
"//third_party/grpc-java:grpc-jar",
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,14 @@
import java.io.File;
import java.io.IOException;
import java.net.DatagramSocket;
import java.net.InetSocketAddress;
import java.net.ServerSocket;
import java.net.SocketException;
import java.nio.channels.SocketChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Comparator;
import java.util.Random;

/** Integration test utilities. */
Expand Down Expand Up @@ -73,6 +79,30 @@ public static int pickUnusedRandomPort() throws IOException, InterruptedExceptio
throw new IOException("Failed to find available port");
}

private static void waitForPortOpen(Subprocess process, int port)
throws IOException, InterruptedException {
var addr = new InetSocketAddress("localhost", port);
var timeout = new IOException("Timed out when waiting for port to open");
for (var i = 0; i < 20; ++i) {
if (!process.isAlive()) {
var message = new String(process.getErrorStream().readAllBytes(), UTF_8);
throw new IOException("Failed to start worker: " + message);
}

try {
try (var socketChannel = SocketChannel.open()) {
socketChannel.configureBlocking(/* block= */ true);
socketChannel.connect(addr);
}
return;
} catch (IOException e) {
timeout.addSuppressed(e);
Thread.sleep(1000);
}
}
throw timeout;
}

public static WorkerInstance startWorker() throws IOException, InterruptedException {
return startWorker(/* useHttp= */ false);
}
Expand All @@ -82,7 +112,6 @@ public static WorkerInstance startWorker(boolean useHttp)
PathFragment testTmpDir = PathFragment.create(tmpDirFile().getAbsolutePath());
PathFragment workPath = testTmpDir.getRelative("remote.work_path");
PathFragment casPath = testTmpDir.getRelative("remote.cas_path");
PathFragment pidPath = testTmpDir.getRelative("remote.pid_file");
int workerPort = pickUnusedRandomPort();
ensureMkdir(workPath);
ensureMkdir(casPath);
Expand All @@ -94,20 +123,10 @@ public static WorkerInstance startWorker(boolean useHttp)
workerPath,
"--work_path=" + workPath.getSafePathString(),
"--cas_path=" + casPath.getSafePathString(),
(useHttp ? "--http_listen_port=" : "--listen_port=") + workerPort,
"--pid_file=" + pidPath))
(useHttp ? "--http_listen_port=" : "--listen_port=") + workerPort))
.start();

File pidFile = new File(pidPath.getSafePathString());
while (!pidFile.exists()) {
if (!workerProcess.isAlive()) {
String message = new String(workerProcess.getErrorStream().readAllBytes(), UTF_8);
throw new IOException("Failed to start worker: " + message);
}
Thread.sleep(1);
}

return new WorkerInstance(workerProcess, workerPort, workPath, casPath, pidPath);
waitForPortOpen(workerProcess, workerPort);
return new WorkerInstance(workerProcess, workerPort, workPath, casPath);
}

private static void ensureMkdir(PathFragment path) throws IOException {
Expand All @@ -125,23 +144,25 @@ public static class WorkerInstance {
private final int port;
private final PathFragment workPath;
private final PathFragment casPath;
private final PathFragment pidPath;

private WorkerInstance(
Subprocess process,
int port,
PathFragment workPath,
PathFragment casPath,
PathFragment pidPath) {
Subprocess process, int port, PathFragment workPath, PathFragment casPath) {
this.process = process;
this.port = port;
this.workPath = workPath;
this.casPath = casPath;
this.pidPath = pidPath;
}

public void stop() {
public void stop() throws IOException {
process.destroyAndWait();
deleteDir(workPath);
deleteDir(casPath);
}

private static void deleteDir(PathFragment path) throws IOException {
try (var stream = Files.walk(Paths.get(path.getSafePathString()))) {
stream.sorted(Comparator.reverseOrder()).map(Path::toFile).forEach(File::delete);
}
}

public int getPort() {
Expand All @@ -155,9 +176,5 @@ public PathFragment getWorkPath() {
public PathFragment getCasPath() {
return casPath;
}

public PathFragment getPidPath() {
return pidPath;
}
}
}

0 comments on commit 9ea258e

Please sign in to comment.