Skip to content

Commit

Permalink
Implement asynchronous glob prefetching to specifically prefetch the …
Browse files Browse the repository at this point in the history
…globs() of

a file.

There is currently a mechanism in Bazel to asynchronously go off and prefetch
file system content, specifically by recursively reading subdirectories up to
maxDirectoriesToEagerlyVisitInGlobbing. This is started when the first glob in
a file is found. It is useful to warm up caches in networked filesystems, but
doesn't actually help evaluating the globs().  As it can create extra load on
the filesystem, it can actually make evaluation slower. Moreover, it implicitly
relies on the fact that in common packages, most files are actually read into
globs() in some way, which isn't always the case.

Instead, we can visit a build file's AST and eagerly try to explicitly extract
globs that should be visited. This change is doing that if
maxDirectoriesToEagerlyVisitInGlobbing is set to a sentinal value of -2. The
visitor cannot understand all globs, e.g. when some of the pattern strings are
constructed from complex expression, but the majority of globs is explicitly
spelled out.

RELNOTES: None.
PiperOrigin-RevId: 234751342
djasper-gh authored and copybara-github committed Feb 20, 2019
1 parent 3b6e35e commit a57c879
Showing 2 changed files with 103 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -41,6 +41,7 @@
import com.google.devtools.build.lib.skylarkinterface.Param;
import com.google.devtools.build.lib.skylarkinterface.SkylarkSignature;
import com.google.devtools.build.lib.skylarkinterface.SkylarkValue;
import com.google.devtools.build.lib.syntax.Argument.Passed;
import com.google.devtools.build.lib.syntax.BaseFunction;
import com.google.devtools.build.lib.syntax.BuildFileAST;
import com.google.devtools.build.lib.syntax.BuiltinFunction;
@@ -49,8 +50,12 @@
import com.google.devtools.build.lib.syntax.Environment.Extension;
import com.google.devtools.build.lib.syntax.EvalException;
import com.google.devtools.build.lib.syntax.EvalUtils;
import com.google.devtools.build.lib.syntax.Expression;
import com.google.devtools.build.lib.syntax.FuncallExpression;
import com.google.devtools.build.lib.syntax.FunctionSignature;
import com.google.devtools.build.lib.syntax.Identifier;
import com.google.devtools.build.lib.syntax.IntegerLiteral;
import com.google.devtools.build.lib.syntax.ListLiteral;
import com.google.devtools.build.lib.syntax.Mutability;
import com.google.devtools.build.lib.syntax.ParserInputSource;
import com.google.devtools.build.lib.syntax.Runtime;
@@ -62,6 +67,8 @@
import com.google.devtools.build.lib.syntax.SkylarkUtils.Phase;
import com.google.devtools.build.lib.syntax.StarlarkSemantics;
import com.google.devtools.build.lib.syntax.Statement;
import com.google.devtools.build.lib.syntax.StringLiteral;
import com.google.devtools.build.lib.syntax.SyntaxTreeVisitor;
import com.google.devtools.build.lib.syntax.Type;
import com.google.devtools.build.lib.syntax.Type.ConversionException;
import com.google.devtools.build.lib.vfs.FileSystem;
@@ -73,6 +80,7 @@
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -423,7 +431,9 @@ public void setGlobbingThreads(int globbingThreads) {
/**
* Sets the number of directories to eagerly traverse on the first glob for a given package, in
* order to warm the filesystem. -1 means do no eager traversal. See {@code
* PackageCacheOptions#maxDirectoriesToEagerlyVisitInGlobbing}.
* PackageCacheOptions#maxDirectoriesToEagerlyVisitInGlobbing}. -2 means do the eager traversal
* using the regular globbing infrastructure, i.e. sharing the globbing threads and caching the
* actual glob results.
*/
public void setMaxDirectoriesToEagerlyVisitInGlobbing(
int maxDirectoriesToEagerlyVisitInGlobbing) {
@@ -1664,6 +1674,17 @@ public Package.Builder evaluateBuildFile(
pkgBuilder.setContainsErrors();
}

if (maxDirectoriesToEagerlyVisitInGlobbing == -2) {
GlobPatternExtractor extractor = new GlobPatternExtractor();
extractor.visit(buildFileAST);
try {
globber.runAsync(extractor.getIncludeDirectoriesPatterns(), ImmutableList.of(), false);
globber.runAsync(extractor.getExcludeDirectoriesPatterns(), ImmutableList.of(), true);
} catch (BadGlobException | InterruptedException e) {
// Ignore exceptions. Errors will be properly reported when the actual globbing is done.
}
}

// TODO(bazel-team): (2009) the invariant "if errors are reported, mark the package
// as containing errors" is strewn all over this class. Refactor to use an
// event sensor--and see if we can simplify the calling code in
@@ -1678,6 +1699,64 @@ public Package.Builder evaluateBuildFile(
return pkgBuilder;
}

/**
* A GlobPatternExtractor visits a syntax tree, tries to extract glob() patterns from it, and
* eagerly instructs a {@link Globber} to fetch them asynchronously. That way, the glob results
* are readily available when required in the actual execution of the syntax tree. The starlark
* code itself is later executed sequentially and having costly globs, especially slow on
* networked file systems, executed sequentially in them can be very time consuming.
*/
@VisibleForTesting
static class GlobPatternExtractor extends SyntaxTreeVisitor {
private final Set<String> includeDirectoriesPatterns = new HashSet<>();
private final Set<String> excludeDirectoriesPatterns = new HashSet<>();

@Override
public void visit(FuncallExpression node) {
super.visit(node);
Expression function = node.getFunction();
if (!(function instanceof Identifier)) {
return;
}
if (!((Identifier) function).getName().equals("glob")) {
return;
}

boolean excludeDirectories = true; // excluded by default.
List<String> globStrings = new ArrayList<>();
for (Passed arg : node.getArguments()) {
if (arg.getIdentifier() != null
&& arg.getIdentifier().getName().equals("exclude_directories")) {
if (arg.getValue() instanceof IntegerLiteral) {
excludeDirectories = ((IntegerLiteral) arg.getValue()).getValue() != 0;
}
continue;
}
if (arg.getValue() instanceof ListLiteral) {
ListLiteral list = (ListLiteral) arg.getValue();
for (Expression elem : list.getElements()) {
if (elem instanceof StringLiteral) {
globStrings.add(((StringLiteral) elem).getValue());
}
}
}
}
if (excludeDirectories) {
excludeDirectoriesPatterns.addAll(globStrings);
} else {
includeDirectoriesPatterns.addAll(globStrings);
}
}

List<String> getIncludeDirectoriesPatterns() {
return ImmutableList.copyOf(includeDirectoriesPatterns);
}

List<String> getExcludeDirectoriesPatterns() {
return ImmutableList.copyOf(excludeDirectoriesPatterns);
}
}

// Reports an error and returns false iff package identifier was illegal.
private static boolean validatePackageIdentifier(
PackageIdentifier packageId, Location location, ExtendedEventHandler eventHandler) {
Original file line number Diff line number Diff line change
@@ -25,8 +25,10 @@
import com.google.devtools.build.lib.cmdline.Label;
import com.google.devtools.build.lib.cmdline.PackageIdentifier;
import com.google.devtools.build.lib.events.Reporter;
import com.google.devtools.build.lib.packages.PackageFactory.GlobPatternExtractor;
import com.google.devtools.build.lib.packages.util.PackageFactoryApparatus;
import com.google.devtools.build.lib.packages.util.PackageFactoryTestBase;
import com.google.devtools.build.lib.syntax.BuildFileAST;
import com.google.devtools.build.lib.syntax.Type;
import com.google.devtools.build.lib.testutil.MoreAsserts;
import com.google.devtools.build.lib.testutil.TestUtils;
@@ -1230,4 +1232,25 @@ protected PackageFactoryApparatus createPackageFactoryApparatus() {
protected String getPathPrefix() {
return "";
}

@Test
public void testGlobPatternExtractor() {
GlobPatternExtractor globPatternExtractor = new GlobPatternExtractor();
globPatternExtractor.visit(
BuildFileAST.parseString(
event -> {
throw new IllegalArgumentException(event.getMessage());
},
"pattern = '*'",
"some_variable = glob([",
" '**/*',",
" 'a' + 'b',",
" pattern,",
"])",
"other_variable = glob(include = ['a'], exclude = ['b'])",
"third_variable = glob(['c'], exclude_directories = 0)"));
assertThat(globPatternExtractor.getExcludeDirectoriesPatterns())
.containsExactly("ab", "a", "b", "**/*");
assertThat(globPatternExtractor.getIncludeDirectoriesPatterns()).containsExactly("c");
}
}

0 comments on commit a57c879

Please sign in to comment.