Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Restrict string escape sequences and introduce flag #8526

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions site/docs/skylark/backward-compatibility.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Full, authorative list of incompatible changes is [GitHub issues with
General Starlark

* [Dictionary concatenation](#dictionary-concatenation)
* [String escapes](#string-escapes)
* [Load must appear at top of file](#load-must-appear-at-top-of-file)
* [Depset is no longer iterable](#depset-is-no-longer-iterable)
* [Depset union](#depset-union)
Expand Down Expand Up @@ -74,6 +75,16 @@ with Python. A possible workaround is to use the `.update` method instead.
* Default: `true`
* Tracking issue: [#6461](https://github.com/bazelbuild/bazel/issues/6461)

### String escapes

We are restricting unrecognized escape sequences. Trying to include escape
sequences like `\a`, `\b` or any other escape sequence that is unknown to
Starlark will result in a syntax error.

* Flag: `--incompatible_restrict_escape_sequences`
* Default: `false`
* Tracking issue: [#8380](https://github.com/bazelbuild/bazel/issues/8380)

### Load must appear at top of file

Previously, the `load` statement could appear anywhere in a `.bzl` file so long
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -612,6 +612,18 @@ public class StarlarkSemanticsOptions extends OptionsBase implements Serializabl
+ "returns a depset instead.")
public boolean incompatibleDepsetForLibrariesToLinkGetter;

@Option(
name = "incompatible_restrict_string_escapes",
defaultValue = "false",
documentationCategory = OptionDocumentationCategory.STARLARK_SEMANTICS,
effectTags = {OptionEffectTag.BUILD_FILE_SEMANTICS},
metadataTags = {
OptionMetadataTag.INCOMPATIBLE_CHANGE,
OptionMetadataTag.TRIGGERED_BY_ALL_INCOMPATIBLE_CHANGES
},
help = "If set to true, unknown string escapes like `\\a` become rejected.")
public boolean incompatibleRestrictStringEscapes;

/** Constructs a {@link StarlarkSemantics} object corresponding to this set of option values. */
public StarlarkSemantics toSkylarkSemantics() {
return StarlarkSemantics.builder()
Expand Down Expand Up @@ -662,6 +674,7 @@ public StarlarkSemantics toSkylarkSemantics() {
.internalSkylarkFlagTestCanary(internalSkylarkFlagTestCanary)
.incompatibleDoNotSplitLinkingCmdline(incompatibleDoNotSplitLinkingCmdline)
.incompatibleDepsetForLibrariesToLinkGetter(incompatibleDepsetForLibrariesToLinkGetter)
.incompatibleRestrictStringEscapes(incompatibleRestrictStringEscapes)
.build();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,7 @@ private Extension createExtension(
public static void execAndExport(BuildFileAST ast, Label extensionLabel,
EventHandler eventHandler,
com.google.devtools.build.lib.syntax.Environment extensionEnv) throws InterruptedException {
ast.replayLexerEvents(extensionEnv, eventHandler);
ImmutableList<Statement> statements = ast.getStatements();
for (Statement statement : statements) {
ast.execTopLevelStatement(statement, extensionEnv, eventHandler);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ public class BuildFileAST extends ASTNode {
*/
private final boolean containsErrors;

private final List<Event> stringEscapeEvents;

@Nullable private final String contentHashCode;

private BuildFileAST(
Expand All @@ -57,13 +59,15 @@ private BuildFileAST(
String contentHashCode,
Location location,
ImmutableList<Comment> comments,
@Nullable ImmutableList<SkylarkImport> imports) {
@Nullable ImmutableList<SkylarkImport> imports,
List<Event> stringEscapeEvents) {
this.statements = statements;
this.containsErrors = containsErrors;
this.contentHashCode = contentHashCode;
this.comments = comments;
this.setLocation(location);
this.imports = imports;
this.stringEscapeEvents = stringEscapeEvents;
}

private static BuildFileAST create(
Expand Down Expand Up @@ -98,7 +102,8 @@ private static BuildFileAST create(
contentHashCode,
result.location,
ImmutableList.copyOf(result.comments),
skylarkImports.second);
skylarkImports.second,
result.stringEscapeEvents);
}

private static BuildFileAST create(
Expand Down Expand Up @@ -135,7 +140,8 @@ public BuildFileAST subTree(int firstStatement, int lastStatement) {
null,
this.statements.get(firstStatement).getLocation(),
ImmutableList.of(),
imports.build());
imports.build(),
stringEscapeEvents);
}

/**
Expand Down Expand Up @@ -202,6 +208,15 @@ public ImmutableList<StringLiteral> getRawImports() {
return imports.build();
}

/** Returns true if there was no error event. */
public boolean replayLexerEvents(Environment env, EventHandler eventHandler) {
if (env.getSemantics().incompatibleRestrictStringEscapes() && !stringEscapeEvents.isEmpty()) {
Event.replayEventsOn(eventHandler, stringEscapeEvents);
return false;
}
return true;
}

/**
* Executes this build file in a given Environment.
*
Expand Down Expand Up @@ -371,7 +386,8 @@ public static BuildFileAST parseSkylarkFileWithoutImports(
/* contentHashCode= */ null,
result.location,
ImmutableList.copyOf(result.comments),
/* imports= */ null);
/* imports= */ null,
result.stringEscapeEvents);
}

/**
Expand All @@ -384,7 +400,7 @@ public BuildFileAST validate(Environment env, EventHandler eventHandler) {
if (valid || containsErrors) {
return this;
}
return new BuildFileAST(statements, true, contentHashCode, getLocation(), comments, imports);
return new BuildFileAST(statements, true, contentHashCode, getLocation(), comments, imports, stringEscapeEvents);
}

public static BuildFileAST parseString(EventHandler eventHandler, String... content) {
Expand Down Expand Up @@ -446,6 +462,7 @@ public static Object eval(Environment env, String... input)
public static BuildFileAST parseAndValidateSkylarkString(Environment env, String[] input)
throws EvalException {
BuildFileAST ast = parseString(env.getEventHandler(), input);
ast.replayLexerEvents(env, env.getEventHandler());
ValidationEnvironment.validateAst(env, ast.getStatements());
return ast;
}
Expand Down
20 changes: 19 additions & 1 deletion src/main/java/com/google/devtools/build/lib/syntax/Lexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,14 @@ private static class LocationInfo {

private int dents; // number of saved INDENT (>0) or OUTDENT (<0) tokens to return

/**
* StringEscapeEvents contains the errors related to invalid escape sequences like "\a".
* This is not handled by the normal eventHandler. Instead, it is passed to the parser and
* then the AST. During the evaluation, we can decide to show the events based on a flag
* in StarlarkSemantics. This code is temporary, during the migration.
*/
private List<Event> stringEscapeEvents = new ArrayList<>();

/**
* Constructs a lexer which tokenizes the contents of the specified InputBuffer. Any errors during
* lexing are reported on "handler".
Expand All @@ -129,6 +137,10 @@ List<Comment> getComments() {
return comments;
}

List<Event> getStringEscapeEvents() {
return stringEscapeEvents;
}

/**
* Returns the filename from which the lexer's input came. Returns an empty value if the input
* came from a string.
Expand Down Expand Up @@ -457,10 +469,16 @@ private void escapedStringLiteral(char quot, boolean isRaw) {
case 'v':
case 'x':
// exists in Python but not implemented in Blaze => error
error("escape sequence not implemented: \\" + c, literalStartPos, pos);
error("invalid escape sequence: \\" + c, literalStartPos, pos);
break;
default:
// unknown char escape => "\literal"
stringEscapeEvents.add(Event.error(
createLocation(pos - 1, pos), "invalid escape sequence: \\" + c +
". You can enable unknown escape sequences by passing the flag " +
"--incompatible_restrict_string_escapes=false")
);

literal.append('\\');
literal.append(c);
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,17 +62,21 @@ public static final class ParseResult {
/** Whether the file contained any errors. */
public final boolean containsErrors;

public final List<Event> stringEscapeEvents;

public ParseResult(
List<Statement> statements,
List<Comment> comments,
Location location,
boolean containsErrors) {
boolean containsErrors,
List<Event> stringEscapeEvents) {
// No need to copy here; when the object is created, the parser instance is just about to go
// out of scope and be garbage collected.
this.statements = Preconditions.checkNotNull(statements);
this.comments = Preconditions.checkNotNull(comments);
this.location = location;
this.containsErrors = containsErrors;
this.stringEscapeEvents = stringEscapeEvents;
}
}

Expand Down Expand Up @@ -205,7 +209,7 @@ public static ParseResult parseFile(ParserInputSource input, EventHandler eventH
}
boolean errors = parser.errorsCount > 0 || lexer.containsErrors();
return new ParseResult(
statements, lexer.getComments(), locationFromStatements(lexer, statements), errors);
statements, lexer.getComments(), locationFromStatements(lexer, statements), errors, lexer.getStringEscapeEvents());
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,8 @@ public boolean flagValue(FlagIdentifier flagIdentifier) {

public abstract boolean incompatibleDepsetForLibrariesToLinkGetter();

public abstract boolean incompatibleRestrictStringEscapes();

/** Returns a {@link Builder} initialized with the values of this instance. */
public abstract Builder toBuilder();

Expand Down Expand Up @@ -260,6 +262,7 @@ public static Builder builderWithDefaults() {
.internalSkylarkFlagTestCanary(false)
.incompatibleDoNotSplitLinkingCmdline(true)
.incompatibleDepsetForLibrariesToLinkGetter(true)
.incompatibleRestrictStringEscapes(false)
.build();

/** Builder for {@link StarlarkSemantics}. All fields are mandatory. */
Expand Down Expand Up @@ -352,6 +355,8 @@ public abstract Builder incompatibleDisallowRuleExecutionPlatformConstraintsAllo

public abstract Builder incompatibleDepsetForLibrariesToLinkGetter(boolean value);

public abstract Builder incompatibleRestrictStringEscapes(boolean value);

public abstract StarlarkSemantics build();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ private static StarlarkSemanticsOptions buildRandomOptions(Random rand) throws E
"--incompatible_restrict_named_params=" + rand.nextBoolean(),
"--incompatible_static_name_resolution_in_build_files=" + rand.nextBoolean(),
"--incompatible_string_join_requires_strings=" + rand.nextBoolean(),
"--incompatible_restrict_string_escapes=" + rand.nextBoolean(),
"--internal_skylark_flag_test_canary=" + rand.nextBoolean());
}

Expand Down Expand Up @@ -218,6 +219,7 @@ private static StarlarkSemantics buildRandomSemantics(Random rand) {
.incompatibleRestrictNamedParams(rand.nextBoolean())
.incompatibleStaticNameResolutionInBuildFiles(rand.nextBoolean())
.incompatibleStringJoinRequiresStrings(rand.nextBoolean())
.incompatibleRestrictStringEscapes(rand.nextBoolean())
.internalSkylarkFlagTestCanary(rand.nextBoolean())
.build();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2994,4 +2994,28 @@ public void testRecursiveImport2() throws Exception {
+ "//test/skylark:ext3.bzl, //test/skylark:ext4.bzl]");
}
}

@Test
public void testUnknownStringEscapesForbidden() throws Exception {
setSkylarkSemanticsOptions("--incompatible_restrict_string_escapes=true");

scratch.file("test/extension.bzl", "y = \"\\z\"");

scratch.file("test/BUILD", "load('//test:extension.bzl', 'y')", "cc_library(name = 'r')");

reporter.removeHandler(failFastHandler);
getConfiguredTarget("//test:r");
assertContainsEvent("invalid escape sequence: \\z");
}

@Test
public void testUnknownStringEscapes() throws Exception {
setSkylarkSemanticsOptions("--incompatible_restrict_string_escapes=false");

scratch.file("test/extension.bzl", "y = \"\\z\"");

scratch.file("test/BUILD", "load('//test:extension.bzl', 'y')", "cc_library(name = 'r')");

getConfiguredTarget("//test:r");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ public void testStringEscapes() throws Exception {
.isEqualTo("STRING(ab) NEWLINE EOF"); // escape end of line
assertThat(values(tokens("\"ab\\ucd\""))).isEqualTo("STRING(abcd) NEWLINE EOF");
assertThat(lastError.toString())
.isEqualTo("/some/path.txt:1: escape sequence not implemented: \\u");
.isEqualTo("/some/path.txt:1: invalid escape sequence: \\u");
}

@Test
Expand Down
10 changes: 10 additions & 0 deletions src/test/starlark/testdata/string_misc.sky
Original file line number Diff line number Diff line change
Expand Up @@ -148,3 +148,13 @@ assert_eq('a1'.isalpha(), False)
assert_eq('a '.isalpha(), False)
assert_eq('A'.isalpha(), True)
assert_eq('AbZ'.isalpha(), True)

# escape sequences
assert_eq("\"", '"')
---
"\777" ### octal escape sequence out of range (maximum is \377)
---
"\" ### unterminated string literal at eol
---
""" ### unterminated string literal at eof
---
2 changes: 1 addition & 1 deletion tools/build_defs/pkg/pkg.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def _remap(remap_paths, path):
return path

def _quote(filename, protect = "="):
"""Quote the filename, by escaping = by \= and \ by \\"""
"""Quote the filename, by escaping = by \\= and \\ by \\\\"""
return filename.replace("\\", "\\\\").replace(protect, "\\" + protect)

def _pkg_tar_impl(ctx):
Expand Down
4 changes: 2 additions & 2 deletions tools/cpp/windows_cc_configure.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def _get_path_env_var(repository_ctx, name):
return None

def _get_temp_env(repository_ctx):
"""Returns the value of TMP, or TEMP, or if both undefined then C:\Windows."""
"""Returns the value of TMP, or TEMP, or if both undefined then C:\\Windows."""
tmp = _get_path_env_var(repository_ctx, "TMP")
if not tmp:
tmp = _get_path_env_var(repository_ctx, "TEMP")
Expand Down Expand Up @@ -86,7 +86,7 @@ def _get_escaped_windows_msys_starlark_content(repository_ctx, use_mingw = False
return tool_paths, tool_bin_path, include_directories

def _get_system_root(repository_ctx):
"""Get System root path on Windows, default is C:\\\Windows. Doesn't %-escape the result."""
"""Get System root path on Windows, default is C:\\Windows. Doesn't %-escape the result."""
systemroot = _get_path_env_var(repository_ctx, "SYSTEMROOT")
if not systemroot:
systemroot = "C:\\Windows"
Expand Down