Support '\r\n' separators when splitting Ninja files into separately parsed fragments.

Closes #10210. PiperOrigin-RevId: 280170780
bazelbuild · Nov 13, 2019 · 7535d4c · 7535d4c
1 parent ceadf0a
commit 7535d4c
Show file tree

Hide file tree

Showing 12 changed files with 281 additions and 191 deletions.
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/BufferSplitter.java b/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/BufferSplitter.java
@@ -30,23 +30,23 @@
 public class BufferSplitter implements Callable<List<ByteFragmentAtOffset>> {
   private final ByteBufferFragment bufferFragment;
   private final DeclarationConsumer consumer;
-  private final SeparatorPredicate separatorPredicate;
+  private final SeparatorFinder separatorFinder;
   private final int offset;
 
   /**
    * @param bufferFragment {@link ByteBufferFragment}, fragment of which should be split
    * @param consumer declaration consumer
-   * @param separatorPredicate predicate for separating declarations
+   * @param separatorFinder finds declaration separators
    * @param offset start offset of <code>buffer</code> from the beginning of the file
    */
   public BufferSplitter(
       ByteBufferFragment bufferFragment,
       DeclarationConsumer consumer,
-      SeparatorPredicate separatorPredicate,
+      SeparatorFinder separatorFinder,
       int offset) {
     this.bufferFragment = bufferFragment;
     this.consumer = consumer;
-    this.separatorPredicate = separatorPredicate;
+    this.separatorFinder = separatorFinder;
     this.offset = offset;
   }
 
@@ -61,22 +61,19 @@ public BufferSplitter(
   public List<ByteFragmentAtOffset> call() throws Exception {
     List<ByteFragmentAtOffset> fragments = Lists.newArrayList();
     int start = 0;
-    for (int i = 0; i < bufferFragment.length() - 2; i++) {
-      byte previous = bufferFragment.byteAt(i);
-      byte current = bufferFragment.byteAt(i + 1);
-      byte next = bufferFragment.byteAt(i + 2);
-
-      if (!separatorPredicate.test(previous, current, next)) {
-        continue;
+    while (true) {
+      int end = separatorFinder.findNextSeparator(bufferFragment, start, -1);
+      if (end < 0) {
+        break;
       }
-      ByteBufferFragment fragment = bufferFragment.subFragment(start, i + 2);
+      ByteBufferFragment fragment = bufferFragment.subFragment(start, end + 1);
       ByteFragmentAtOffset fragmentAtOffset = new ByteFragmentAtOffset(offset, fragment);
       if (start > 0) {
         consumer.declaration(fragmentAtOffset);
       } else {
         fragments.add(fragmentAtOffset);
       }
-      start = i + 2;
+      start = end + 1;
     }
     // There is always at least one byte at the bounds of the fragment.
     ByteBufferFragment lastFragment = bufferFragment.subFragment(start, bufferFragment.length());

diff --git a/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/DeclarationAssembler.java b/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/DeclarationAssembler.java
@@ -18,6 +18,8 @@
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
+import com.google.common.collect.Range;
+import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.List;
 
@@ -28,18 +30,18 @@
  */
 public class DeclarationAssembler {
   private final DeclarationConsumer declarationConsumer;
-  private final SeparatorPredicate separatorPredicate;
+  private final SeparatorFinder separatorFinder;
 
   /**
    * @param declarationConsumer delegate declaration consumer for actual processing / parsing
-   * @param separatorPredicate predicate used to determine if two fragments should be separate
+   * @param separatorFinder callback used to determine if two fragments should be separate
    *     declarations (in the Ninja case, if the new line starts with a space, it should be treated
    *     as a part of the previous declaration, i.e. the separator is longer than one symbol).
    */
   public DeclarationAssembler(
-      DeclarationConsumer declarationConsumer, SeparatorPredicate separatorPredicate) {
+      DeclarationConsumer declarationConsumer, SeparatorFinder separatorFinder) {
     this.declarationConsumer = declarationConsumer;
-    this.separatorPredicate = separatorPredicate;
+    this.separatorFinder = separatorFinder;
   }
 
   /**
@@ -69,56 +71,60 @@ public void wrapUp(List<ByteFragmentAtOffset> fragments) throws GenericParsingEx
   }
 
   private void sendMerged(List<ByteFragmentAtOffset> list) throws GenericParsingException {
-    int offset = -1;
-    List<ByteBufferFragment> leftPart = Lists.newArrayList();
-
-    for (ByteFragmentAtOffset edge : list) {
-      ByteBufferFragment sequence = edge.getFragment();
-      // If the new sequence is separate from already collected parts,
-      // merge them and feed to consumer.
-      if (!leftPart.isEmpty()) {
-        ByteBufferFragment lastPart = Iterables.getLast(leftPart);
-        // The order of symbols: previousInOld, lastInOld, currentInNew, nextInNew.
-        byte previousInOld = lastPart.length() == 1 ? 0 : lastPart.byteAt(lastPart.length() - 2);
-        byte lastInOld = lastPart.byteAt(lastPart.length() - 1);
-        byte currentInNew = sequence.byteAt(0);
-        byte nextInNew = sequence.length() == 1 ? 0 : sequence.byteAt(1);
-
-        // <symbol> | \n<non-space>
-        if (separatorPredicate.test(lastInOld, currentInNew, nextInNew)) {
-          // Add separator to the end of the accumulated sequence
-          leftPart.add(sequence.subFragment(0, 1));
-          ByteFragmentAtOffset byteFragmentAtOffset =
-              new ByteFragmentAtOffset(edge.getOffset(), ByteBufferFragment.merge(leftPart));
-          declarationConsumer.declaration(byteFragmentAtOffset);
-          leftPart.clear();
-          // Cutting out the separator in the beginning
-          if (sequence.length() > 1) {
-            leftPart.add(sequence.subFragment(1, sequence.length()));
-            offset = edge.getOffset();
-          }
-          continue;
-        }
+    Preconditions.checkArgument(!list.isEmpty());
+    ByteFragmentAtOffset first = list.get(0);
+    if (list.size() == 1) {
+      declarationConsumer.declaration(first);
+      return;
+    }
 
-        // <symbol>\n | <non-space>
-        if (separatorPredicate.test(previousInOld, lastInOld, currentInNew)) {
-          ByteFragmentAtOffset byteFragmentAtOffset =
-              new ByteFragmentAtOffset(edge.getOffset(), ByteBufferFragment.merge(leftPart));
-          declarationConsumer.declaration(byteFragmentAtOffset);
-          leftPart.clear();
-        }
+    // 1. We merge all the passed fragments into one fragment.
+    // 2. We check the bytes around the connection of two fragments (up to 3 bytes before the
+    // connection and up to 4 bytes after it, see the range computation below):
+    // a separator can consist of 4 bytes (<escape>\r\n<indent>),
+    // so in case only a part of the separator is in one of the fragments,
+    // we get 3 bytes in one part and one byte in the other.
+    // 3. We record the ranges of those few bytes at the connections of the fragments into
+    // interestingRanges.
+    // 4. Later we will check only interestingRanges for separators, and create corresponding
+    // fragments; the underlying common ByteBuffer will be reused, so we are not performing
+    // extensive copying.
+    int firstOffset = first.getOffset();
+    List<ByteBufferFragment> fragments = new ArrayList<>();
+    List<Range<Integer>> interestingRanges = Lists.newArrayList();
+    int fragmentShift = 0;
+    for (ByteFragmentAtOffset byteFragmentAtOffset : list) {
+      ByteBufferFragment fragment = byteFragmentAtOffset.getFragment();
+      fragments.add(fragment);
+      if (fragmentShift > 0) {
+        // We are only looking for the separators between fragments.
+        int start = Math.max(0, fragmentShift - 3);
+        int end = fragmentShift + Math.min(4, fragment.length());
+        // Assert that the ranges are not intersecting, otherwise the code that iterates ranges
+        // will work incorrectly.
+        Preconditions.checkState(
+            interestingRanges.isEmpty()
+                || Iterables.getLast(interestingRanges).upperEndpoint() < start);
+        interestingRanges.add(Range.openClosed(start, end));
       }
+      fragmentShift += fragment.length();
+    }
+
+    ByteBufferFragment merged = ByteBufferFragment.merge(fragments);
 
-      leftPart.add(sequence);
-      if (offset == -1) {
-        offset = edge.getOffset();
+    int previousEnd = 0;
+    for (Range<Integer> range : interestingRanges) {
+      int idx =
+          separatorFinder.findNextSeparator(merged, range.lowerEndpoint(), range.upperEndpoint());
+      if (idx >= 0) {
+        // There should always be a previous fragment, as we are checking non-intersecting ranges,
+        // starting from the connection point between first and second fragments.
+        Preconditions.checkState(idx > previousEnd);
+        declarationConsumer.declaration(
+            new ByteFragmentAtOffset(firstOffset, merged.subFragment(previousEnd, idx + 1)));
+        previousEnd = idx + 1;
       }
     }
-    if (!leftPart.isEmpty()) {
-      Preconditions.checkState(offset >= 0);
-      ByteFragmentAtOffset byteFragmentAtOffset =
-          new ByteFragmentAtOffset(offset, ByteBufferFragment.merge(leftPart));
-      declarationConsumer.declaration(byteFragmentAtOffset);
-    }
+    declarationConsumer.declaration(
+        new ByteFragmentAtOffset(firstOffset, merged.subFragment(previousEnd, merged.length())));
   }
 }
diff --git a/.../rules/ninja/file/SeparatorPredicate.java → ...nja/file/IncorrectSeparatorException.java b/.../rules/ninja/file/SeparatorPredicate.java → ...nja/file/IncorrectSeparatorException.java
@@ -15,15 +15,9 @@
 
 package com.google.devtools.build.lib.bazel.rules.ninja.file;
 
-/** Interface for determining where the byte sequence should be split into parts. */
-public interface SeparatorPredicate {
-
-  /**
-   * Returns true if the sequence should be split after <code>current</code> byte.
-   *
-   * @param previous previous byte (before current)
-   * @param current current byte
-   * @param next next byte (after current)
-   */
-  boolean test(byte previous, byte current, byte next);
+/** Thrown while searching for separators when '\r' is not followed by '\n' in the parsed text. */
+public class IncorrectSeparatorException extends GenericParsingException {
+  public IncorrectSeparatorException(String message) {
+    super(message);
+  }
 }
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/NinjaSeparatorFinder.java b/src/main/java/com/google/devtools/build/lib/bazel/rules/ninja/file/NinjaSeparatorFinder.java
@@ -0,0 +1,78 @@
+// Copyright 2019 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+package com.google.devtools.build.lib.bazel.rules.ninja.file;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Implementation of {@link SeparatorFinder} for Ninja files.
+ *
+ * <p>The Ninja declaration consists of several text lines; if the line is a part of the previous
+ * declaration, it starts with some amount of spaces or tabs. If the line is the beginning of the
+ * new declaration, it starts with non-space symbol. Dollar symbol '$' escapes the newline, i.e.
+ * "$\nsomething" does not contain a separator.
+ *
+ * <p>We support '\r\n' separators in Ninja files and throw {@link IncorrectSeparatorException} in
+ * case an incorrect separator is used ('\r' that is not followed by '\n').
+ *
+ * <p>Contract of {@link #findNextSeparator}, as implemented here:
+ *
+ * <ul>
+ *   <li>It returns the index of the last byte of the found separator (the '\n' byte, both for
+ *       "\n" and for "\r\n"), or -1 if no separator is found in the scanned range.
+ *   <li>The byte at <code>startingFrom</code> is only inspected to see whether it is '$'; the
+ *       search for newline bytes starts at <code>startingFrom + 1</code>.
+ *   <li>A non-positive <code>untilExcluded</code> means "scan up to the end of the fragment".
+ *   <li>A newline is reported only if the byte following it lies inside the scanned range and is
+ *       neither a space nor a tab; a newline at the very end of the range is never reported.
+ * </ul>
+ */
+public class NinjaSeparatorFinder implements SeparatorFinder {
+  public static final NinjaSeparatorFinder INSTANCE = new NinjaSeparatorFinder();
+
+  private static final byte DOLLAR_BYTE = '$';
+  private static final byte LINEFEED_BYTE = '\r'; // NOTE: '\r' is carriage return (CR), despite the name.
+  private static final byte NEWLINE_BYTE = '\n'; // '\n' is line feed (LF).
+  private static final byte SPACE_BYTE = ' ';
+  private static final byte TAB_BYTE = '\t';
+
+  private NinjaSeparatorFinder() {}
+
+  @Override
+  public int findNextSeparator(ByteBufferFragment fragment, int startingFrom, int untilExcluded)
+      throws IncorrectSeparatorException {
+    Preconditions.checkState(startingFrom < fragment.length());
+    Preconditions.checkState(untilExcluded < 0 || untilExcluded <= fragment.length());
+
+    boolean escaped = DOLLAR_BYTE == fragment.byteAt(startingFrom); // Is the byte before i a '$'?
+    int endExcl = untilExcluded > 0 ? untilExcluded : fragment.length(); // <= 0 means "to the end".
+    for (int i = startingFrom + 1; i < endExcl - 1; i++) { // i + 1 is always inside the range.
+      byte current = fragment.byteAt(i);
+      byte next = fragment.byteAt(i + 1);
+      byte afterNextOrSpace = i < (endExcl - 2) ? fragment.byteAt(i + 2) : SPACE_BYTE;
+      if (LINEFEED_BYTE == current && NEWLINE_BYTE != next) {
+        throw new IncorrectSeparatorException(
+            "Wrong newline separators: \\r should be followed by \\n.");
+      }
+      if (!escaped
+          && SPACE_BYTE != afterNextOrSpace
+          && TAB_BYTE != afterNextOrSpace
+          && LINEFEED_BYTE == current) {
+        // To avoid introducing a separator length, point to the last byte of the separator.
+        return i + 1;
+      }
+      if (!escaped && SPACE_BYTE != next && TAB_BYTE != next && NEWLINE_BYTE == current) {
+        return i;
+      }
+      if (escaped && LINEFEED_BYTE == current) {
+        // Jump over the whole escaped "\r\n" pair ('\n' is guaranteed to follow by the check above).
+        ++i;
+        escaped = false;
+      } else {
+        escaped = DOLLAR_BYTE == current;
+      }
+    }
+    return -1;
+  }
+}
diff --git a/...in/java/com/google/devtools/build/lib/bazel/rules/ninja/file/NinjaSeparatorPredicate.java b/...in/java/com/google/devtools/build/lib/bazel/rules/ninja/file/NinjaSeparatorPredicate.java