Skip to content

Commit

Permalink
Support '\r\n' separators when splitting Ninja files into separately …
Browse files Browse the repository at this point in the history
…parsed fragments.

Closes #10210.

PiperOrigin-RevId: 280170780
  • Loading branch information
irengrig authored and copybara-github committed Nov 13, 2019
1 parent ceadf0a commit 7535d4c
Show file tree
Hide file tree
Showing 12 changed files with 281 additions and 191 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,23 +30,23 @@
public class BufferSplitter implements Callable<List<ByteFragmentAtOffset>> {
private final ByteBufferFragment bufferFragment;
private final DeclarationConsumer consumer;
private final SeparatorPredicate separatorPredicate;
private final SeparatorFinder separatorFinder;
private final int offset;

/**
* @param bufferFragment {@link ByteBufferFragment}, fragment of which should be split
* @param consumer declaration consumer
* @param separatorPredicate predicate for separating declarations
* @param separatorFinder finds declaration separators
* @param offset start offset of <code>buffer</code> from the beginning of the file
*/
public BufferSplitter(
ByteBufferFragment bufferFragment,
DeclarationConsumer consumer,
SeparatorPredicate separatorPredicate,
SeparatorFinder separatorFinder,
int offset) {
this.bufferFragment = bufferFragment;
this.consumer = consumer;
this.separatorPredicate = separatorPredicate;
this.separatorFinder = separatorFinder;
this.offset = offset;
}

Expand All @@ -61,22 +61,19 @@ public BufferSplitter(
public List<ByteFragmentAtOffset> call() throws Exception {
List<ByteFragmentAtOffset> fragments = Lists.newArrayList();
int start = 0;
for (int i = 0; i < bufferFragment.length() - 2; i++) {
byte previous = bufferFragment.byteAt(i);
byte current = bufferFragment.byteAt(i + 1);
byte next = bufferFragment.byteAt(i + 2);

if (!separatorPredicate.test(previous, current, next)) {
continue;
while (true) {
int end = separatorFinder.findNextSeparator(bufferFragment, start, -1);
if (end < 0) {
break;
}
ByteBufferFragment fragment = bufferFragment.subFragment(start, i + 2);
ByteBufferFragment fragment = bufferFragment.subFragment(start, end + 1);
ByteFragmentAtOffset fragmentAtOffset = new ByteFragmentAtOffset(offset, fragment);
if (start > 0) {
consumer.declaration(fragmentAtOffset);
} else {
fragments.add(fragmentAtOffset);
}
start = i + 2;
start = end + 1;
}
// There is always at least one byte at the bounds of the fragment.
ByteBufferFragment lastFragment = bufferFragment.subFragment(start, bufferFragment.length());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
import com.google.common.base.Preconditions;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Range;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

Expand All @@ -28,18 +30,18 @@
*/
public class DeclarationAssembler {
private final DeclarationConsumer declarationConsumer;
private final SeparatorPredicate separatorPredicate;
private final SeparatorFinder separatorFinder;

/**
* @param declarationConsumer delegate declaration consumer for actual processing / parsing
* @param separatorPredicate predicate used to determine if two fragments should be separate
* @param separatorFinder callback used to determine if two fragments should be separate
* declarations (in the Ninja case, if the new line starts with a space, it should be treated
* as a part of the previous declaration, i.e. the separator is longer than one symbol).
*/
public DeclarationAssembler(
DeclarationConsumer declarationConsumer, SeparatorPredicate separatorPredicate) {
DeclarationConsumer declarationConsumer, SeparatorFinder separatorFinder) {
this.declarationConsumer = declarationConsumer;
this.separatorPredicate = separatorPredicate;
this.separatorFinder = separatorFinder;
}

/**
Expand Down Expand Up @@ -69,56 +71,60 @@ public void wrapUp(List<ByteFragmentAtOffset> fragments) throws GenericParsingEx
}

private void sendMerged(List<ByteFragmentAtOffset> list) throws GenericParsingException {
int offset = -1;
List<ByteBufferFragment> leftPart = Lists.newArrayList();

for (ByteFragmentAtOffset edge : list) {
ByteBufferFragment sequence = edge.getFragment();
// If the new sequence is separate from already collected parts,
// merge them and feed to consumer.
if (!leftPart.isEmpty()) {
ByteBufferFragment lastPart = Iterables.getLast(leftPart);
// The order of symbols: previousInOld, lastInOld, currentInNew, nextInNew.
byte previousInOld = lastPart.length() == 1 ? 0 : lastPart.byteAt(lastPart.length() - 2);
byte lastInOld = lastPart.byteAt(lastPart.length() - 1);
byte currentInNew = sequence.byteAt(0);
byte nextInNew = sequence.length() == 1 ? 0 : sequence.byteAt(1);

// <symbol> | \n<non-space>
if (separatorPredicate.test(lastInOld, currentInNew, nextInNew)) {
// Add separator to the end of the accumulated sequence
leftPart.add(sequence.subFragment(0, 1));
ByteFragmentAtOffset byteFragmentAtOffset =
new ByteFragmentAtOffset(edge.getOffset(), ByteBufferFragment.merge(leftPart));
declarationConsumer.declaration(byteFragmentAtOffset);
leftPart.clear();
// Cutting out the separator in the beginning
if (sequence.length() > 1) {
leftPart.add(sequence.subFragment(1, sequence.length()));
offset = edge.getOffset();
}
continue;
}
Preconditions.checkArgument(!list.isEmpty());
ByteFragmentAtOffset first = list.get(0);
if (list.size() == 1) {
declarationConsumer.declaration(first);
return;
}

// <symbol>\n | <non-space>
if (separatorPredicate.test(previousInOld, lastInOld, currentInNew)) {
ByteFragmentAtOffset byteFragmentAtOffset =
new ByteFragmentAtOffset(edge.getOffset(), ByteBufferFragment.merge(leftPart));
declarationConsumer.declaration(byteFragmentAtOffset);
leftPart.clear();
}
// 1. We merge all the passed fragments into one fragment.
// 2. We check 6 bytes at the connection of two fragments, 3 bytes in each part:
// separator can consist of 4 bytes (<escape>\r\n<indent>),
// so in case only a part of the separator is in one of the fragments,
// we get 3 bytes in one part and one byte in the other.
// 3. We record the ranges of at most 6 bytes at the connections of the fragments into
// interestingRanges.
// 4. Later we will check only interestingRanges for separators, and create corresponding
// fragments; the underlying common ByteBuffer will be reused, so we are not performing
// extensive copying.
int firstOffset = first.getOffset();
List<ByteBufferFragment> fragments = new ArrayList<>();
List<Range<Integer>> interestingRanges = Lists.newArrayList();
int fragmentShift = 0;
for (ByteFragmentAtOffset byteFragmentAtOffset : list) {
ByteBufferFragment fragment = byteFragmentAtOffset.getFragment();
fragments.add(fragment);
if (fragmentShift > 0) {
// We are only looking for the separators between fragments.
int start = Math.max(0, fragmentShift - 3);
int end = fragmentShift + Math.min(4, fragment.length());
// Assert that the ranges are not intersecting, otherwise the code that iterates ranges
// will work incorrectly.
Preconditions.checkState(
interestingRanges.isEmpty()
|| Iterables.getLast(interestingRanges).upperEndpoint() < start);
interestingRanges.add(Range.openClosed(start, end));
}
fragmentShift += fragment.length();
}

ByteBufferFragment merged = ByteBufferFragment.merge(fragments);

leftPart.add(sequence);
if (offset == -1) {
offset = edge.getOffset();
int previousEnd = 0;
for (Range<Integer> range : interestingRanges) {
int idx =
separatorFinder.findNextSeparator(merged, range.lowerEndpoint(), range.upperEndpoint());
if (idx >= 0) {
// There should always be a previous fragment, as we are checking non-intersecting ranges,
// starting from the connection point between first and second fragments.
Preconditions.checkState(idx > previousEnd);
declarationConsumer.declaration(
new ByteFragmentAtOffset(firstOffset, merged.subFragment(previousEnd, idx + 1)));
previousEnd = idx + 1;
}
}
if (!leftPart.isEmpty()) {
Preconditions.checkState(offset >= 0);
ByteFragmentAtOffset byteFragmentAtOffset =
new ByteFragmentAtOffset(offset, ByteBufferFragment.merge(leftPart));
declarationConsumer.declaration(byteFragmentAtOffset);
}
declarationConsumer.declaration(
new ByteFragmentAtOffset(firstOffset, merged.subFragment(previousEnd, merged.length())));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,9 @@

package com.google.devtools.build.lib.bazel.rules.ninja.file;

/** Interface for determining where the byte sequence should be split into parts. */
public interface SeparatorPredicate {

/**
* Returns true if the sequence should be split after <code>current</code> byte.
*
* @param previous previous byte (before current)
* @param current current byte
* @param next next byte (after current)
*/
boolean test(byte previous, byte current, byte next);
/**
 * Signals that a file being split uses an unsupported line separator: a carriage return
 * ({@code '\r'}) that is not immediately followed by a newline ({@code '\n'}).
 *
 * <p>{@link NinjaSeparatorFinder#findNextSeparator} raises this exception while scanning for
 * declaration separators; it surfaces from {@link BufferSplitter} because the splitter invokes
 * the separator finder on its buffer fragment.
 */
public class IncorrectSeparatorException extends GenericParsingException {
/**
 * @param message description of the separator problem, passed through to
 *     {@link GenericParsingException}
 */
public IncorrectSeparatorException(String message) {
super(message);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// Copyright 2019 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

package com.google.devtools.build.lib.bazel.rules.ninja.file;

import com.google.common.base.Preconditions;

/**
 * Implementation of {@link SeparatorFinder} for Ninja files.
 *
 * <p>A Ninja declaration consists of several text lines; a line that continues the previous
 * declaration starts with some amount of spaces or tabs, while a line that begins a new
 * declaration starts with a non-space symbol. The dollar symbol '$' escapes the newline, i.e.
 * "$\nsomething" does not contain a separator. A doubled dollar "$$" is a literal dollar, so the
 * newline in "$$\nsomething" is <em>not</em> escaped.
 *
 * <p>Both '\n' and '\r\n' line endings are supported; a '\r' that is not followed by '\n' causes
 * an {@link IncorrectSeparatorException}.
 */
public class NinjaSeparatorFinder implements SeparatorFinder {
  public static final NinjaSeparatorFinder INSTANCE = new NinjaSeparatorFinder();

  private static final byte DOLLAR_BYTE = '$';
  private static final byte CARRIAGE_RETURN_BYTE = '\r';
  private static final byte NEWLINE_BYTE = '\n';
  private static final byte SPACE_BYTE = ' ';
  private static final byte TAB_BYTE = '\t';

  private NinjaSeparatorFinder() {}

  /**
   * Finds the first declaration separator in the given range of the fragment.
   *
   * <p>The byte at {@code startingFrom} is only inspected to learn whether it is an escaping
   * '$'; separator candidates are looked for starting from {@code startingFrom + 1}. A candidate
   * is only recognized when the byte following it lies inside the range; when the byte after a
   * '\r\n' pair falls outside the range, it is assumed to be a space (i.e. no separator is
   * reported).
   *
   * @param fragment fragment to scan
   * @param startingFrom index to start from; must be less than {@code fragment.length()}
   * @param untilExcluded exclusive upper bound of the scanned range, or a negative value to scan
   *     up to the end of the fragment
   * @return index of the last byte of the separator (the '\n' byte for both '\n' and '\r\n'
   *     separators), or -1 if the range contains no separator
   * @throws IncorrectSeparatorException if '\r' is encountered without '\n' right after it
   */
  @Override
  public int findNextSeparator(ByteBufferFragment fragment, int startingFrom, int untilExcluded)
      throws IncorrectSeparatorException {
    Preconditions.checkState(startingFrom < fragment.length());
    Preconditions.checkState(untilExcluded < 0 || untilExcluded <= fragment.length());

    boolean escaped = DOLLAR_BYTE == fragment.byteAt(startingFrom);
    // Only a negative bound means "no bound"; a bound of zero denotes an empty range.
    int endExcl = untilExcluded >= 0 ? untilExcluded : fragment.length();
    for (int i = startingFrom + 1; i < endExcl - 1; i++) {
      byte current = fragment.byteAt(i);
      byte next = fragment.byteAt(i + 1);
      // Outside of the range we pessimistically assume an indent, so that no separator is reported.
      byte afterNextOrSpace = i < (endExcl - 2) ? fragment.byteAt(i + 2) : SPACE_BYTE;
      if (CARRIAGE_RETURN_BYTE == current && NEWLINE_BYTE != next) {
        throw new IncorrectSeparatorException(
            "Wrong newline separators: \\r should be followed by \\n.");
      }
      if (!escaped
          && SPACE_BYTE != afterNextOrSpace
          && TAB_BYTE != afterNextOrSpace
          && CARRIAGE_RETURN_BYTE == current) {
        // Point to the last byte of the separator, so that callers do not need to know its length.
        return i + 1;
      }
      if (!escaped && SPACE_BYTE != next && TAB_BYTE != next && NEWLINE_BYTE == current) {
        return i;
      }
      if (escaped && CARRIAGE_RETURN_BYTE == current) {
        // Jump over the whole escaped carriage return + newline pair.
        ++i;
        escaped = false;
      } else {
        // A '$' preceded by an escaping '$' is a literal dollar and does not escape what follows.
        escaped = !escaped && DOLLAR_BYTE == current;
      }
    }
    return -1;
  }
}

This file was deleted.

Loading

0 comments on commit 7535d4c

Please sign in to comment.