Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Properly capture trailing 'unglued' token #79978

Merged
merged 1 commit into from
Dec 13, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 58 additions & 9 deletions compiler/rustc_parse/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ pub use path::PathStyle;
use rustc_ast::ptr::P;
use rustc_ast::token::{self, DelimToken, Token, TokenKind};
use rustc_ast::tokenstream::{self, DelimSpan, LazyTokenStream, Spacing};
use rustc_ast::tokenstream::{CreateTokenStream, TokenStream, TokenTree};
use rustc_ast::tokenstream::{CreateTokenStream, TokenStream, TokenTree, TreeAndSpacing};
use rustc_ast::DUMMY_NODE_ID;
use rustc_ast::{self as ast, AnonConst, AttrStyle, AttrVec, Const, CrateSugar, Extern, Unsafe};
use rustc_ast::{Async, Expr, ExprKind, MacArgs, MacDelimiter, Mutability, StrLit};
Expand Down Expand Up @@ -132,6 +132,28 @@ struct TokenCursor {
// Counts the number of calls to `next` or `next_desugared`,
// depending on whether `desugar_doc_comments` is set.
num_next_calls: usize,
// During parsing, we may sometimes need to 'unglue' a
// glued token into two component tokens
// (e.g. '>>' into '>' and '>'), so that the parser
// can consume them one at a time. This process
// bypasses the normal capturing mechanism
// (e.g. `num_next_calls` will not be incremented),
// since the 'unglued' tokens do not exist in
// the original `TokenStream`.
//
// If we end up consuming both unglued tokens,
// then this is not an issue - we'll end up
// capturing the single 'glued' token.
//
// However, in certain circumstances, we may
// want to capture just the first 'unglued' token.
// For example, capturing the `Vec<u8>`
// in `Option<Vec<u8>>` requires us to unglue
// the trailing `>>` token. The `append_unglued_token`
// field is used to track this token - it gets
// appended to the captured stream when
// we evaluate a `LazyTokenStream`
append_unglued_token: Option<TreeAndSpacing>,
}

#[derive(Clone)]
Expand Down Expand Up @@ -336,6 +358,7 @@ impl<'a> Parser<'a> {
stack: Vec::new(),
num_next_calls: 0,
desugar_doc_comments,
append_unglued_token: None,
},
desugar_doc_comments,
unmatched_angle_bracket_count: 0,
Expand All @@ -359,6 +382,10 @@ impl<'a> Parser<'a> {
self.token_cursor.next()
};
self.token_cursor.num_next_calls += 1;
// We've retrieved a token from the underlying
// cursor, so we no longer need to worry about
// an unglued token. See `break_and_eat` for more details
self.token_cursor.append_unglued_token = None;
if next.span.is_dummy() {
// Tweak the location for better diagnostics, but keep syntactic context intact.
next.span = fallback_span.with_ctxt(next.span.ctxt());
Expand Down Expand Up @@ -555,6 +582,14 @@ impl<'a> Parser<'a> {
let first_span = self.sess.source_map().start_point(self.token.span);
let second_span = self.token.span.with_lo(first_span.hi());
self.token = Token::new(first, first_span);
// Keep track of this token - if we end token capturing now,
// we'll want to append this token to the captured stream.
//
// If we consume any additional tokens, then this token
// is not needed (we'll capture the entire 'glued' token),
// and `next_tok` will set this field to `None`
self.token_cursor.append_unglued_token =
Some((TokenTree::Token(self.token.clone()), Spacing::Alone));
// Use the spacing of the glued token as the spacing
// of the unglued second token.
self.bump_with((Token::new(second, second_span), self.token_spacing));
Expand Down Expand Up @@ -1230,6 +1265,7 @@ impl<'a> Parser<'a> {
num_calls: usize,
desugar_doc_comments: bool,
trailing_semi: bool,
append_unglued_token: Option<TreeAndSpacing>,
}
impl CreateTokenStream for LazyTokenStreamImpl {
fn create_token_stream(&self) -> TokenStream {
Expand All @@ -1253,12 +1289,18 @@ impl<'a> Parser<'a> {
}))
.take(num_calls);

make_token_stream(tokens)
make_token_stream(tokens, self.append_unglued_token.clone())
}
fn add_trailing_semi(&self) -> Box<dyn CreateTokenStream> {
if self.trailing_semi {
panic!("Called `add_trailing_semi` twice!");
}
if self.append_unglued_token.is_some() {
panic!(
"Cannot call `add_trailing_semi` when we have an unglued token {:?}",
self.append_unglued_token
);
}
let mut new = self.clone();
new.trailing_semi = true;
Box::new(new)
Expand All @@ -1271,6 +1313,7 @@ impl<'a> Parser<'a> {
cursor_snapshot,
desugar_doc_comments: self.desugar_doc_comments,
trailing_semi: false,
append_unglued_token: self.token_cursor.append_unglued_token.clone(),
};
Ok((ret, Some(LazyTokenStream::new(lazy_impl))))
}
Expand Down Expand Up @@ -1325,7 +1368,10 @@ pub fn emit_unclosed_delims(unclosed_delims: &mut Vec<UnmatchedBrace>, sess: &Pa
/// Converts a flattened iterator of tokens (including open and close delimiter tokens)
/// into a `TokenStream`, creating a `TokenTree::Delimited` for each matching pair
/// of open and close delims.
fn make_token_stream(tokens: impl Iterator<Item = (Token, Spacing)>) -> TokenStream {
fn make_token_stream(
tokens: impl Iterator<Item = (Token, Spacing)>,
append_unglued_token: Option<TreeAndSpacing>,
) -> TokenStream {
#[derive(Debug)]
struct FrameData {
open: Span,
Expand All @@ -1348,14 +1394,17 @@ fn make_token_stream(tokens: impl Iterator<Item = (Token, Spacing)>) -> TokenStr
.inner
.push((delimited, Spacing::Alone));
}
token => stack
.last_mut()
.expect("Bottom token frame is missing!")
.inner
.push((TokenTree::Token(token), spacing)),
token => {
stack
.last_mut()
.expect("Bottom token frame is missing!")
.inner
.push((TokenTree::Token(token), spacing));
}
}
}
let final_buf = stack.pop().expect("Missing final buf!");
let mut final_buf = stack.pop().expect("Missing final buf!");
final_buf.inner.extend(append_unglued_token);
assert!(stack.is_empty(), "Stack should be empty: final_buf={:?} stack={:?}", final_buf, stack);
TokenStream::new(final_buf.inner)
}
20 changes: 20 additions & 0 deletions src/test/ui/proc-macro/capture-unglued-token.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// aux-build:test-macros.rs
// compile-flags: -Z span-debug
// check-pass

// Tests that we properly handle parsing a nonterminal
// where we have two consecutive angle brackets (one inside
// the nonterminal, and one outside)

#![no_std] // Don't load unnecessary hygiene information from std
extern crate std;
extern crate test_macros;

macro_rules! trailing_angle {
(Option<$field:ty>) => {
test_macros::print_bang_consume!($field);
}
}

trailing_angle!(Option<Vec<u8>>);
fn main() {}
28 changes: 28 additions & 0 deletions src/test/ui/proc-macro/capture-unglued-token.stdout
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
PRINT-BANG INPUT (DISPLAY): Vec<u8>
PRINT-BANG RE-COLLECTED (DISPLAY): Vec < u8 >
PRINT-BANG INPUT (DEBUG): TokenStream [
Group {
delimiter: None,
stream: TokenStream [
Ident {
ident: "Vec",
span: $DIR/capture-unglued-token.rs:19:24: 19:27 (#0),
},
Punct {
ch: '<',
spacing: Alone,
span: $DIR/capture-unglued-token.rs:19:27: 19:28 (#0),
},
Ident {
ident: "u8",
span: $DIR/capture-unglued-token.rs:19:28: 19:30 (#0),
},
Punct {
ch: '>',
spacing: Alone,
span: $DIR/capture-unglued-token.rs:19:30: 19:31 (#0),
},
],
span: $DIR/capture-unglued-token.rs:15:42: 15:48 (#4),
},
]