From 42066b029f3009518e2ba8c7bd20f5781b180b8f Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 13 Mar 2024 15:34:05 +1100 Subject: [PATCH 1/4] Inline and remove `Parser::parse_expr_tuple_field_access_float`. It has a single call site, and afterwards all the calls to `parse_expr_tuple_field_access` are in a single method, which is nice. --- compiler/rustc_parse/src/parser/expr.rs | 74 +++++++++++++------------ 1 file changed, 38 insertions(+), 36 deletions(-) diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index fe17eba0d7b8b..cf7771f87a8c9 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -1005,7 +1005,44 @@ impl<'a> Parser<'a> { Ok(self.parse_expr_tuple_field_access(lo, base, symbol, suffix, None)) } token::Literal(token::Lit { kind: token::Float, symbol, suffix }) => { - Ok(self.parse_expr_tuple_field_access_float(lo, base, symbol, suffix)) + Ok(match self.break_up_float(symbol, self.token.span) { + // 1e2 + DestructuredFloat::Single(sym, _sp) => { + self.parse_expr_tuple_field_access(lo, base, sym, suffix, None) + } + // 1. + DestructuredFloat::TrailingDot(sym, ident_span, dot_span) => { + assert!(suffix.is_none()); + self.token = Token::new(token::Ident(sym, IdentIsRaw::No), ident_span); + let next_token = (Token::new(token::Dot, dot_span), self.token_spacing); + self.parse_expr_tuple_field_access(lo, base, sym, None, Some(next_token)) + } + // 1.2 | 1.2e3 + DestructuredFloat::MiddleDot( + symbol1, + ident1_span, + dot_span, + symbol2, + ident2_span, + ) => { + self.token = Token::new(token::Ident(symbol1, IdentIsRaw::No), ident1_span); + // This needs to be `Spacing::Alone` to prevent regressions. + // See issue #76399 and PR #76285 for more details + let next_token1 = (Token::new(token::Dot, dot_span), Spacing::Alone); + let base1 = self.parse_expr_tuple_field_access( + lo, + base, + symbol1, + None, + Some(next_token1), + ); + let next_token2 = + Token::new(token::Ident(symbol2, IdentIsRaw::No), ident2_span); + self.bump_with((next_token2, self.token_spacing)); // `.` + self.parse_expr_tuple_field_access(lo, base1, symbol2, suffix, None) + } + DestructuredFloat::Error => base, + }) } _ => { self.error_unexpected_after_dot(); @@ -1119,41 +1156,6 @@ impl<'a> Parser<'a> { } } - fn parse_expr_tuple_field_access_float( - &mut self, - lo: Span, - base: P, - float: Symbol, - suffix: Option, - ) -> P { - match self.break_up_float(float, self.token.span) { - // 1e2 - DestructuredFloat::Single(sym, _sp) => { - self.parse_expr_tuple_field_access(lo, base, sym, suffix, None) - } - // 1. - DestructuredFloat::TrailingDot(sym, ident_span, dot_span) => { - assert!(suffix.is_none()); - self.token = Token::new(token::Ident(sym, IdentIsRaw::No), ident_span); - let next_token = (Token::new(token::Dot, dot_span), self.token_spacing); - self.parse_expr_tuple_field_access(lo, base, sym, None, Some(next_token)) - } - // 1.2 | 1.2e3 - DestructuredFloat::MiddleDot(symbol1, ident1_span, dot_span, symbol2, ident2_span) => { - self.token = Token::new(token::Ident(symbol1, IdentIsRaw::No), ident1_span); - // This needs to be `Spacing::Alone` to prevent regressions. - // See issue #76399 and PR #76285 for more details - let next_token1 = (Token::new(token::Dot, dot_span), Spacing::Alone); - let base1 = - self.parse_expr_tuple_field_access(lo, base, symbol1, None, Some(next_token1)); - let next_token2 = Token::new(token::Ident(symbol2, IdentIsRaw::No), ident2_span); - self.bump_with((next_token2, self.token_spacing)); // `.` - self.parse_expr_tuple_field_access(lo, base1, symbol2, suffix, None) - } - DestructuredFloat::Error => base, - } - } - /// Parse the field access used in offset_of, matched by `$(e:expr)+`. /// Currently returns a list of idents. However, it should be possible in /// future to also do array indices, which might be arbitrary expressions. From 90eeb3d6817303469059041ef7c8c57f3508f476 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 13 Mar 2024 15:45:27 +1100 Subject: [PATCH 2/4] Remove `next_token` handling from `parse_expr_tuple_field_access`. It's clearer at the call site. --- compiler/rustc_parse/src/parser/expr.rs | 28 +++++++++---------------- 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index cf7771f87a8c9..50589ad3f1260 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -1002,20 +1002,22 @@ impl<'a> Parser<'a> { match self.token.uninterpolate().kind { token::Ident(..) => self.parse_dot_suffix(base, lo), token::Literal(token::Lit { kind: token::Integer, symbol, suffix }) => { - Ok(self.parse_expr_tuple_field_access(lo, base, symbol, suffix, None)) + self.bump(); + Ok(self.parse_expr_tuple_field_access(lo, base, symbol, suffix)) } token::Literal(token::Lit { kind: token::Float, symbol, suffix }) => { Ok(match self.break_up_float(symbol, self.token.span) { // 1e2 DestructuredFloat::Single(sym, _sp) => { - self.parse_expr_tuple_field_access(lo, base, sym, suffix, None) + self.bump(); + self.parse_expr_tuple_field_access(lo, base, sym, suffix) } // 1. DestructuredFloat::TrailingDot(sym, ident_span, dot_span) => { assert!(suffix.is_none()); self.token = Token::new(token::Ident(sym, IdentIsRaw::No), ident_span); - let next_token = (Token::new(token::Dot, dot_span), self.token_spacing); - self.parse_expr_tuple_field_access(lo, base, sym, None, Some(next_token)) + self.bump_with((Token::new(token::Dot, dot_span), self.token_spacing)); + self.parse_expr_tuple_field_access(lo, base, sym, None) } // 1.2 | 1.2e3 DestructuredFloat::MiddleDot( @@ -1028,18 +1030,13 @@ impl<'a> Parser<'a> { self.token = Token::new(token::Ident(symbol1, IdentIsRaw::No), ident1_span); // This needs to be `Spacing::Alone` to prevent regressions. // See issue #76399 and PR #76285 for more details - let next_token1 = (Token::new(token::Dot, dot_span), Spacing::Alone); - let base1 = self.parse_expr_tuple_field_access( - lo, - base, - symbol1, - None, - Some(next_token1), - ); + self.bump_with((Token::new(token::Dot, dot_span), Spacing::Alone)); + let base1 = self.parse_expr_tuple_field_access(lo, base, symbol1, None); let next_token2 = Token::new(token::Ident(symbol2, IdentIsRaw::No), ident2_span); self.bump_with((next_token2, self.token_spacing)); // `.` - self.parse_expr_tuple_field_access(lo, base1, symbol2, suffix, None) + self.bump(); + self.parse_expr_tuple_field_access(lo, base1, symbol2, suffix) } DestructuredFloat::Error => base, }) @@ -1263,12 +1260,7 @@ impl<'a> Parser<'a> { base: P, field: Symbol, suffix: Option, - next_token: Option<(Token, Spacing)>, ) -> P { - match next_token { - Some(next_token) => self.bump_with(next_token), - None => self.bump(), - } let span = self.prev_token.span; let field = ExprKind::Field(base, Ident::new(field, span)); if let Some(suffix) = suffix { From 9c091160dc57aa5224a35e6755c357f7089ff2e5 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 13 Mar 2024 15:59:41 +1100 Subject: [PATCH 3/4] Change `parse_expr_tuple_field_access`. Pass in the span for the field rather than using `prev_token`. Also rename it `mk_expr_tuple_field_access`, because it doesn't do any actual parsing, it just creates an expression with what it's given. Not much of a clarity win by itself, but unlocks additional subsequent simplifications. --- compiler/rustc_parse/src/parser/expr.rs | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index 50589ad3f1260..f96e44eb0f761 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -1002,22 +1002,25 @@ impl<'a> Parser<'a> { match self.token.uninterpolate().kind { token::Ident(..) => self.parse_dot_suffix(base, lo), token::Literal(token::Lit { kind: token::Integer, symbol, suffix }) => { + let ident_span = self.token.span; self.bump(); - Ok(self.parse_expr_tuple_field_access(lo, base, symbol, suffix)) + Ok(self.mk_expr_tuple_field_access(lo, ident_span, base, symbol, suffix)) } token::Literal(token::Lit { kind: token::Float, symbol, suffix }) => { Ok(match self.break_up_float(symbol, self.token.span) { // 1e2 DestructuredFloat::Single(sym, _sp) => { + let ident_span = self.token.span; self.bump(); - self.parse_expr_tuple_field_access(lo, base, sym, suffix) + self.mk_expr_tuple_field_access(lo, ident_span, base, sym, suffix) } // 1. DestructuredFloat::TrailingDot(sym, ident_span, dot_span) => { assert!(suffix.is_none()); self.token = Token::new(token::Ident(sym, IdentIsRaw::No), ident_span); + let ident_span = self.token.span; self.bump_with((Token::new(token::Dot, dot_span), self.token_spacing)); - self.parse_expr_tuple_field_access(lo, base, sym, None) + self.mk_expr_tuple_field_access(lo, ident_span, base, sym, None) } // 1.2 | 1.2e3 DestructuredFloat::MiddleDot( @@ -1030,13 +1033,16 @@ impl<'a> Parser<'a> { self.token = Token::new(token::Ident(symbol1, IdentIsRaw::No), ident1_span); // This needs to be `Spacing::Alone` to prevent regressions. // See issue #76399 and PR #76285 for more details + let ident_span = self.token.span; self.bump_with((Token::new(token::Dot, dot_span), Spacing::Alone)); - let base1 = self.parse_expr_tuple_field_access(lo, base, symbol1, None); + let base1 = + self.mk_expr_tuple_field_access(lo, ident_span, base, symbol1, None); let next_token2 = Token::new(token::Ident(symbol2, IdentIsRaw::No), ident2_span); self.bump_with((next_token2, self.token_spacing)); // `.` + let ident_span = self.token.span; self.bump(); - self.parse_expr_tuple_field_access(lo, base1, symbol2, suffix) + self.mk_expr_tuple_field_access(lo, ident_span, base1, symbol2, suffix) } DestructuredFloat::Error => base, }) @@ -1254,19 +1260,18 @@ impl<'a> Parser<'a> { Ok(fields.into_iter().collect()) } - fn parse_expr_tuple_field_access( + fn mk_expr_tuple_field_access( &mut self, lo: Span, + ident_span: Span, base: P, field: Symbol, suffix: Option, ) -> P { - let span = self.prev_token.span; - let field = ExprKind::Field(base, Ident::new(field, span)); if let Some(suffix) = suffix { - self.expect_no_tuple_index_suffix(span, suffix); + self.expect_no_tuple_index_suffix(ident_span, suffix); } - self.mk_expr(lo.to(span), field) + self.mk_expr(lo.to(ident_span), ExprKind::Field(base, Ident::new(field, ident_span))) } /// Parse a function call expression, `expr(...)`. From dce0f7f5c2a62a5aabb1d02351473d6f2b125541 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 21 Mar 2024 16:14:56 +1100 Subject: [PATCH 4/4] Clarify `parse_dot_suffix_expr`. For the `MiddleDot` case, current behaviour: - For a case like `1.2`, `sym1` is `1` and `sym2` is `2`, and `self.token` holds `1.2`. - It creates a new ident token from `sym1` that it puts into `self.token`. - Then it does `bump_with` with a new dot token, which moves the `sym1` token into `prev_token`. - Then it does `bump_with` with a new ident token from `sym2`, which moves the `dot` token into `prev_token` and discards the `sym1` token. - Then it does `bump`, which puts whatever is next into `self.token`, moves the `sym2` token into `prev_token`, and discards the `dot` token altogether. New behaviour: - Skips creating and inserting the `sym1` and dot tokens, because they are unnecessary. - This also demonstrates that the comment about `Spacing::Alone` is wrong -- that value is never used. That comment was added in #77250, and AFAICT it has always been incorrect. The commit also expands comments. I found this code hard to read previously, the examples in comments make it easier. --- compiler/rustc_parse/src/parser/expr.rs | 35 ++++++++++++++----------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index f96e44eb0f761..7e317c3df1465 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -16,7 +16,6 @@ use core::mem; use core::ops::ControlFlow; use rustc_ast::ptr::P; use rustc_ast::token::{self, Delimiter, Token, TokenKind}; -use rustc_ast::tokenstream::Spacing; use rustc_ast::util::case::Case; use rustc_ast::util::classify; use rustc_ast::util::parser::{prec_let_scrutinee_needs_par, AssocOp, Fixity}; @@ -999,6 +998,8 @@ impl<'a> Parser<'a> { } pub fn parse_dot_suffix_expr(&mut self, lo: Span, base: P) -> PResult<'a, P> { + // At this point we've consumed something like `expr.` and `self.token` holds the token + // after the dot. match self.token.uninterpolate().kind { token::Ident(..) => self.parse_dot_suffix(base, lo), token::Literal(token::Lit { kind: token::Integer, symbol, suffix }) => { @@ -1010,39 +1011,41 @@ impl<'a> Parser<'a> { Ok(match self.break_up_float(symbol, self.token.span) { // 1e2 DestructuredFloat::Single(sym, _sp) => { + // `foo.1e2`: a single complete dot access, fully consumed. We end up with + // the `1e2` token in `self.prev_token` and the following token in + // `self.token`. let ident_span = self.token.span; self.bump(); self.mk_expr_tuple_field_access(lo, ident_span, base, sym, suffix) } // 1. DestructuredFloat::TrailingDot(sym, ident_span, dot_span) => { + // `foo.1.`: a single complete dot access and the start of another. + // We end up with the `sym` (`1`) token in `self.prev_token` and a dot in + // `self.token`. assert!(suffix.is_none()); self.token = Token::new(token::Ident(sym, IdentIsRaw::No), ident_span); - let ident_span = self.token.span; self.bump_with((Token::new(token::Dot, dot_span), self.token_spacing)); self.mk_expr_tuple_field_access(lo, ident_span, base, sym, None) } // 1.2 | 1.2e3 DestructuredFloat::MiddleDot( - symbol1, + sym1, ident1_span, - dot_span, - symbol2, + _dot_span, + sym2, ident2_span, ) => { - self.token = Token::new(token::Ident(symbol1, IdentIsRaw::No), ident1_span); - // This needs to be `Spacing::Alone` to prevent regressions. - // See issue #76399 and PR #76285 for more details - let ident_span = self.token.span; - self.bump_with((Token::new(token::Dot, dot_span), Spacing::Alone)); - let base1 = - self.mk_expr_tuple_field_access(lo, ident_span, base, symbol1, None); + // `foo.1.2` (or `foo.1.2e3`): two complete dot accesses. We end up with + // the `sym2` (`2` or `2e3`) token in `self.prev_token` and the following + // token in `self.token`. let next_token2 = - Token::new(token::Ident(symbol2, IdentIsRaw::No), ident2_span); - self.bump_with((next_token2, self.token_spacing)); // `.` - let ident_span = self.token.span; + Token::new(token::Ident(sym2, IdentIsRaw::No), ident2_span); + self.bump_with((next_token2, self.token_spacing)); self.bump(); - self.mk_expr_tuple_field_access(lo, ident_span, base1, symbol2, suffix) + let base1 = + self.mk_expr_tuple_field_access(lo, ident1_span, base, sym1, None); + self.mk_expr_tuple_field_access(lo, ident2_span, base1, sym2, suffix) } DestructuredFloat::Error => base, })