Skip to content

Commit

Permalink
handle arbitrary length _* in e _* suffix
Browse files Browse the repository at this point in the history
  • Loading branch information
richard-uk1 committed Oct 13, 2024
1 parent 73e94f8 commit 8b41315
Showing 1 changed file with 67 additions and 28 deletions.
95 changes: 67 additions & 28 deletions compiler/rustc_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -361,17 +361,6 @@ pub fn is_ident(string: &str) -> bool {
}
}

/// Reports whether `ch` may directly follow the `e`/`E` of a numeric literal and still
/// have that `e` treated as the start of an exponent.
///
/// Accepted characters are the ASCII digits, `_`, `+` and `-`. For anything else the
/// number is passed to the parser with a suffix beginning with 'e' instead of an
/// exponent (and is rejected there).
///
/// Because `_` is accepted, `1e_` counts as an invalid exponent rather than as a number
/// with a suffix.
fn is_exponent_second(ch: char) -> bool {
    ch.is_ascii_digit() || matches!(ch, '_' | '+' | '-')
}

impl Cursor<'_> {
/// Parses a token from the input string.
pub fn advance_token(&mut self) -> Token {
Expand Down Expand Up @@ -425,9 +414,7 @@ impl Cursor<'_> {

// Numeric literal.
c @ '0'..='9' => {
let literal_kind = self.number(c);
let suffix_start = self.pos_within_token();
self.eat_literal_suffix();
let (literal_kind, suffix_start) = self.number(c);
TokenKind::Literal { kind: literal_kind, suffix_start }
}

Expand Down Expand Up @@ -624,7 +611,7 @@ impl Cursor<'_> {
}
}

fn number(&mut self, first_digit: char) -> LiteralKind {
fn number(&mut self, first_digit: char) -> (LiteralKind, u32) {
debug_assert!('0' <= self.prev() && self.prev() <= '9');
let mut base = Base::Decimal;
if first_digit == '0' {
Expand All @@ -634,21 +621,27 @@ impl Cursor<'_> {
base = Base::Binary;
self.bump();
if !self.eat_decimal_digits() {
return Int { base, empty_int: true };
let suffix_start = self.pos_within_token();
self.eat_literal_suffix();
return (Int { base, empty_int: true }, suffix_start);
}
}
'o' => {
base = Base::Octal;
self.bump();
if !self.eat_decimal_digits() {
return Int { base, empty_int: true };
let suffix_start = self.pos_within_token();
self.eat_literal_suffix();
return (Int { base, empty_int: true }, suffix_start);
}
}
'x' => {
base = Base::Hexadecimal;
self.bump();
if !self.eat_hexadecimal_digits() {
return Int { base, empty_int: true };
let suffix_start = self.pos_within_token();
self.eat_literal_suffix();
return (Int { base, empty_int: true }, suffix_start);
}
}
// Not a base prefix; consume additional digits.
Expand All @@ -660,40 +653,85 @@ impl Cursor<'_> {
'.' | 'e' | 'E' => {}

// Just a 0.
_ => return Int { base, empty_int: false },
_ => {
let suffix_start = self.pos_within_token();
self.eat_literal_suffix();
return (Int { base, empty_int: false }, suffix_start);
}
}
} else {
// No base prefix, parse number in the usual way.
self.eat_decimal_digits();
};

match self.first() {
match (self.first(), self.second()) {
// Don't be greedy if this is actually an
// integer literal followed by field/method access or a range pattern
// (`0..2` and `12.foo()`)
'.' if self.second() != '.' && !is_id_start(self.second()) => {
('.', second) if second != '.' && !is_id_start(second) => {
// might have stuff after the ., and if it does, it needs to start
// with a number
self.bump();
let mut empty_exponent = false;
let mut suffix_start = self.pos_within_token();
if self.first().is_ascii_digit() {
self.eat_decimal_digits();
match self.first() {
'e' | 'E' if is_exponent_second(self.second()) => {
// This will be the start of the suffix if there is no exponent
suffix_start = self.pos_within_token();
match (self.first(), self.second()) {
('e' | 'E', '_') => {
// Check whether the run of `_` is followed by a digit. If it is,
// include it in the number as part of the exponent. If it is not,
// include it in the suffix.
while matches!(self.first(), '_') {
self.bump();
}
if self.first().is_ascii_digit() {
self.eat_decimal_digits();
suffix_start = self.pos_within_token();
}
}
('e' | 'E', '0'..'9' | '+' | '-') => {
// definitely an exponent
self.bump();
empty_exponent = !self.eat_float_exponent();
suffix_start = self.pos_within_token();
}
_ => (),
}
}
Float { base, empty_exponent }
self.eat_literal_suffix();
(Float { base, empty_exponent }, suffix_start)
}
('e' | 'E', '_') => {
// see the block above for a similar approach
let non_exponent_suffix_start = self.pos_within_token();
while matches!(self.first(), '_') {
self.bump();
}
if self.first().is_ascii_digit() {
self.eat_decimal_digits();
let suffix_start = self.pos_within_token();
self.eat_literal_suffix();
(Float { base, empty_exponent: false }, suffix_start)
} else {
// No digit means suffix, and therefore int
(Int { base, empty_int: false }, non_exponent_suffix_start)
}
}
'e' | 'E' if is_exponent_second(self.second()) => {
('e' | 'E', '0'..='9' | '+' | '-') => {
// definitely an exponent
self.bump();
let empty_exponent = !self.eat_float_exponent();
Float { base, empty_exponent }
let suffix_start = self.pos_within_token();
self.eat_literal_suffix();
(Float { base, empty_exponent }, suffix_start)
}
_ => {
let suffix_start = self.pos_within_token();
self.eat_literal_suffix();
(Int { base, empty_int: false }, suffix_start)
}
_ => Int { base, empty_int: false },
}
}

Expand Down Expand Up @@ -942,6 +980,7 @@ impl Cursor<'_> {
}
}

/// Returns `true` if a digit was consumed (rather than just '_')
fn eat_decimal_digits(&mut self) -> bool {
let mut has_digits = false;
loop {
Expand Down Expand Up @@ -979,7 +1018,7 @@ impl Cursor<'_> {
/// Eats the float exponent. Returns true if at least one digit was met,
/// and returns false otherwise.
fn eat_float_exponent(&mut self) -> bool {
debug_assert!(self.prev() == 'e' || self.prev() == 'E');
debug_assert!(matches!(self.prev(), 'e' | 'E'));
if self.first() == '-' || self.first() == '+' {
self.bump();
}
Expand Down

0 comments on commit 8b41315

Please sign in to comment.