diff --git a/crates/swc_ecma_fast_parser/src/lexer/cursor.rs b/crates/swc_ecma_fast_parser/src/lexer/cursor.rs index 9a4b1d090a7f..bad78e78a0b0 100644 --- a/crates/swc_ecma_fast_parser/src/lexer/cursor.rs +++ b/crates/swc_ecma_fast_parser/src/lexer/cursor.rs @@ -14,10 +14,10 @@ pub struct Cursor<'a> { input: &'a [u8], /// Current position in bytes - pos: usize, + pos: u32, /// Length of the input in bytes - len: usize, + len: u32, } impl<'a> Cursor<'a> { @@ -28,14 +28,14 @@ impl<'a> Cursor<'a> { Self { input: bytes, pos: 0, - len: bytes.len(), + len: bytes.len() as u32, } } /// Get the current position as BytePos #[inline(always)] pub fn pos(&self) -> BytePos { - BytePos(self.pos as u32) + BytePos(self.pos) } /// Check if the cursor is at the end of the input @@ -51,28 +51,28 @@ impl<'a> Cursor<'a> { None } else { // SAFETY: We've checked that pos < len - Some(unsafe { *self.input.get_unchecked(self.pos) }) + Some(unsafe { *self.input.get_unchecked(self.pos as usize) }) } } /// Peek at a byte at a specific offset from the current position #[inline(always)] - pub fn peek_at(&self, offset: usize) -> Option { + pub fn peek_at(&self, offset: u32) -> Option { let target_pos = self.pos + offset; if unlikely(target_pos >= self.len) { None } else { // SAFETY: We've checked that target_pos < len - Some(unsafe { *self.input.get_unchecked(target_pos) }) + Some(unsafe { *self.input.get_unchecked(target_pos as usize) }) } } /// Peek at multiple bytes without advancing #[inline(always)] - pub fn peek_n(&self, n: usize) -> &[u8] { + pub fn peek_n(&self, n: u32) -> &[u8] { let end = (self.pos + n).min(self.len); // SAFETY: We've ensured end <= len - unsafe { self.input.get_unchecked(self.pos..end) } + unsafe { self.input.get_unchecked(self.pos as usize..end as usize) } } /// Advance the cursor by one byte @@ -85,13 +85,13 @@ impl<'a> Cursor<'a> { /// Advance the cursor by n bytes #[inline(always)] - pub fn advance_n(&mut self, n: usize) { + pub fn advance_n(&mut self, n: u32) { self.pos = (self.pos + n).min(self.len); } /// Advance until the predicate returns false or EOF is reached #[inline] - pub fn advance_while(&mut self, mut predicate: F) -> usize + pub fn advance_while(&mut self, mut predicate: F) -> u32 where F: FnMut(u8) -> bool, { @@ -108,7 +108,7 @@ impl<'a> Cursor<'a> { where F: FnMut(u8) -> bool, { - const BATCH_SIZE: usize = 32; + const BATCH_SIZE: u32 = 32; // Process in batches if we have more than BATCH_SIZE bytes while self.pos + BATCH_SIZE <= self.len { @@ -117,7 +117,7 @@ impl<'a> Cursor<'a> { // Check all bytes in the batch for i in 0..BATCH_SIZE { // SAFETY: We've verified bounds above - let byte = unsafe { *self.input.get_unchecked(self.pos + i) }; + let byte = unsafe { *self.input.get_unchecked((self.pos + i) as usize) }; if !predicate(byte) { should_stop = true; break; @@ -146,33 +146,36 @@ impl<'a> Cursor<'a> { #[inline(always)] pub fn rest(&self) -> &'a [u8] { // SAFETY: pos is always <= len - unsafe { self.input.get_unchecked(self.pos..) } + unsafe { self.input.get_unchecked(self.pos as usize..) } } /// Get a slice of the input #[inline(always)] - pub fn slice(&self, start: usize, end: usize) -> &'a [u8] { + pub fn slice(&self, start: u32, end: u32) -> &'a [u8] { let real_start = start.min(self.len); let real_end = end.min(self.len); // SAFETY: We've validated bounds - unsafe { self.input.get_unchecked(real_start..real_end) } + unsafe { + self.input + .get_unchecked(real_start as usize..real_end as usize) + } } /// Get the current position #[inline(always)] - pub fn position(&self) -> usize { + pub fn position(&self) -> u32 { self.pos } /// Reset the cursor to a specific position #[inline(always)] pub fn reset_to(&mut self, pos: BytePos) { - self.pos = pos.0 as usize; + self.pos = pos.0; } /// Find the next occurrence of a byte #[inline] - pub fn find_byte(&self, byte: u8) -> Option { + pub fn find_byte(&self, byte: u8) -> Option { // If we're at or near EOF, use the standard implementation if unlikely(self.pos + 16 > self.len) { return self.find_byte_scalar(byte); @@ -184,18 +187,18 @@ impl<'a> Cursor<'a> { /// SIMD-accelerated implementation of find_byte #[inline] - fn find_byte_simd(&self, byte: u8) -> Option { - let input = &self.input[self.pos..]; - let mut position = 0; + fn find_byte_simd(&self, byte: u8) -> Option { + let input = &self.input[self.pos as usize..]; + let mut position = 0u32; // Process 16 bytes at a time - while position + 16 <= input.len() { + while position + 16 <= input.len() as u32 { // Create a vector with our pattern let needle = u8x16::splat(byte); // Create a vector with current chunk of data let mut data = [0u8; 16]; - data.copy_from_slice(&input[position..position + 16]); + data.copy_from_slice(&input[position as usize..(position + 16) as usize]); let chunk = u8x16::new(data); // Compare for equality @@ -208,7 +211,7 @@ impl<'a> Cursor<'a> { #[allow(clippy::needless_range_loop)] for i in 0..16 { if mask_array[i] != 0 { - return Some(self.pos + position + i); + return Some(self.pos + position + i as u32); } } @@ -216,11 +219,11 @@ impl<'a> Cursor<'a> { } // Handle the remainder with the scalar implementation - if position < input.len() { - return input[position..] + if position < input.len() as u32 { + return input[position as usize..] .iter() .position(|&b| b == byte) - .map(|pos| self.pos + position + pos); + .map(|pos| self.pos + position + pos as u32); } None @@ -228,10 +231,10 @@ impl<'a> Cursor<'a> { /// Standard fallback implementation #[inline] - fn find_byte_scalar(&self, byte: u8) -> Option { - self.input[self.pos..] + fn find_byte_scalar(&self, byte: u8) -> Option { + self.input[self.pos as usize..] .iter() .position(|&b| b == byte) - .map(|pos| self.pos + pos) + .map(|pos| self.pos + pos as u32) } } diff --git a/crates/swc_ecma_fast_parser/src/lexer/identifier.rs b/crates/swc_ecma_fast_parser/src/lexer/identifier.rs index 993dc5142cf5..840fd6aad00d 100644 --- a/crates/swc_ecma_fast_parser/src/lexer/identifier.rs +++ b/crates/swc_ecma_fast_parser/src/lexer/identifier.rs @@ -49,7 +49,7 @@ impl Lexer<'_> { // Extract the identifier text let span = self.span(); - let ident_start = start_pos.0 as usize; + let ident_start = start_pos.0; let ident_end = self.cursor.position(); let ident_bytes = self.cursor.slice(ident_start, ident_end); let ident_str = unsafe { std::str::from_utf8_unchecked(ident_bytes) }; diff --git a/crates/swc_ecma_fast_parser/src/lexer/jsx.rs b/crates/swc_ecma_fast_parser/src/lexer/jsx.rs index 98de0ecf1a68..5dd0f5e7c02f 100644 --- a/crates/swc_ecma_fast_parser/src/lexer/jsx.rs +++ b/crates/swc_ecma_fast_parser/src/lexer/jsx.rs @@ -107,7 +107,7 @@ impl Lexer<'_> { /// Read JSX text content fn read_jsx_text(&mut self, had_line_break: bool) -> Result { let start_pos = self.start_pos; - let start_idx = start_pos.0 as usize; + let start_idx = start_pos.0; let mut text = String::new(); diff --git a/crates/swc_ecma_fast_parser/src/lexer/mod.rs b/crates/swc_ecma_fast_parser/src/lexer/mod.rs index 4cc578c5f301..06acf5369bac 100644 --- a/crates/swc_ecma_fast_parser/src/lexer/mod.rs +++ b/crates/swc_ecma_fast_parser/src/lexer/mod.rs @@ -502,7 +502,7 @@ impl<'a> Lexer<'a> { #[inline] fn process_whitespace_simd(&mut self) -> bool { // Need at least 16 bytes to use SIMD - if self.cursor.position() + 16 > self.cursor.rest().len() { + if self.cursor.position() + 16 > self.cursor.rest().len() as u32 { return false; } diff --git a/crates/swc_ecma_fast_parser/src/lexer/number.rs b/crates/swc_ecma_fast_parser/src/lexer/number.rs index d4bbd7debaeb..7b9a787b3e98 100644 --- a/crates/swc_ecma_fast_parser/src/lexer/number.rs +++ b/crates/swc_ecma_fast_parser/src/lexer/number.rs @@ -40,7 +40,7 @@ impl<'a> Lexer<'a> { #[inline] pub(super) fn read_number(&mut self) -> Result { let start_pos = self.start_pos; - let start_idx = start_pos.0 as usize; + let start_idx = start_pos.0; // Check for leading dot (e.g. .123) let starts_with_dot = self.cursor.peek() == Some(b'.'); @@ -233,7 +233,7 @@ impl<'a> Lexer<'a> { /// Extract the raw string representation of a number #[inline] - fn extract_number_str(&self, start_idx: usize) -> Cow<'a, str> { + fn extract_number_str(&self, start_idx: u32) -> Cow<'a, str> { let end_idx = self.cursor.position(); let num_slice = self.cursor.slice(start_idx, end_idx); // Filter out the underscore separators @@ -253,7 +253,7 @@ impl<'a> Lexer<'a> { /// Parse a binary number (0b...) #[inline] - fn parse_binary_number(&self, start_idx: usize) -> f64 { + fn parse_binary_number(&self, start_idx: u32) -> f64 { let start = start_idx + 2; // Skip '0b' let end = self.cursor.position(); @@ -271,7 +271,7 @@ impl<'a> Lexer<'a> { /// Parse an octal number (0o...) #[inline] - fn parse_octal_number(&self, start_idx: usize) -> f64 { + fn parse_octal_number(&self, start_idx: u32) -> f64 { let start = start_idx + 2; // Skip '0o' let end = self.cursor.position(); @@ -289,7 +289,7 @@ impl<'a> Lexer<'a> { /// Parse a hexadecimal number (0x...) #[inline] - fn parse_hex_number(&self, start_idx: usize) -> f64 { + fn parse_hex_number(&self, start_idx: u32) -> f64 { let start = start_idx + 2; // Skip '0x' let end = self.cursor.position(); @@ -308,7 +308,7 @@ impl<'a> Lexer<'a> { /// Parse a decimal number #[inline] - fn parse_decimal_number(&self, start_idx: usize, _starts_with_dot: bool) -> f64 { + fn parse_decimal_number(&self, start_idx: u32, _starts_with_dot: bool) -> f64 { // For decimal numbers with possible fractional and exponent parts, // use the Rust standard library's parser which is highly optimized let raw_str = self.extract_number_str(start_idx); @@ -317,7 +317,7 @@ impl<'a> Lexer<'a> { /// Create a BigInt token #[inline] - fn create_bigint_token(&self, start_idx: usize) -> Result { + fn create_bigint_token(&self, start_idx: u32) -> Result { use num_bigint::BigInt; let end_idx = self.cursor.position(); diff --git a/crates/swc_ecma_fast_parser/src/lexer/operators.rs b/crates/swc_ecma_fast_parser/src/lexer/operators.rs index 149da7ca8e3b..f0276d3a4d3f 100644 --- a/crates/swc_ecma_fast_parser/src/lexer/operators.rs +++ b/crates/swc_ecma_fast_parser/src/lexer/operators.rs @@ -304,11 +304,12 @@ impl Lexer<'_> { /// Read a less-than token (< or <= or << or <=) pub(super) fn read_less_than(&mut self) -> Result { + let start_pos = self.start_pos; self.cursor.advance(); // Skip the initial '<' // Check for JSX mode if self.in_jsx_element { - self.cursor.advance_n(usize::MAX); // Reset cursor to start position + self.cursor.reset_to(start_pos); return self.read_jsx_token(self.had_line_break.into()); } diff --git a/crates/swc_ecma_fast_parser/src/lexer/regex.rs b/crates/swc_ecma_fast_parser/src/lexer/regex.rs index fc281e7965d6..dbd8c9020465 100644 --- a/crates/swc_ecma_fast_parser/src/lexer/regex.rs +++ b/crates/swc_ecma_fast_parser/src/lexer/regex.rs @@ -15,7 +15,7 @@ impl Lexer<'_> { /// Assumes the initial '/' has been consumed pub(super) fn read_regex(&mut self, had_line_break: bool) -> Result { let start_pos = self.start_pos; - let start_idx = start_pos.0 as usize; + let start_idx = start_pos.0; // Read the pattern let mut in_class = false; // Whether we're in a character class [...] diff --git a/crates/swc_ecma_fast_parser/src/lexer/string.rs b/crates/swc_ecma_fast_parser/src/lexer/string.rs index bbf1a5d26edb..522e9aa4742e 100644 --- a/crates/swc_ecma_fast_parser/src/lexer/string.rs +++ b/crates/swc_ecma_fast_parser/src/lexer/string.rs @@ -167,7 +167,7 @@ impl Lexer<'_> { }; // Extract the raw string (including quotes) - let raw_start = start_pos.0 as usize; + let raw_start = start_pos.0; let raw_end = self.cursor.position(); let raw_bytes = self.cursor.slice(raw_start, raw_end); let raw_str = unsafe { std::str::from_utf8_unchecked(raw_bytes) }; @@ -201,7 +201,7 @@ impl Lexer<'_> { /// Find the end of a string without processing escape sequences #[inline] - fn find_string_end(&self, quote: u8) -> Option { + fn find_string_end(&self, quote: u8) -> Option { let pos = 0; let rest = self.cursor.rest(); @@ -212,18 +212,18 @@ impl Lexer<'_> { /// SIMD-accelerated implementation for finding end of string #[inline] - fn find_string_end_simd(&self, start_pos: usize, rest: &[u8], quote: u8) -> Option { + fn find_string_end_simd(&self, start_pos: u32, rest: &[u8], quote: u8) -> Option { // Safety check for small inputs - process with standard method - if rest.len() < 32 || start_pos >= rest.len() { + if rest.len() < 32 || start_pos >= rest.len() as u32 { return None; } let mut pos = start_pos; // Process in chunks of 16 bytes using SIMD - while pos + 16 <= rest.len() { + while pos + 16 <= rest.len() as u32 { // Load 16 bytes - let chunk_bytes = &rest[pos..pos + 16]; + let chunk_bytes = &rest[pos as usize..(pos + 16) as usize]; let mut bytes = [0u8; 16]; bytes.copy_from_slice(chunk_bytes); let chunk = u8x16::new(bytes); @@ -255,7 +255,7 @@ impl Lexer<'_> { { // We found a character that needs special handling // Process from here using the standard algorithm - return self.find_string_end_standard(pos + i, rest, quote); + return self.find_string_end_standard(pos + i as u32, rest, quote); } } @@ -264,7 +264,7 @@ impl Lexer<'_> { } // Process remainder with standard algorithm - if pos < rest.len() { + if pos < rest.len() as u32 { return self.find_string_end_standard(pos, rest, quote); } @@ -273,17 +273,17 @@ impl Lexer<'_> { /// Standard (non-SIMD) implementation of string end finding #[inline] - fn find_string_end_standard(&self, start_pos: usize, rest: &[u8], quote: u8) -> Option { + fn find_string_end_standard(&self, start_pos: u32, rest: &[u8], quote: u8) -> Option { let mut pos = start_pos; let mut in_escape = false; // Safety check for empty input - if rest.is_empty() || pos >= rest.len() { + if rest.is_empty() || pos >= rest.len() as u32 { return None; } - while pos < rest.len() { - let ch = unsafe { *rest.get_unchecked(pos) }; + while pos < rest.len() as u32 { + let ch = unsafe { *rest.get_unchecked(pos as usize) }; if in_escape { // Skip the escaped character @@ -297,7 +297,7 @@ impl Lexer<'_> { in_escape = true; pos += 1; // If we're at the end after a backslash, it's unterminated - if pos >= rest.len() { + if pos >= rest.len() as u32 { return None; } } else if ch == quote { diff --git a/crates/swc_ecma_fast_parser/src/lexer/template.rs b/crates/swc_ecma_fast_parser/src/lexer/template.rs index 8167df1e7e2e..27b2fe33b80f 100644 --- a/crates/swc_ecma_fast_parser/src/lexer/template.rs +++ b/crates/swc_ecma_fast_parser/src/lexer/template.rs @@ -15,7 +15,7 @@ impl Lexer<'_> { /// Read a template literal content pub(super) fn read_template_content(&mut self, had_line_break: bool) -> Result { let start_pos = self.start_pos; - let start_idx = start_pos.0 as usize; + let start_idx = start_pos.0; // If it starts with "${", return a DollarLBrace token if self.cursor.peek_at(0) == Some(b'$') && self.cursor.peek_at(1) == Some(b'{') {