u32
kdy1 committed Mar 6, 2025
1 parent 136a0ae commit af3567b
Showing 9 changed files with 61 additions and 57 deletions.
65 changes: 34 additions & 31 deletions crates/swc_ecma_fast_parser/src/lexer/cursor.rs
@@ -14,10 +14,10 @@ pub struct Cursor<'a> {
input: &'a [u8],

/// Current position in bytes
-pos: usize,
+pos: u32,

/// Length of the input in bytes
-len: usize,
+len: u32,
}

impl<'a> Cursor<'a> {
@@ -28,14 +28,14 @@ impl<'a> Cursor<'a> {
Self {
input: bytes,
pos: 0,
-len: bytes.len(),
+len: bytes.len() as u32,
}
}

/// Get the current position as BytePos
#[inline(always)]
pub fn pos(&self) -> BytePos {
-BytePos(self.pos as u32)
+BytePos(self.pos)
}

/// Check if the cursor is at the end of the input
@@ -51,28 +51,28 @@ impl<'a> Cursor<'a> {
None
} else {
// SAFETY: We've checked that pos < len
-Some(unsafe { *self.input.get_unchecked(self.pos) })
+Some(unsafe { *self.input.get_unchecked(self.pos as usize) })
}
}

/// Peek at a byte at a specific offset from the current position
#[inline(always)]
-pub fn peek_at(&self, offset: usize) -> Option<u8> {
+pub fn peek_at(&self, offset: u32) -> Option<u8> {
let target_pos = self.pos + offset;
if unlikely(target_pos >= self.len) {
None
} else {
// SAFETY: We've checked that target_pos < len
-Some(unsafe { *self.input.get_unchecked(target_pos) })
+Some(unsafe { *self.input.get_unchecked(target_pos as usize) })
}
}

/// Peek at multiple bytes without advancing
#[inline(always)]
-pub fn peek_n(&self, n: usize) -> &[u8] {
+pub fn peek_n(&self, n: u32) -> &[u8] {
let end = (self.pos + n).min(self.len);
// SAFETY: We've ensured end <= len
-unsafe { self.input.get_unchecked(self.pos..end) }
+unsafe { self.input.get_unchecked(self.pos as usize..end as usize) }
}

/// Advance the cursor by one byte
@@ -85,13 +85,13 @@ impl<'a> Cursor<'a> {

/// Advance the cursor by n bytes
#[inline(always)]
-pub fn advance_n(&mut self, n: usize) {
+pub fn advance_n(&mut self, n: u32) {
self.pos = (self.pos + n).min(self.len);
}

/// Advance until the predicate returns false or EOF is reached
#[inline]
-pub fn advance_while<F>(&mut self, mut predicate: F) -> usize
+pub fn advance_while<F>(&mut self, mut predicate: F) -> u32
where
F: FnMut(u8) -> bool,
{
@@ -108,7 +108,7 @@ impl<'a> Cursor<'a> {
where
F: FnMut(u8) -> bool,
{
-const BATCH_SIZE: usize = 32;
+const BATCH_SIZE: u32 = 32;

// Process in batches if we have more than BATCH_SIZE bytes
while self.pos + BATCH_SIZE <= self.len {
Expand All @@ -117,7 +117,7 @@ impl<'a> Cursor<'a> {
// Check all bytes in the batch
for i in 0..BATCH_SIZE {
// SAFETY: We've verified bounds above
-let byte = unsafe { *self.input.get_unchecked(self.pos + i) };
+let byte = unsafe { *self.input.get_unchecked((self.pos + i) as usize) };
if !predicate(byte) {
should_stop = true;
break;
@@ -146,33 +146,36 @@ impl<'a> Cursor<'a> {
#[inline(always)]
pub fn rest(&self) -> &'a [u8] {
// SAFETY: pos is always <= len
-unsafe { self.input.get_unchecked(self.pos..) }
+unsafe { self.input.get_unchecked(self.pos as usize..) }
}

/// Get a slice of the input
#[inline(always)]
-pub fn slice(&self, start: usize, end: usize) -> &'a [u8] {
+pub fn slice(&self, start: u32, end: u32) -> &'a [u8] {
let real_start = start.min(self.len);
let real_end = end.min(self.len);
// SAFETY: We've validated bounds
-unsafe { self.input.get_unchecked(real_start..real_end) }
+unsafe {
+self.input
+.get_unchecked(real_start as usize..real_end as usize)
+}
}

/// Get the current position
#[inline(always)]
-pub fn position(&self) -> usize {
+pub fn position(&self) -> u32 {
self.pos
}

/// Reset the cursor to a specific position
#[inline(always)]
pub fn reset_to(&mut self, pos: BytePos) {
-self.pos = pos.0 as usize;
+self.pos = pos.0;
}

/// Find the next occurrence of a byte
#[inline]
-pub fn find_byte(&self, byte: u8) -> Option<usize> {
+pub fn find_byte(&self, byte: u8) -> Option<u32> {
// If we're at or near EOF, use the standard implementation
if unlikely(self.pos + 16 > self.len) {
return self.find_byte_scalar(byte);
@@ -184,18 +184,18 @@ impl<'a> Cursor<'a> {

/// SIMD-accelerated implementation of find_byte
#[inline]
-fn find_byte_simd(&self, byte: u8) -> Option<usize> {
-let input = &self.input[self.pos..];
-let mut position = 0;
+fn find_byte_simd(&self, byte: u8) -> Option<u32> {
+let input = &self.input[self.pos as usize..];
+let mut position = 0u32;

// Process 16 bytes at a time
-while position + 16 <= input.len() {
+while position + 16 <= input.len() as u32 {
// Create a vector with our pattern
let needle = u8x16::splat(byte);

// Create a vector with current chunk of data
let mut data = [0u8; 16];
-data.copy_from_slice(&input[position..position + 16]);
+data.copy_from_slice(&input[position as usize..(position + 16) as usize]);
let chunk = u8x16::new(data);

// Compare for equality
@@ -208,30 +208,30 @@ impl<'a> Cursor<'a> {
#[allow(clippy::needless_range_loop)]
for i in 0..16 {
if mask_array[i] != 0 {
-return Some(self.pos + position + i);
+return Some(self.pos + position + i as u32);
}
}

position += 16;
}

// Handle the remainder with the scalar implementation
-if position < input.len() {
-return input[position..]
+if position < input.len() as u32 {
+return input[position as usize..]
.iter()
.position(|&b| b == byte)
-.map(|pos| self.pos + position + pos);
+.map(|pos| self.pos + position + pos as u32);
}

None
}

/// Standard fallback implementation
#[inline]
-fn find_byte_scalar(&self, byte: u8) -> Option<usize> {
-self.input[self.pos..]
+fn find_byte_scalar(&self, byte: u8) -> Option<u32> {
+self.input[self.pos as usize..]
.iter()
.position(|&b| b == byte)
-.map(|pos| self.pos + pos)
+.map(|pos| self.pos + pos as u32)
}
}
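The recurring pattern in this file is that every stored offset becomes a u32, and the cast back to usize happens only at the point where a byte is actually indexed or sliced. A minimal standalone sketch of that pattern (illustrative names, safe indexing instead of the crate's get_unchecked calls, and assuming inputs stay below u32::MAX bytes, which the bytes.len() as u32 above also implicitly assumes):

// Sketch only: illustrates the u32-position pattern, not swc's actual Cursor.
struct MiniCursor<'a> {
    input: &'a [u8],
    pos: u32,
    len: u32,
}

impl<'a> MiniCursor<'a> {
    fn new(input: &'a [u8]) -> Self {
        // Assumption: sources are smaller than 4 GiB, so u32 is wide enough.
        assert!(input.len() <= u32::MAX as usize);
        Self { input, pos: 0, len: input.len() as u32 }
    }

    fn peek(&self) -> Option<u8> {
        // Positions stay u32; the cast to usize happens only at the index.
        self.input.get(self.pos as usize).copied()
    }

    fn advance(&mut self) {
        self.pos = (self.pos + 1).min(self.len);
    }
}

fn main() {
    let mut c = MiniCursor::new(b"let x = 1;");
    while c.peek().map_or(false, |b| b.is_ascii_alphabetic()) {
        c.advance();
    }
    assert_eq!(c.pos, 3); // the cursor consumed "let"
}

Halving the width of every cached position is presumably the point of the commit; nothing in the lexer needs offsets beyond 32 bits for inputs of that size.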
2 changes: 1 addition & 1 deletion crates/swc_ecma_fast_parser/src/lexer/identifier.rs
@@ -49,7 +49,7 @@ impl Lexer<'_> {

// Extract the identifier text
let span = self.span();
-let ident_start = start_pos.0 as usize;
+let ident_start = start_pos.0;
let ident_end = self.cursor.position();
let ident_bytes = self.cursor.slice(ident_start, ident_end);
let ident_str = unsafe { std::str::from_utf8_unchecked(ident_bytes) };
2 changes: 1 addition & 1 deletion crates/swc_ecma_fast_parser/src/lexer/jsx.rs
@@ -107,7 +107,7 @@ impl Lexer<'_> {
/// Read JSX text content
fn read_jsx_text(&mut self, had_line_break: bool) -> Result<Token> {
let start_pos = self.start_pos;
-let start_idx = start_pos.0 as usize;
+let start_idx = start_pos.0;

let mut text = String::new();

2 changes: 1 addition & 1 deletion crates/swc_ecma_fast_parser/src/lexer/mod.rs
@@ -502,7 +502,7 @@ impl<'a> Lexer<'a> {
#[inline]
fn process_whitespace_simd(&mut self) -> bool {
// Need at least 16 bytes to use SIMD
-if self.cursor.position() + 16 > self.cursor.rest().len() {
+if self.cursor.position() + 16 > self.cursor.rest().len() as u32 {
return false;
}

14 changes: 7 additions & 7 deletions crates/swc_ecma_fast_parser/src/lexer/number.rs
@@ -40,7 +40,7 @@ impl<'a> Lexer<'a> {
#[inline]
pub(super) fn read_number(&mut self) -> Result<Token> {
let start_pos = self.start_pos;
-let start_idx = start_pos.0 as usize;
+let start_idx = start_pos.0;

// Check for leading dot (e.g. .123)
let starts_with_dot = self.cursor.peek() == Some(b'.');
@@ -233,7 +233,7 @@ impl<'a> Lexer<'a> {

/// Extract the raw string representation of a number
#[inline]
-fn extract_number_str(&self, start_idx: usize) -> Cow<'a, str> {
+fn extract_number_str(&self, start_idx: u32) -> Cow<'a, str> {
let end_idx = self.cursor.position();
let num_slice = self.cursor.slice(start_idx, end_idx);
// Filter out the underscore separators
@@ -253,7 +253,7 @@ impl<'a> Lexer<'a> {

/// Parse a binary number (0b...)
#[inline]
-fn parse_binary_number(&self, start_idx: usize) -> f64 {
+fn parse_binary_number(&self, start_idx: u32) -> f64 {
let start = start_idx + 2; // Skip '0b'
let end = self.cursor.position();

@@ -271,7 +271,7 @@ impl<'a> Lexer<'a> {

/// Parse an octal number (0o...)
#[inline]
-fn parse_octal_number(&self, start_idx: usize) -> f64 {
+fn parse_octal_number(&self, start_idx: u32) -> f64 {
let start = start_idx + 2; // Skip '0o'
let end = self.cursor.position();

@@ -289,7 +289,7 @@ impl<'a> Lexer<'a> {

/// Parse a hexadecimal number (0x...)
#[inline]
-fn parse_hex_number(&self, start_idx: usize) -> f64 {
+fn parse_hex_number(&self, start_idx: u32) -> f64 {
let start = start_idx + 2; // Skip '0x'
let end = self.cursor.position();

@@ -308,7 +308,7 @@ impl<'a> Lexer<'a> {

/// Parse a decimal number
#[inline]
-fn parse_decimal_number(&self, start_idx: usize, _starts_with_dot: bool) -> f64 {
+fn parse_decimal_number(&self, start_idx: u32, _starts_with_dot: bool) -> f64 {
// For decimal numbers with possible fractional and exponent parts,
// use the Rust standard library's parser which is highly optimized
let raw_str = self.extract_number_str(start_idx);
@@ -317,7 +317,7 @@ impl<'a> Lexer<'a> {

/// Create a BigInt token
#[inline]
-fn create_bigint_token(&self, start_idx: usize) -> Result<Token> {
+fn create_bigint_token(&self, start_idx: u32) -> Result<Token> {
use num_bigint::BigInt;

let end_idx = self.cursor.position();
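For context on extract_number_str, whose signature changes above: it returns Cow<'a, str> so the raw literal can be borrowed when it contains no underscore separators and copied only when they have to be filtered out. A rough sketch of that idea in isolation (a hypothetical free function over &str, not swc's actual implementation, which slices the cursor's bytes):

use std::borrow::Cow;

// Sketch: borrow the literal when possible, allocate only to drop '_' separators.
fn extract_number_str(raw: &str) -> Cow<'_, str> {
    if raw.contains('_') {
        Cow::Owned(raw.chars().filter(|&c| c != '_').collect())
    } else {
        Cow::Borrowed(raw)
    }
}

fn main() {
    // "1_000_000" needs filtering before the standard f64 parser sees it.
    assert_eq!(extract_number_str("1_000_000"), "1000000");
    assert_eq!(extract_number_str("1_000_000").parse::<f64>().unwrap(), 1_000_000.0);
    // A plain literal is handed back without allocating.
    assert!(matches!(extract_number_str("42"), Cow::Borrowed("42")));
}

The filtered string is what parse_decimal_number ultimately hands to the standard library's f64 parser.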
3 changes: 2 additions & 1 deletion crates/swc_ecma_fast_parser/src/lexer/operators.rs
@@ -304,11 +304,12 @@ impl Lexer<'_> {

/// Read a less-than token (< or <= or << or <=)
pub(super) fn read_less_than(&mut self) -> Result<Token> {
+let start_pos = self.start_pos;
self.cursor.advance(); // Skip the initial '<'

// Check for JSX mode
if self.in_jsx_element {
-self.cursor.advance_n(usize::MAX); // Reset cursor to start position
+self.cursor.reset_to(start_pos);
return self.read_jsx_token(self.had_line_break.into());
}

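Beyond the index-type change, this hunk fixes the JSX path of read_less_than: the old advance_n(usize::MAX) call claimed to reset the cursor but, given the (pos + n).min(len) clamp shown earlier, appears to have relied on integer wrap-around to step backwards (and would overflow in a debug build); the new code saves start_pos up front and rewinds explicitly with reset_to. A tiny sketch of that save-and-rewind pattern (illustrative names, not swc's API — the real cursor tracks BytePos):

// Sketch only: the save-and-rewind pattern behind the reset_to fix above.
struct Rewinder<'a> {
    input: &'a [u8],
    pos: u32,
}

impl<'a> Rewinder<'a> {
    fn advance(&mut self) {
        self.pos = (self.pos + 1).min(self.input.len() as u32);
    }

    fn reset_to(&mut self, pos: u32) {
        // A real implementation would also check that pos <= input.len().
        self.pos = pos;
    }
}

fn main() {
    let mut c = Rewinder { input: b"<div>", pos: 0 };
    let start = c.pos; // remember where '<' starts
    c.advance();       // consume '<'
    // On discovering we are in JSX, hand the whole "<div>" back to the JSX reader.
    c.reset_to(start);
    assert_eq!(c.pos, 0);
}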
2 changes: 1 addition & 1 deletion crates/swc_ecma_fast_parser/src/lexer/regex.rs
@@ -15,7 +15,7 @@ impl Lexer<'_> {
/// Assumes the initial '/' has been consumed
pub(super) fn read_regex(&mut self, had_line_break: bool) -> Result<Token> {
let start_pos = self.start_pos;
-let start_idx = start_pos.0 as usize;
+let start_idx = start_pos.0;

// Read the pattern
let mut in_class = false; // Whether we're in a character class [...]
