u32
kdy1 committed Mar 6, 2025
1 parent 136a0ae commit af3567b
Showing 9 changed files with 61 additions and 57 deletions.
65 changes: 34 additions & 31 deletions crates/swc_ecma_fast_parser/src/lexer/cursor.rs
@@ -14,10 +14,10 @@ pub struct Cursor<'a> {
input: &'a [u8],

/// Current position in bytes
-pos: usize,
+pos: u32,

/// Length of the input in bytes
-len: usize,
+len: u32,
}

impl<'a> Cursor<'a> {
@@ -28,14 +28,14 @@ impl<'a> Cursor<'a> {
Self {
input: bytes,
pos: 0,
-len: bytes.len(),
+len: bytes.len() as u32,
}
}

/// Get the current position as BytePos
#[inline(always)]
pub fn pos(&self) -> BytePos {
-BytePos(self.pos as u32)
+BytePos(self.pos)
}

/// Check if the cursor is at the end of the input
@@ -51,28 +51,28 @@ impl<'a> Cursor<'a> {
None
} else {
// SAFETY: We've checked that pos < len
-Some(unsafe { *self.input.get_unchecked(self.pos) })
+Some(unsafe { *self.input.get_unchecked(self.pos as usize) })
}
}

/// Peek at a byte at a specific offset from the current position
#[inline(always)]
-pub fn peek_at(&self, offset: usize) -> Option<u8> {
+pub fn peek_at(&self, offset: u32) -> Option<u8> {
let target_pos = self.pos + offset;
if unlikely(target_pos >= self.len) {
None
} else {
// SAFETY: We've checked that target_pos < len
-Some(unsafe { *self.input.get_unchecked(target_pos) })
+Some(unsafe { *self.input.get_unchecked(target_pos as usize) })
}
}

/// Peek at multiple bytes without advancing
#[inline(always)]
-pub fn peek_n(&self, n: usize) -> &[u8] {
+pub fn peek_n(&self, n: u32) -> &[u8] {
let end = (self.pos + n).min(self.len);
// SAFETY: We've ensured end <= len
-unsafe { self.input.get_unchecked(self.pos..end) }
+unsafe { self.input.get_unchecked(self.pos as usize..end as usize) }
}

/// Advance the cursor by one byte
@@ -85,13 +85,13 @@ impl<'a> Cursor<'a> {

/// Advance the cursor by n bytes
#[inline(always)]
-pub fn advance_n(&mut self, n: usize) {
+pub fn advance_n(&mut self, n: u32) {
self.pos = (self.pos + n).min(self.len);
}

/// Advance until the predicate returns false or EOF is reached
#[inline]
-pub fn advance_while<F>(&mut self, mut predicate: F) -> usize
+pub fn advance_while<F>(&mut self, mut predicate: F) -> u32
where
F: FnMut(u8) -> bool,
{
@@ -108,7 +108,7 @@ impl<'a> Cursor<'a> {
where
F: FnMut(u8) -> bool,
{
-const BATCH_SIZE: usize = 32;
+const BATCH_SIZE: u32 = 32;

// Process in batches if we have more than BATCH_SIZE bytes
while self.pos + BATCH_SIZE <= self.len {
Expand All @@ -117,7 +117,7 @@ impl<'a> Cursor<'a> {
// Check all bytes in the batch
for i in 0..BATCH_SIZE {
// SAFETY: We've verified bounds above
-let byte = unsafe { *self.input.get_unchecked(self.pos + i) };
+let byte = unsafe { *self.input.get_unchecked((self.pos + i) as usize) };
if !predicate(byte) {
should_stop = true;
break;
@@ -146,33 +146,36 @@ impl<'a> Cursor<'a> {
#[inline(always)]
pub fn rest(&self) -> &'a [u8] {
// SAFETY: pos is always <= len
-unsafe { self.input.get_unchecked(self.pos..) }
+unsafe { self.input.get_unchecked(self.pos as usize..) }
}

/// Get a slice of the input
#[inline(always)]
-pub fn slice(&self, start: usize, end: usize) -> &'a [u8] {
+pub fn slice(&self, start: u32, end: u32) -> &'a [u8] {
let real_start = start.min(self.len);
let real_end = end.min(self.len);
// SAFETY: We've validated bounds
-unsafe { self.input.get_unchecked(real_start..real_end) }
+unsafe {
+self.input
+.get_unchecked(real_start as usize..real_end as usize)
+}
}

/// Get the current position
#[inline(always)]
-pub fn position(&self) -> usize {
+pub fn position(&self) -> u32 {
self.pos
}

/// Reset the cursor to a specific position
#[inline(always)]
pub fn reset_to(&mut self, pos: BytePos) {
-self.pos = pos.0 as usize;
+self.pos = pos.0;
}

/// Find the next occurrence of a byte
#[inline]
-pub fn find_byte(&self, byte: u8) -> Option<usize> {
+pub fn find_byte(&self, byte: u8) -> Option<u32> {
// If we're at or near EOF, use the standard implementation
if unlikely(self.pos + 16 > self.len) {
return self.find_byte_scalar(byte);
@@ -184,18 +184,18 @@ impl<'a> Cursor<'a> {

/// SIMD-accelerated implementation of find_byte
#[inline]
-fn find_byte_simd(&self, byte: u8) -> Option<usize> {
-let input = &self.input[self.pos..];
-let mut position = 0;
+fn find_byte_simd(&self, byte: u8) -> Option<u32> {
+let input = &self.input[self.pos as usize..];
+let mut position = 0u32;

// Process 16 bytes at a time
-while position + 16 <= input.len() {
+while position + 16 <= input.len() as u32 {
// Create a vector with our pattern
let needle = u8x16::splat(byte);

// Create a vector with current chunk of data
let mut data = [0u8; 16];
-data.copy_from_slice(&input[position..position + 16]);
+data.copy_from_slice(&input[position as usize..(position + 16) as usize]);
let chunk = u8x16::new(data);

// Compare for equality
@@ -208,30 +208,30 @@ impl<'a> Cursor<'a> {
#[allow(clippy::needless_range_loop)]
for i in 0..16 {
if mask_array[i] != 0 {
-return Some(self.pos + position + i);
+return Some(self.pos + position + i as u32);
}
}

position += 16;
}

// Handle the remainder with the scalar implementation
-if position < input.len() {
-return input[position..]
+if position < input.len() as u32 {
+return input[position as usize..]
.iter()
.position(|&b| b == byte)
-.map(|pos| self.pos + position + pos);
+.map(|pos| self.pos + position + pos as u32);
}

None
}

/// Standard fallback implementation
#[inline]
-fn find_byte_scalar(&self, byte: u8) -> Option<usize> {
-self.input[self.pos..]
+fn find_byte_scalar(&self, byte: u8) -> Option<u32> {
+self.input[self.pos as usize..]
.iter()
.position(|&b| b == byte)
-.map(|pos| self.pos + pos)
+.map(|pos| self.pos + pos as u32)
}
}
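The recurring pattern in this file is that every stored offset becomes a u32, and the cast back to usize happens only at the point where a byte is actually indexed or sliced. A minimal standalone sketch of that pattern (illustrative names, safe indexing instead of the crate's get_unchecked calls, and assuming inputs stay below u32::MAX bytes, which the bytes.len() as u32 above also implicitly assumes):

// Sketch only: illustrates the u32-position pattern, not swc's actual Cursor.
struct MiniCursor<'a> {
    input: &'a [u8],
    pos: u32,
    len: u32,
}

impl<'a> MiniCursor<'a> {
    fn new(input: &'a [u8]) -> Self {
        // Assumption: sources are smaller than 4 GiB, so u32 is wide enough.
        assert!(input.len() <= u32::MAX as usize);
        Self { input, pos: 0, len: input.len() as u32 }
    }

    fn peek(&self) -> Option<u8> {
        // Positions stay u32; the cast to usize happens only at the index.
        self.input.get(self.pos as usize).copied()
    }

    fn advance(&mut self) {
        self.pos = (self.pos + 1).min(self.len);
    }
}

fn main() {
    let mut c = MiniCursor::new(b"let x = 1;");
    while c.peek().map_or(false, |b| b.is_ascii_alphabetic()) {
        c.advance();
    }
    assert_eq!(c.pos, 3); // the cursor consumed "let"
}

Halving the width of every cached position is presumably the point of the commit; nothing in the lexer needs offsets beyond 32 bits for inputs of that size.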
2 changes: 1 addition & 1 deletion crates/swc_ecma_fast_parser/src/lexer/identifier.rs
@@ -49,7 +49,7 @@ impl Lexer<'_> {

// Extract the identifier text
let span = self.span();
-let ident_start = start_pos.0 as usize;
+let ident_start = start_pos.0;
let ident_end = self.cursor.position();
let ident_bytes = self.cursor.slice(ident_start, ident_end);
let ident_str = unsafe { std::str::from_utf8_unchecked(ident_bytes) };
2 changes: 1 addition & 1 deletion crates/swc_ecma_fast_parser/src/lexer/jsx.rs
@@ -107,7 +107,7 @@ impl Lexer<'_> {
/// Read JSX text content
fn read_jsx_text(&mut self, had_line_break: bool) -> Result<Token> {
let start_pos = self.start_pos;
-let start_idx = start_pos.0 as usize;
+let start_idx = start_pos.0;

let mut text = String::new();

2 changes: 1 addition & 1 deletion crates/swc_ecma_fast_parser/src/lexer/mod.rs
@@ -502,7 +502,7 @@ impl<'a> Lexer<'a> {
#[inline]
fn process_whitespace_simd(&mut self) -> bool {
// Need at least 16 bytes to use SIMD
-if self.cursor.position() + 16 > self.cursor.rest().len() {
+if self.cursor.position() + 16 > self.cursor.rest().len() as u32 {
return false;
}

14 changes: 7 additions & 7 deletions crates/swc_ecma_fast_parser/src/lexer/number.rs
@@ -40,7 +40,7 @@ impl<'a> Lexer<'a> {
#[inline]
pub(super) fn read_number(&mut self) -> Result<Token> {
let start_pos = self.start_pos;
-let start_idx = start_pos.0 as usize;
+let start_idx = start_pos.0;

// Check for leading dot (e.g. .123)
let starts_with_dot = self.cursor.peek() == Some(b'.');
@@ -233,7 +233,7 @@ impl<'a> Lexer<'a> {

/// Extract the raw string representation of a number
#[inline]
-fn extract_number_str(&self, start_idx: usize) -> Cow<'a, str> {
+fn extract_number_str(&self, start_idx: u32) -> Cow<'a, str> {
let end_idx = self.cursor.position();
let num_slice = self.cursor.slice(start_idx, end_idx);
// Filter out the underscore separators
@@ -253,7 +253,7 @@ impl<'a> Lexer<'a> {

/// Parse a binary number (0b...)
#[inline]
-fn parse_binary_number(&self, start_idx: usize) -> f64 {
+fn parse_binary_number(&self, start_idx: u32) -> f64 {
let start = start_idx + 2; // Skip '0b'
let end = self.cursor.position();

@@ -271,7 +271,7 @@ impl<'a> Lexer<'a> {

/// Parse an octal number (0o...)
#[inline]
-fn parse_octal_number(&self, start_idx: usize) -> f64 {
+fn parse_octal_number(&self, start_idx: u32) -> f64 {
let start = start_idx + 2; // Skip '0o'
let end = self.cursor.position();

@@ -289,7 +289,7 @@ impl<'a> Lexer<'a> {

/// Parse a hexadecimal number (0x...)
#[inline]
-fn parse_hex_number(&self, start_idx: usize) -> f64 {
+fn parse_hex_number(&self, start_idx: u32) -> f64 {
let start = start_idx + 2; // Skip '0x'
let end = self.cursor.position();

@@ -308,7 +308,7 @@ impl<'a> Lexer<'a> {

/// Parse a decimal number
#[inline]
-fn parse_decimal_number(&self, start_idx: usize, _starts_with_dot: bool) -> f64 {
+fn parse_decimal_number(&self, start_idx: u32, _starts_with_dot: bool) -> f64 {
// For decimal numbers with possible fractional and exponent parts,
// use the Rust standard library's parser which is highly optimized
let raw_str = self.extract_number_str(start_idx);
@@ -317,7 +317,7 @@ impl<'a> Lexer<'a> {

/// Create a BigInt token
#[inline]
-fn create_bigint_token(&self, start_idx: usize) -> Result<Token> {
+fn create_bigint_token(&self, start_idx: u32) -> Result<Token> {
use num_bigint::BigInt;

let end_idx = self.cursor.position();
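For context on extract_number_str, whose signature changes above: it returns Cow<'a, str> so the raw literal can be borrowed when it contains no underscore separators and copied only when they have to be filtered out. A rough sketch of that idea in isolation (a hypothetical free function over &str, not swc's actual implementation, which slices the cursor's bytes):

use std::borrow::Cow;

// Sketch: borrow the literal when possible, allocate only to drop '_' separators.
fn extract_number_str(raw: &str) -> Cow<'_, str> {
    if raw.contains('_') {
        Cow::Owned(raw.chars().filter(|&c| c != '_').collect())
    } else {
        Cow::Borrowed(raw)
    }
}

fn main() {
    // "1_000_000" needs filtering before the standard f64 parser sees it.
    assert_eq!(extract_number_str("1_000_000"), "1000000");
    assert_eq!(extract_number_str("1_000_000").parse::<f64>().unwrap(), 1_000_000.0);
    // A plain literal is handed back without allocating.
    assert!(matches!(extract_number_str("42"), Cow::Borrowed("42")));
}

The filtered string is what parse_decimal_number ultimately hands to the standard library's f64 parser.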
3 changes: 2 additions & 1 deletion crates/swc_ecma_fast_parser/src/lexer/operators.rs
@@ -304,11 +304,12 @@ impl Lexer<'_> {

/// Read a less-than token (< or <= or << or <=)
pub(super) fn read_less_than(&mut self) -> Result<Token> {
+let start_pos = self.start_pos;
self.cursor.advance(); // Skip the initial '<'

// Check for JSX mode
if self.in_jsx_element {
-self.cursor.advance_n(usize::MAX); // Reset cursor to start position
+self.cursor.reset_to(start_pos);
return self.read_jsx_token(self.had_line_break.into());
}

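Beyond the index-type change, this hunk fixes the JSX path of read_less_than: the old advance_n(usize::MAX) call claimed to reset the cursor but, given the (pos + n).min(len) clamp shown earlier, appears to have relied on integer wrap-around to step backwards (and would overflow in a debug build); the new code saves start_pos up front and rewinds explicitly with reset_to. A tiny sketch of that save-and-rewind pattern (illustrative names, not swc's API — the real cursor tracks BytePos):

// Sketch only: the save-and-rewind pattern behind the reset_to fix above.
struct Rewinder<'a> {
    input: &'a [u8],
    pos: u32,
}

impl<'a> Rewinder<'a> {
    fn advance(&mut self) {
        self.pos = (self.pos + 1).min(self.input.len() as u32);
    }

    fn reset_to(&mut self, pos: u32) {
        // A real implementation would also check that pos <= input.len().
        self.pos = pos;
    }
}

fn main() {
    let mut c = Rewinder { input: b"<div>", pos: 0 };
    let start = c.pos; // remember where '<' starts
    c.advance();       // consume '<'
    // On discovering we are in JSX, hand the whole "<div>" back to the JSX reader.
    c.reset_to(start);
    assert_eq!(c.pos, 0);
}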
2 changes: 1 addition & 1 deletion crates/swc_ecma_fast_parser/src/lexer/regex.rs
@@ -15,7 +15,7 @@ impl Lexer<'_> {
/// Assumes the initial '/' has been consumed
pub(super) fn read_regex(&mut self, had_line_break: bool) -> Result<Token> {
let start_pos = self.start_pos;
-let start_idx = start_pos.0 as usize;
+let start_idx = start_pos.0;

// Read the pattern
let mut in_class = false; // Whether we're in a character class [...]
