Skip to content

Commit

Permalink
Bypass queue when possible in slice::make_*case
Browse files Browse the repository at this point in the history
  • Loading branch information
krtab committed Feb 18, 2025
1 parent fffbb33 commit eb5113b
Showing 1 changed file with 49 additions and 29 deletions.
78 changes: 49 additions & 29 deletions library/alloc/src/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -679,27 +679,22 @@ impl [u8] {
let mut read_offset = 0;
let mut write_offset = 0;

let mut buffer = [0; 4];
while let Some((codepoint, width)) =
unsafe { core::str::next_code_point_with_width(&mut self[read_offset..].iter()) }
{
read_offset += width;
dump_queue(&mut queue, &mut self[..read_offset], &mut write_offset);
let lowercase_char = unsafe { char::from_u32_unchecked(codepoint) };
for c in lowercase_char.to_uppercase() {
let l = c.len_utf8();
c.encode_utf8(&mut buffer);
queue.extend(&buffer[..l]);
}
while write_offset < read_offset {
match queue.pop_front() {
Some(b) => {
self[write_offset] = b;
write_offset += 1;
}
None => break,
}
encode_to_slice_or_else_to_queue(
c,
&mut queue,
&mut self[..read_offset],
&mut write_offset,
);
}
}
dump_queue(&mut queue, &mut self[..read_offset], &mut write_offset);
assert_eq!(read_offset, self.len());
if write_offset < read_offset { Ok(write_offset) } else { Err(queue) }
}
Expand All @@ -715,12 +710,12 @@ impl [u8] {
let mut read_offset = 0;
let mut write_offset = 0;

let mut buffer = [0; 4];
let mut final_sigma_automata = FinalSigmaAutomata::new();
while let Some((codepoint, width)) =
unsafe { core::str::next_code_point_with_width(&mut self[read_offset..].iter()) }
{
read_offset += width;
dump_queue(&mut queue, &mut self[..read_offset], &mut write_offset);
let uppercase_char = unsafe { char::from_u32_unchecked(codepoint) };
if uppercase_char == 'Σ' {
// Σ maps to σ, except at the end of a word where it maps to ς.
Expand All @@ -729,27 +724,25 @@ impl [u8] {
let is_word_final =
final_sigma_automata.is_accepting() && !case_ignorable_then_cased(rest.chars());
let sigma_lowercase = if is_word_final { 'ς' } else { 'σ' };
let l = sigma_lowercase.len_utf8();
sigma_lowercase.encode_utf8(&mut buffer);
queue.extend(&buffer[..l]);
encode_to_slice_or_else_to_queue(
sigma_lowercase,
&mut queue,
&mut self[..read_offset],
&mut write_offset,
);
} else {
for c in uppercase_char.to_lowercase() {
let l = c.len_utf8();
c.encode_utf8(&mut buffer);
queue.extend(&buffer[..l]);
encode_to_slice_or_else_to_queue(
c,
&mut queue,
&mut self[..read_offset],
&mut write_offset,
);
}
}
final_sigma_automata.step(uppercase_char);
while write_offset < read_offset {
match queue.pop_front() {
Some(b) => {
self[write_offset] = b;
write_offset += 1;
}
None => break,
}
}
}
dump_queue(&mut queue, &mut self[..read_offset], &mut write_offset);
assert_eq!(read_offset, self.len());
return if write_offset < read_offset { Ok(write_offset) } else { Err(queue) };

Expand All @@ -764,6 +757,33 @@ impl [u8] {
}
}

/// Appends the UTF-8 encoding of `c` to the output stream formed by
/// `slice[..*write_offset]` followed by the contents of `queue`.
///
/// When `queue` is empty, as many bytes as fit are copied directly into
/// `slice` starting at `*write_offset` (which is advanced accordingly), and
/// only the bytes that do not fit are pushed onto the back of `queue`.
///
/// When `queue` already holds pending bytes, those bytes logically precede
/// `c` in the output, so writing `c` directly into `slice` would reorder the
/// stream. In that case every byte of `c` is pushed onto the back of `queue`
/// and `slice` / `write_offset` are left untouched.
///
/// # Panics
///
/// Panics if `queue` is empty and `*write_offset > slice.len()`.
fn encode_to_slice_or_else_to_queue(
    c: char,
    queue: &mut VecDeque<u8>,
    slice: &mut [u8],
    write_offset: &mut usize,
) {
    let mut buffer = [0u8; 4];
    let len = c.encode_utf8(&mut buffer).len();
    let encoded = &buffer[..len];

    // Bytes still waiting in the queue must reach the slice before `c` does;
    // bypassing the queue here would emit them out of order.
    if !queue.is_empty() {
        queue.extend(encoded);
        return;
    }

    let writable_slice = &mut slice[*write_offset..];
    let direct_copy_length = len.min(writable_slice.len());
    let (direct, spilled) = encoded.split_at(direct_copy_length);

    // Fast path: write whatever fits straight into the slice...
    writable_slice[..direct_copy_length].copy_from_slice(direct);
    *write_offset += direct_copy_length;
    // ...and keep the overflow (possibly empty) for a later flush.
    queue.extend(spilled);
}

/// Moves bytes from the front of `queue` into `slice`, starting at
/// `*write_offset`, until either the slice is full or the queue is empty.
///
/// `*write_offset` is advanced by the number of bytes written. If
/// `*write_offset` is already at or beyond `slice.len()`, nothing happens.
fn dump_queue(queue: &mut VecDeque<u8>, slice: &mut [u8], write_offset: &mut usize) {
    // `get_mut` yields `None` for an out-of-range start, so an offset past
    // the end is a no-op rather than a panic.
    if let Some(free) = slice.get_mut(*write_offset..) {
        // Drain exactly as many bytes as can be placed, so the rest of the
        // queue stays intact for a later flush.
        let movable = free.len().min(queue.len());
        for (slot, byte) in free.iter_mut().zip(queue.drain(..movable)) {
            *slot = byte;
        }
        *write_offset += movable;
    }
}

#[derive(Clone)]
enum FinalSigmaAutomata {
Init,
Expand Down

0 comments on commit eb5113b

Please sign in to comment.