diff --git a/library/alloc/src/slice.rs b/library/alloc/src/slice.rs index 0c23d7cc0b071..40287e9fa1040 100644 --- a/library/alloc/src/slice.rs +++ b/library/alloc/src/slice.rs @@ -672,32 +672,28 @@ impl [u8] { #[unstable(issue = "none", feature = "std_internals")] #[allow(dead_code)] /// Safety: - /// - Must be UTF-8 + /// - Must be valid UTF-8 pub unsafe fn make_utf8_uppercase(&mut self) -> Result> { let mut queue = VecDeque::new(); let mut read_offset = 0; let mut write_offset = 0; - let mut buffer = [0; 4]; while let Some((codepoint, width)) = unsafe { core::str::next_code_point_with_width(&mut self[read_offset..].iter()) } { read_offset += width; + // Queue must be flushed before encode_to_slice_or_else_to_queue is + // called to ensure proper order of bytes + dump_queue(&mut queue, &mut self[..read_offset], &mut write_offset); let lowercase_char = unsafe { char::from_u32_unchecked(codepoint) }; for c in lowercase_char.to_uppercase() { - let l = c.len_utf8(); - c.encode_utf8(&mut buffer); - queue.extend(&buffer[..l]); - } - while write_offset < read_offset { - match queue.pop_front() { - Some(b) => { - self[write_offset] = b; - write_offset += 1; - } - None => break, - } + encode_to_slice_or_else_to_queue( + c, + &mut queue, + &mut self[..read_offset], + &mut write_offset, + ); } } assert_eq!(read_offset, self.len()); @@ -708,19 +704,21 @@ impl [u8] { #[unstable(issue = "none", feature = "std_internals")] #[allow(dead_code)] /// Safety: - /// - Must be UTF-8 + /// - Must be valid UTF-8 pub unsafe fn make_utf8_lowercase(&mut self) -> Result> { let mut queue = VecDeque::new(); let mut read_offset = 0; let mut write_offset = 0; - let mut buffer = [0; 4]; let mut final_sigma_automata = FinalSigmaAutomata::new(); while let Some((codepoint, width)) = unsafe { core::str::next_code_point_with_width(&mut self[read_offset..].iter()) } { read_offset += width; + // Queue must be flushed before encode_to_slice_or_else_to_queue is + // called to ensure proper order of bytes + dump_queue(&mut queue, &mut self[..read_offset], &mut write_offset); let uppercase_char = unsafe { char::from_u32_unchecked(codepoint) }; if uppercase_char == 'Σ' { // Σ maps to σ, except at the end of a word where it maps to ς. @@ -729,26 +727,23 @@ impl [u8] { let is_word_final = final_sigma_automata.is_accepting() && !case_ignorable_then_cased(rest.chars()); let sigma_lowercase = if is_word_final { 'ς' } else { 'σ' }; - let l = sigma_lowercase.len_utf8(); - sigma_lowercase.encode_utf8(&mut buffer); - queue.extend(&buffer[..l]); + encode_to_slice_or_else_to_queue( + sigma_lowercase, + &mut queue, + &mut self[..read_offset], + &mut write_offset, + ); } else { for c in uppercase_char.to_lowercase() { - let l = c.len_utf8(); - c.encode_utf8(&mut buffer); - queue.extend(&buffer[..l]); + encode_to_slice_or_else_to_queue( + c, + &mut queue, + &mut self[..read_offset], + &mut write_offset, + ); } } final_sigma_automata.step(uppercase_char); - while write_offset < read_offset { - match queue.pop_front() { - Some(b) => { - self[write_offset] = b; - write_offset += 1; - } - None => break, - } - } } assert_eq!(read_offset, self.len()); return if write_offset < read_offset { Ok(write_offset) } else { Err(queue) }; @@ -764,6 +759,33 @@ impl [u8] { } } +fn encode_to_slice_or_else_to_queue( + c: char, + queue: &mut VecDeque, + slice: &mut [u8], + write_offset: &mut usize, +) { + let mut buffer = [0; 4]; + let len = c.encode_utf8(&mut buffer).len(); + let writable_slice = &mut slice[*write_offset..]; + let direct_copy_length = core::cmp::min(len, writable_slice.len()); + writable_slice[..direct_copy_length].copy_from_slice(&buffer[..direct_copy_length]); + *write_offset += direct_copy_length; + queue.extend(&buffer[direct_copy_length..len]); +} + +fn dump_queue(queue: &mut VecDeque, slice: &mut [u8], write_offset: &mut usize) { + while *write_offset < slice.len() { + match queue.pop_front() { + Some(b) => { + slice[*write_offset] = b; + *write_offset += 1; + } + None => break, + } + } +} + #[derive(Clone)] enum FinalSigmaAutomata { Init,