Skip to content

Commit

Permalink
Add needed no_global_oom_handling cfg to make_case methods
Browse files Browse the repository at this point in the history
  • Loading branch information
krtab committed Feb 18, 2025
1 parent a00b4ef commit 6f1f32e
Show file tree
Hide file tree
Showing 3 changed files with 169 additions and 162 deletions.
167 changes: 5 additions & 162 deletions library/alloc/src/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,13 @@ use crate::alloc::Global;
#[cfg(not(no_global_oom_handling))]
use crate::borrow::ToOwned;
use crate::boxed::Box;
use crate::collections::VecDeque;
use crate::vec::Vec;

// Keeping these methods in their own module lets the
// `no_global_oom_handling` cfg be applied in a single place.
#[cfg(not(no_global_oom_handling))]
mod byte_slice_make_case;

// HACK(japaric): With cfg(test) `impl [T]` is not available, these three
// functions are actually methods that are in `impl [T]` but not in
// `core::slice::SliceExt` - we need to supply these functions for the
Expand Down Expand Up @@ -666,167 +670,6 @@ impl<T> [T] {
}
}

#[cfg(not(test))]
impl [u8] {
    /// Converts the UTF-8 text stored in this slice to uppercase, in place.
    ///
    /// The text is decoded one character at a time from the front. Each
    /// character's uppercase mapping is written over bytes that have already
    /// been decoded; bytes that do not fit there yet are parked in a queue and
    /// written out as soon as more input has been consumed.
    ///
    /// Returns `Ok(n)` when the converted text is strictly shorter than the
    /// slice, in which case it occupies the first `n` bytes. Otherwise returns
    /// `Err(queue)`: the slice is completely filled with converted text and
    /// `queue` holds the (possibly empty) remainder that did not fit.
    ///
    /// # Panics
    ///
    /// Panics if the number of decoded bytes differs from the slice length.
    ///
    /// # Safety
    ///
    /// - Must be valid UTF-8
    #[rustc_allow_incoherent_impl]
    #[unstable(issue = "none", feature = "std_internals")]
    #[allow(dead_code)]
    pub unsafe fn make_utf8_uppercase(&mut self) -> Result<usize, VecDeque<u8>> {
        let mut overflow = VecDeque::new();
        // Number of input bytes decoded so far / output bytes stored in the slice.
        let mut consumed = 0;
        let mut written = 0;

        loop {
            // SAFETY: the caller guarantees valid UTF-8, and only bytes before
            // `consumed` are ever overwritten, so the unread tail is untouched.
            let decoded =
                unsafe { core::str::next_code_point_with_width(&mut self[consumed..].iter()) };
            let Some((codepoint, width)) = decoded else { break };
            consumed += width;

            // Pending bytes were produced by earlier characters, so they have to
            // reach the slice before anything this character emits.
            dump_queue(&mut overflow, &mut self[..consumed], &mut written);

            // SAFETY: the code point was decoded from text the caller guarantees
            // to be valid UTF-8.
            let source_char = unsafe { char::from_u32_unchecked(codepoint) };
            for upper in source_char.to_uppercase() {
                encode_to_slice_or_else_to_queue(
                    upper,
                    &mut overflow,
                    &mut self[..consumed],
                    &mut written,
                );
            }
        }

        assert_eq!(consumed, self.len());
        if written < consumed { Ok(written) } else { Err(overflow) }
    }

    /// Converts the UTF-8 text stored in this slice to lowercase, in place.
    ///
    /// Works like `make_utf8_uppercase`, with one extra rule: `Σ` maps to `σ`,
    /// except at the end of a word where it maps to `ς`. Because the text
    /// before the current position has already been overwritten, the context
    /// preceding a `Σ` is tracked with a `FinalSigmaAutomata`, while the
    /// context following it is read from the not-yet-consumed tail.
    ///
    /// Returns `Ok(n)` when the converted text is strictly shorter than the
    /// slice, in which case it occupies the first `n` bytes. Otherwise returns
    /// `Err(queue)`: the slice is completely filled with converted text and
    /// `queue` holds the (possibly empty) remainder that did not fit.
    ///
    /// # Panics
    ///
    /// Panics if the number of decoded bytes differs from the slice length.
    ///
    /// # Safety
    ///
    /// - Must be valid UTF-8
    #[rustc_allow_incoherent_impl]
    #[unstable(issue = "none", feature = "std_internals")]
    #[allow(dead_code)]
    pub unsafe fn make_utf8_lowercase(&mut self) -> Result<usize, VecDeque<u8>> {
        // For now this is copy pasted from core::str, FIXME: DRY
        fn case_ignorable_then_cased<I: Iterator<Item = char>>(mut iter: I) -> bool {
            use core::unicode::{Case_Ignorable, Cased};
            // The first character that is not case-ignorable decides the answer.
            match iter.find(|&c| !Case_Ignorable(c)) {
                Some(c) => Cased(c),
                None => false,
            }
        }

        let mut overflow = VecDeque::new();
        // Number of input bytes decoded so far / output bytes stored in the slice.
        let mut consumed = 0;
        let mut written = 0;
        let mut preceding_context = FinalSigmaAutomata::new();

        loop {
            // SAFETY: the caller guarantees valid UTF-8, and only bytes before
            // `consumed` are ever overwritten, so the unread tail is untouched.
            let decoded =
                unsafe { core::str::next_code_point_with_width(&mut self[consumed..].iter()) };
            let Some((codepoint, width)) = decoded else { break };
            consumed += width;

            // Pending bytes were produced by earlier characters, so they have to
            // reach the slice before anything this character emits.
            dump_queue(&mut overflow, &mut self[..consumed], &mut written);

            // SAFETY: the code point was decoded from text the caller guarantees
            // to be valid UTF-8.
            let source_char = unsafe { char::from_u32_unchecked(codepoint) };
            if source_char == 'Σ' {
                // Σ maps to σ, except at the end of a word where it maps to ς.
                // See core::str::to_lowercase
                //
                // SAFETY: the tail past `consumed` has not been modified and is
                // part of the text the caller guarantees to be valid UTF-8.
                let following = unsafe { core::str::from_utf8_unchecked(&self[consumed..]) };
                let is_word_final = preceding_context.is_accepting()
                    && !case_ignorable_then_cased(following.chars());
                let sigma = if is_word_final { 'ς' } else { 'σ' };
                encode_to_slice_or_else_to_queue(
                    sigma,
                    &mut overflow,
                    &mut self[..consumed],
                    &mut written,
                );
            } else {
                for lower in source_char.to_lowercase() {
                    encode_to_slice_or_else_to_queue(
                        lower,
                        &mut overflow,
                        &mut self[..consumed],
                        &mut written,
                    );
                }
            }
            // The automaton is fed the original character, not its lowercase form.
            preceding_context.step(source_char);
        }

        assert_eq!(consumed, self.len());
        if written < consumed { Ok(written) } else { Err(overflow) }
    }
}

/// Encodes `c` as UTF-8 and stores it at `slice[*write_offset..]`.
///
/// As many leading bytes as fit are copied into the slice and `write_offset`
/// is advanced by that amount; any bytes left over are appended to `queue`.
///
/// Panics if `*write_offset` is greater than `slice.len()`.
fn encode_to_slice_or_else_to_queue(
    c: char,
    queue: &mut VecDeque<u8>,
    slice: &mut [u8],
    write_offset: &mut usize,
) {
    let mut scratch = [0; 4];
    let encoded_len = c.encode_utf8(&mut scratch).len();
    let encoded = &scratch[..encoded_len];

    let room = &mut slice[*write_offset..];
    let fits = encoded.len().min(room.len());

    // Split the encoding into the part that lands in the slice and the part
    // that has to wait in the queue.
    let (in_place, spill) = encoded.split_at(fits);
    room[..fits].copy_from_slice(in_place);
    *write_offset += fits;
    queue.extend(spill);
}

/// Moves bytes from the front of `queue` into `slice`, starting at
/// `*write_offset`, until either the slice is full or the queue is empty.
///
/// `write_offset` is advanced by the number of bytes moved. Nothing happens
/// when `*write_offset` is already at or past the end of `slice`.
fn dump_queue(queue: &mut VecDeque<u8>, slice: &mut [u8], write_offset: &mut usize) {
    while let Some(slot) = slice.get_mut(*write_offset) {
        let Some(byte) = queue.pop_front() else { break };
        *slot = byte;
        *write_offset += 1;
    }
}

/// Two-state automaton fed with the characters that precede the current
/// position while lowercasing.
///
/// `make_utf8_lowercase` consults it to decide whether a `Σ` may be in
/// word-final position, since the preceding text has already been overwritten
/// and cannot be re-read.
#[derive(Clone)]
enum FinalSigmaAutomata {
/// Non-accepting state; also the start state.
Init,
/// Accepting state: entered on a `Cased` character and kept while the
/// following characters are `Cased` or `Case_Ignorable`.
Accepted,
}

impl FinalSigmaAutomata {
fn new() -> Self {
Self::Init
}

fn is_accepting(&self) -> bool {
match self {
FinalSigmaAutomata::Accepted => true,
FinalSigmaAutomata::Init => false,
}
}

fn step(&mut self, c: char) {
use core::unicode::{Case_Ignorable, Cased};

use FinalSigmaAutomata::*;
*self = match self {
Init => {
if Cased(c) {
Accepted
} else {
Init
}
}
Accepted => {
if Cased(c) || Case_Ignorable(c) {
Accepted
} else {
Init
}
}
}
}
}

#[cfg(not(test))]
impl [u8] {
/// Returns a vector containing a copy of this slice where each byte
Expand Down
162 changes: 162 additions & 0 deletions library/alloc/src/slice/byte_slice_make_case.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
use crate::collections::VecDeque;

#[cfg(not(test))]
impl [u8] {
    /// Converts the UTF-8 text stored in this slice to uppercase, in place.
    ///
    /// The text is decoded one character at a time from the front. Each
    /// character's uppercase mapping is written over bytes that have already
    /// been decoded; bytes that do not fit there yet are parked in a queue and
    /// written out as soon as more input has been consumed.
    ///
    /// Returns `Ok(n)` when the converted text is strictly shorter than the
    /// slice, in which case it occupies the first `n` bytes. Otherwise returns
    /// `Err(queue)`: the slice is completely filled with converted text and
    /// `queue` holds the (possibly empty) remainder that did not fit.
    ///
    /// # Panics
    ///
    /// Panics if the number of decoded bytes differs from the slice length.
    ///
    /// # Safety
    ///
    /// - Must be valid UTF-8
    #[rustc_allow_incoherent_impl]
    #[unstable(issue = "none", feature = "std_internals")]
    #[allow(dead_code)]
    pub unsafe fn make_utf8_uppercase(&mut self) -> Result<usize, VecDeque<u8>> {
        let mut overflow = VecDeque::new();
        // Number of input bytes decoded so far / output bytes stored in the slice.
        let mut consumed = 0;
        let mut written = 0;

        loop {
            // SAFETY: the caller guarantees valid UTF-8, and only bytes before
            // `consumed` are ever overwritten, so the unread tail is untouched.
            let decoded =
                unsafe { core::str::next_code_point_with_width(&mut self[consumed..].iter()) };
            let Some((codepoint, width)) = decoded else { break };
            consumed += width;

            // Pending bytes were produced by earlier characters, so they have to
            // reach the slice before anything this character emits.
            dump_queue(&mut overflow, &mut self[..consumed], &mut written);

            // SAFETY: the code point was decoded from text the caller guarantees
            // to be valid UTF-8.
            let source_char = unsafe { char::from_u32_unchecked(codepoint) };
            for upper in source_char.to_uppercase() {
                encode_to_slice_or_else_to_queue(
                    upper,
                    &mut overflow,
                    &mut self[..consumed],
                    &mut written,
                );
            }
        }

        assert_eq!(consumed, self.len());
        if written < consumed { Ok(written) } else { Err(overflow) }
    }

    /// Converts the UTF-8 text stored in this slice to lowercase, in place.
    ///
    /// Works like `make_utf8_uppercase`, with one extra rule: `Σ` maps to `σ`,
    /// except at the end of a word where it maps to `ς`. Because the text
    /// before the current position has already been overwritten, the context
    /// preceding a `Σ` is tracked with a `FinalSigmaAutomata`, while the
    /// context following it is read from the not-yet-consumed tail.
    ///
    /// Returns `Ok(n)` when the converted text is strictly shorter than the
    /// slice, in which case it occupies the first `n` bytes. Otherwise returns
    /// `Err(queue)`: the slice is completely filled with converted text and
    /// `queue` holds the (possibly empty) remainder that did not fit.
    ///
    /// # Panics
    ///
    /// Panics if the number of decoded bytes differs from the slice length.
    ///
    /// # Safety
    ///
    /// - Must be valid UTF-8
    #[rustc_allow_incoherent_impl]
    #[unstable(issue = "none", feature = "std_internals")]
    #[allow(dead_code)]
    pub unsafe fn make_utf8_lowercase(&mut self) -> Result<usize, VecDeque<u8>> {
        // For now this is copy pasted from core::str, FIXME: DRY
        fn case_ignorable_then_cased<I: Iterator<Item = char>>(mut iter: I) -> bool {
            use core::unicode::{Case_Ignorable, Cased};
            // The first character that is not case-ignorable decides the answer.
            match iter.find(|&c| !Case_Ignorable(c)) {
                Some(c) => Cased(c),
                None => false,
            }
        }

        let mut overflow = VecDeque::new();
        // Number of input bytes decoded so far / output bytes stored in the slice.
        let mut consumed = 0;
        let mut written = 0;
        let mut preceding_context = FinalSigmaAutomata::new();

        loop {
            // SAFETY: the caller guarantees valid UTF-8, and only bytes before
            // `consumed` are ever overwritten, so the unread tail is untouched.
            let decoded =
                unsafe { core::str::next_code_point_with_width(&mut self[consumed..].iter()) };
            let Some((codepoint, width)) = decoded else { break };
            consumed += width;

            // Pending bytes were produced by earlier characters, so they have to
            // reach the slice before anything this character emits.
            dump_queue(&mut overflow, &mut self[..consumed], &mut written);

            // SAFETY: the code point was decoded from text the caller guarantees
            // to be valid UTF-8.
            let source_char = unsafe { char::from_u32_unchecked(codepoint) };
            if source_char == 'Σ' {
                // Σ maps to σ, except at the end of a word where it maps to ς.
                // See core::str::to_lowercase
                //
                // SAFETY: the tail past `consumed` has not been modified and is
                // part of the text the caller guarantees to be valid UTF-8.
                let following = unsafe { core::str::from_utf8_unchecked(&self[consumed..]) };
                let is_word_final = preceding_context.is_accepting()
                    && !case_ignorable_then_cased(following.chars());
                let sigma = if is_word_final { 'ς' } else { 'σ' };
                encode_to_slice_or_else_to_queue(
                    sigma,
                    &mut overflow,
                    &mut self[..consumed],
                    &mut written,
                );
            } else {
                for lower in source_char.to_lowercase() {
                    encode_to_slice_or_else_to_queue(
                        lower,
                        &mut overflow,
                        &mut self[..consumed],
                        &mut written,
                    );
                }
            }
            // The automaton is fed the original character, not its lowercase form.
            preceding_context.step(source_char);
        }

        assert_eq!(consumed, self.len());
        if written < consumed { Ok(written) } else { Err(overflow) }
    }
}

/// Encodes `c` as UTF-8 and stores it at `slice[*write_offset..]`.
///
/// As many leading bytes as fit are copied into the slice and `write_offset`
/// is advanced by that amount; any bytes left over are appended to `queue`.
///
/// Panics if `*write_offset` is greater than `slice.len()`.
fn encode_to_slice_or_else_to_queue(
    c: char,
    queue: &mut VecDeque<u8>,
    slice: &mut [u8],
    write_offset: &mut usize,
) {
    let mut scratch = [0; 4];
    let encoded_len = c.encode_utf8(&mut scratch).len();
    let encoded = &scratch[..encoded_len];

    let room = &mut slice[*write_offset..];
    let fits = encoded.len().min(room.len());

    // Split the encoding into the part that lands in the slice and the part
    // that has to wait in the queue.
    let (in_place, spill) = encoded.split_at(fits);
    room[..fits].copy_from_slice(in_place);
    *write_offset += fits;
    queue.extend(spill);
}

/// Moves bytes from the front of `queue` into `slice`, starting at
/// `*write_offset`, until either the slice is full or the queue is empty.
///
/// `write_offset` is advanced by the number of bytes moved. Nothing happens
/// when `*write_offset` is already at or past the end of `slice`.
fn dump_queue(queue: &mut VecDeque<u8>, slice: &mut [u8], write_offset: &mut usize) {
    while let Some(slot) = slice.get_mut(*write_offset) {
        let Some(byte) = queue.pop_front() else { break };
        *slot = byte;
        *write_offset += 1;
    }
}

/// Two-state automaton fed with the characters that precede the current
/// position while lowercasing.
///
/// `make_utf8_lowercase` consults it to decide whether a `Σ` may be in
/// word-final position, since the preceding text has already been overwritten
/// and cannot be re-read.
#[derive(Clone)]
enum FinalSigmaAutomata {
/// Non-accepting state; also the start state.
Init,
/// Accepting state: entered on a `Cased` character and kept while the
/// following characters are `Cased` or `Case_Ignorable`.
Accepted,
}

impl FinalSigmaAutomata {
fn new() -> Self {
Self::Init
}

fn is_accepting(&self) -> bool {
match self {
FinalSigmaAutomata::Accepted => true,
FinalSigmaAutomata::Init => false,
}
}

fn step(&mut self, c: char) {
use core::unicode::{Case_Ignorable, Cased};

use FinalSigmaAutomata::*;
*self = match self {
Init => {
if Cased(c) {
Accepted
} else {
Init
}
}
Accepted => {
if Cased(c) || Case_Ignorable(c) {
Accepted
} else {
Init
}
}
}
}
}
2 changes: 2 additions & 0 deletions library/alloc/src/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1127,6 +1127,7 @@ impl String {
self.vec.extend_from_slice(string.as_bytes())
}

#[cfg(not(no_global_oom_handling))]
#[unstable(feature = "string_make_uplowercase", issue = "135885")]
#[allow(missing_docs)]
pub fn make_uppercase(&mut self) {
Expand All @@ -1139,6 +1140,7 @@ impl String {
*self = unsafe { Self::from_utf8_unchecked(v) }
}

#[cfg(not(no_global_oom_handling))]
#[unstable(feature = "string_make_uplowercase", issue = "135885")]
#[allow(missing_docs)]
pub fn make_lowercase(&mut self) {
Expand Down

0 comments on commit 6f1f32e

Please sign in to comment.