From 755cfbf236240e2dd0f0e7c3d8c6d175b04b8883 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Sun, 3 Dec 2023 15:18:21 +0100 Subject: [PATCH] core: introduce split_at{,_mut}_checked Introduce split_at_checked and split_at_mut_checked methods to slice types (including str), which are non-panicking versions of split_at and split_at_mut respectively. This is analogous to the get method being a non-panicking version of indexing. --- library/core/src/lib.rs | 1 + library/core/src/slice/mod.rs | 273 ++++++++++++++++++++++++++++++++-- library/core/src/str/mod.rs | 129 +++++++++++++--- 3 files changed, 375 insertions(+), 28 deletions(-) diff --git a/library/core/src/lib.rs b/library/core/src/lib.rs index 81bf6f28693f1..840c6254641a5 100644 --- a/library/core/src/lib.rs +++ b/library/core/src/lib.rs @@ -184,6 +184,7 @@ #![feature(set_ptr_value)] #![feature(slice_ptr_get)] #![feature(slice_split_at_unchecked)] +#![feature(split_at_checked)] #![feature(str_internals)] #![feature(str_split_inclusive_remainder)] #![feature(str_split_remainder)] diff --git a/library/core/src/slice/mod.rs b/library/core/src/slice/mod.rs index ee36d93576b21..ffc3b4486be65 100644 --- a/library/core/src/slice/mod.rs +++ b/library/core/src/slice/mod.rs @@ -1842,7 +1842,8 @@ impl [T] { /// /// # Panics /// - /// Panics if `mid > len`. + /// Panics if `mid > len`. For a non-panicking alternative see + /// [`split_at_checked`](slice::split_at_checked). /// /// # Examples /// @@ -1869,14 +1870,15 @@ impl [T] { /// ``` #[stable(feature = "rust1", since = "1.0.0")] #[rustc_const_stable(feature = "const_slice_split_at_not_mut", since = "1.71.0")] + #[rustc_allow_const_fn_unstable(split_at_checked)] #[inline] #[track_caller] #[must_use] pub const fn split_at(&self, mid: usize) -> (&[T], &[T]) { - assert!(mid <= self.len()); - // SAFETY: `[ptr; mid]` and `[mid; len]` are inside `self`, which - // fulfills the requirements of `split_at_unchecked`.
- unsafe { self.split_at_unchecked(mid) } + match self.split_at_checked(mid) { + Some(pair) => pair, + None => panic!("mid > len"), + } } /// Divides one mutable slice into two at an index. @@ -1887,7 +1889,8 @@ impl [T] { /// /// # Panics /// - /// Panics if `mid > len`. + /// Panics if `mid > len`. For a non-panicking alternative see + /// [`split_at_mut_checked`](slice::split_at_mut_checked). /// /// # Examples /// @@ -1906,10 +1909,10 @@ impl [T] { #[must_use] #[rustc_const_unstable(feature = "const_slice_split_at_mut", issue = "101804")] pub const fn split_at_mut(&mut self, mid: usize) -> (&mut [T], &mut [T]) { - assert!(mid <= self.len()); - // SAFETY: `[ptr; mid]` and `[mid; len]` are inside `self`, which - // fulfills the requirements of `from_raw_parts_mut`. - unsafe { self.split_at_mut_unchecked(mid) } + match self.split_at_mut_checked(mid) { + Some(pair) => pair, + None => panic!("mid > len"), + } } /// Divides one slice into two at an index, without doing bounds checking. @@ -2031,6 +2034,256 @@ impl [T] { unsafe { (from_raw_parts_mut(ptr, mid), from_raw_parts_mut(ptr.add(mid), len - mid)) } } + /// Divides one slice into two at an index, returning `None` if slice is too + /// short. + /// + /// The first will contain all indices from `[0, mid)` (excluding + /// the index `mid` itself) and the second will contain all + /// indices from `[mid, len)` (excluding the index `len` itself). + /// + /// Returns `None` if `mid > len`.
+ /// + /// # Examples + /// + /// ``` + /// #![feature(split_at_checked)] + /// + /// let v = [1, 2, 3, 4, 5, 6]; + /// + /// { + /// let (left, right) = v.split_at_checked(0).unwrap(); + /// assert_eq!(left, []); + /// assert_eq!(right, [1, 2, 3, 4, 5, 6]); + /// } + /// + /// { + /// let (left, right) = v.split_at_checked(2).unwrap(); + /// assert_eq!(left, [1, 2]); + /// assert_eq!(right, [3, 4, 5, 6]); + /// } + /// + /// { + /// let (left, right) = v.split_at_checked(6).unwrap(); + /// assert_eq!(left, [1, 2, 3, 4, 5, 6]); + /// assert_eq!(right, []); + /// } + /// + /// assert_eq!(None, v.split_at_checked(7)); + /// ``` + #[unstable(feature = "split_at_checked", reason = "new API", issue = "119128")] + #[rustc_const_unstable(feature = "split_at_checked", issue = "119128")] + #[inline] + #[track_caller] + #[must_use] + pub const fn split_at_checked(&self, mid: usize) -> Option<(&[T], &[T])> { + if mid <= self.len() { + // SAFETY: `[ptr; mid]` and `[mid; len]` are inside `self`, which + // fulfills the requirements of `split_at_unchecked`. + Some(unsafe { self.split_at_unchecked(mid) }) + } else { + None + } + } + + /// Divides one mutable slice into two at an index, returning `None` if + /// slice is too short. + /// + /// The first will contain all indices from `[0, mid)` (excluding + /// the index `mid` itself) and the second will contain all + /// indices from `[mid, len)` (excluding the index `len` itself). + /// + /// Returns `None` if `mid > len`. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(split_at_checked)] + /// + /// let mut v = [1, 0, 3, 0, 5, 6]; + /// + /// if let Some((left, right)) = v.split_at_mut_checked(2) { + /// assert_eq!(left, [1, 0]); + /// assert_eq!(right, [3, 0, 5, 6]); + /// left[1] = 2; + /// right[1] = 4; + /// } + /// assert_eq!(v, [1, 2, 3, 4, 5, 6]); + /// + /// assert_eq!(None, v.split_at_mut_checked(7)); + /// ``` + #[unstable(feature = "split_at_checked", reason = "new API", issue = "119128")] + #[rustc_const_unstable(feature = "split_at_checked", issue = "119128")] + #[inline] + #[track_caller] + #[must_use] + pub const fn split_at_mut_checked(&mut self, mid: usize) -> Option<(&mut [T], &mut [T])> { + if mid <= self.len() { + // SAFETY: `[ptr; mid]` and `[mid; len]` are inside `self`, which + // fulfills the requirements of `split_at_mut_unchecked`. + Some(unsafe { self.split_at_mut_unchecked(mid) }) + } else { + None + } + } + + /// Divides one slice into an array and a remainder slice at an index. + /// + /// The array will contain all indices from `[0, N)` (excluding + /// the index `N` itself) and the slice will contain all + /// indices from `[N, len)` (excluding the index `len` itself). + /// + /// # Panics + /// + /// Panics if `N > len`.
+ /// + /// # Examples + /// + /// ``` + /// #![feature(split_array)] + /// + /// let v = &[1, 2, 3, 4, 5, 6][..]; + /// + /// { + /// let (left, right) = v.split_array_ref::<0>(); + /// assert_eq!(left, &[]); + /// assert_eq!(right, [1, 2, 3, 4, 5, 6]); + /// } + /// + /// { + /// let (left, right) = v.split_array_ref::<2>(); + /// assert_eq!(left, &[1, 2]); + /// assert_eq!(right, [3, 4, 5, 6]); + /// } + /// + /// { + /// let (left, right) = v.split_array_ref::<6>(); + /// assert_eq!(left, &[1, 2, 3, 4, 5, 6]); + /// assert_eq!(right, []); + /// } + /// ``` + #[unstable(feature = "split_array", reason = "new API", issue = "90091")] + #[inline] + #[track_caller] + #[must_use] + pub fn split_array_ref(&self) -> (&[T; N], &[T]) { + let (a, b) = self.split_at(N); + // SAFETY: a points to [T; N]? Yes it's [T] of length N (checked by split_at) + unsafe { (&*(a.as_ptr() as *const [T; N]), b) } + } + + /// Divides one mutable slice into an array and a remainder slice at an index. + /// + /// The array will contain all indices from `[0, N)` (excluding + /// the index `N` itself) and the slice will contain all + /// indices from `[N, len)` (excluding the index `len` itself). + /// + /// # Panics + /// + /// Panics if `N > len`. + /// + /// # Examples + /// + /// ``` + /// #![feature(split_array)] + /// + /// let mut v = &mut [1, 0, 3, 0, 5, 6][..]; + /// let (left, right) = v.split_array_mut::<2>(); + /// assert_eq!(left, &mut [1, 0]); + /// assert_eq!(right, [3, 0, 5, 6]); + /// left[1] = 2; + /// right[1] = 4; + /// assert_eq!(v, [1, 2, 3, 4, 5, 6]); + /// ``` + #[unstable(feature = "split_array", reason = "new API", issue = "90091")] + #[inline] + #[track_caller] + #[must_use] + pub fn split_array_mut(&mut self) -> (&mut [T; N], &mut [T]) { + let (a, b) = self.split_at_mut(N); + // SAFETY: a points to [T; N]? 
Yes it's [T] of length N (checked by split_at_mut) + unsafe { (&mut *(a.as_mut_ptr() as *mut [T; N]), b) } + } + + /// Divides one slice into an array and a remainder slice at an index from + /// the end. + /// + /// The slice will contain all indices from `[0, len - N)` (excluding + /// the index `len - N` itself) and the array will contain all + /// indices from `[len - N, len)` (excluding the index `len` itself). + /// + /// # Panics + /// + /// Panics if `N > len`. + /// + /// # Examples + /// + /// ``` + /// #![feature(split_array)] + /// + /// let v = &[1, 2, 3, 4, 5, 6][..]; + /// + /// { + /// let (left, right) = v.rsplit_array_ref::<0>(); + /// assert_eq!(left, [1, 2, 3, 4, 5, 6]); + /// assert_eq!(right, &[]); + /// } + /// + /// { + /// let (left, right) = v.rsplit_array_ref::<2>(); + /// assert_eq!(left, [1, 2, 3, 4]); + /// assert_eq!(right, &[5, 6]); + /// } + /// + /// { + /// let (left, right) = v.rsplit_array_ref::<6>(); + /// assert_eq!(left, []); + /// assert_eq!(right, &[1, 2, 3, 4, 5, 6]); + /// } + /// ``` + #[unstable(feature = "split_array", reason = "new API", issue = "90091")] + #[inline] + #[must_use] + pub fn rsplit_array_ref(&self) -> (&[T], &[T; N]) { + assert!(N <= self.len()); + let (a, b) = self.split_at(self.len() - N); + // SAFETY: b points to [T; N]? Yes it's [T] of length N (checked by split_at) + unsafe { (a, &*(b.as_ptr() as *const [T; N])) } + } + + /// Divides one mutable slice into an array and a remainder slice at an + /// index from the end. + /// + /// The slice will contain all indices from `[0, len - N)` (excluding + /// the index `len - N` itself) and the array will contain all + /// indices from `[len - N, len)` (excluding the index `len` itself). + /// + /// # Panics + /// + /// Panics if `N > len`.
+ /// + /// # Examples + /// + /// ``` + /// #![feature(split_array)] + /// + /// let mut v = &mut [1, 0, 3, 0, 5, 6][..]; + /// let (left, right) = v.rsplit_array_mut::<4>(); + /// assert_eq!(left, [1, 0]); + /// assert_eq!(right, &mut [3, 0, 5, 6]); + /// left[1] = 2; + /// right[1] = 4; + /// assert_eq!(v, [1, 2, 3, 4, 5, 6]); + /// ``` + #[unstable(feature = "split_array", reason = "new API", issue = "90091")] + #[inline] + #[must_use] + pub fn rsplit_array_mut(&mut self) -> (&mut [T], &mut [T; N]) { + assert!(N <= self.len()); + let (a, b) = self.split_at_mut(self.len() - N); + // SAFETY: b points to [T; N]? Yes it's [T] of length N (checked by split_at_mut) + unsafe { (a, &mut *(b.as_mut_ptr() as *mut [T; N])) } + } + /// Returns an iterator over subslices separated by elements that match /// `pred`. The matched element is not contained in the subslices. /// diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs index a22c46edce254..ebda6e994a773 100644 --- a/library/core/src/str/mod.rs +++ b/library/core/src/str/mod.rs @@ -641,8 +641,9 @@ impl str { /// /// # Panics /// - /// Panics if `mid` is not on a UTF-8 code point boundary, or if it is - /// past the end of the last code point of the string slice. + /// Panics if `mid` is not on a UTF-8 code point boundary, or if it is past + /// the end of the last code point of the string slice. For a non-panicking + /// alternative see [`split_at_checked`](str::split_at_checked). /// /// # Examples /// @@ -658,12 +659,9 @@ impl str { #[must_use] #[stable(feature = "str_split_at", since = "1.4.0")] pub fn split_at(&self, mid: usize) -> (&str, &str) { - // is_char_boundary checks that the index is in [0, .len()] - if self.is_char_boundary(mid) { - // SAFETY: just checked that `mid` is on a char boundary.
- unsafe { (self.get_unchecked(0..mid), self.get_unchecked(mid..self.len())) } - } else { - slice_error_fail(self, 0, mid) + match self.split_at_checked(mid) { + None => slice_error_fail(self, 0, mid), + Some(pair) => pair, } } @@ -681,8 +679,9 @@ impl str { /// /// # Panics /// - /// Panics if `mid` is not on a UTF-8 code point boundary, or if it is - /// past the end of the last code point of the string slice. + /// Panics if `mid` is not on a UTF-8 code point boundary, or if it is past + /// the end of the last code point of the string slice. For a non-panicking + /// alternative see [`split_at_mut_checked`](str::split_at_mut_checked). /// /// # Examples /// @@ -702,20 +701,114 @@ impl str { pub fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str) { // is_char_boundary checks that the index is in [0, .len()] if self.is_char_boundary(mid) { - let len = self.len(); - let ptr = self.as_mut_ptr(); // SAFETY: just checked that `mid` is on a char boundary. - unsafe { - ( - from_utf8_unchecked_mut(slice::from_raw_parts_mut(ptr, mid)), - from_utf8_unchecked_mut(slice::from_raw_parts_mut(ptr.add(mid), len - mid)), - ) - } + unsafe { self.split_at_mut_unchecked(mid) } } else { slice_error_fail(self, 0, mid) } } + /// Divide one string slice into two at an index. + /// + /// The argument, `mid`, should be a valid byte offset from the start of the + /// string. It must also be on the boundary of a UTF-8 code point. The + /// method returns `None` if that’s not the case. + /// + /// The two slices returned go from the start of the string slice to `mid`, + /// and from `mid` to the end of the string slice. + /// + /// To get mutable string slices instead, see the [`split_at_mut_checked`] + /// method.
+ /// + /// [`split_at_mut_checked`]: str::split_at_mut_checked + /// + /// # Examples + /// + /// ``` + /// #![feature(split_at_checked)] + /// + /// let s = "Per Martin-Löf"; + /// + /// let (first, last) = s.split_at_checked(3).unwrap(); + /// assert_eq!("Per", first); + /// assert_eq!(" Martin-Löf", last); + /// + /// assert_eq!(None, s.split_at_checked(13)); // Inside “ö” + /// assert_eq!(None, s.split_at_checked(16)); // Beyond the string length + /// ``` + #[inline] + #[must_use] + #[unstable(feature = "split_at_checked", reason = "new API", issue = "119128")] + pub fn split_at_checked(&self, mid: usize) -> Option<(&str, &str)> { + // is_char_boundary checks that the index is in [0, .len()] + if self.is_char_boundary(mid) { + // SAFETY: just checked that `mid` is on a char boundary. + Some(unsafe { (self.get_unchecked(0..mid), self.get_unchecked(mid..self.len())) }) + } else { + None + } + } + + /// Divide one mutable string slice into two at an index. + /// + /// The argument, `mid`, should be a valid byte offset from the start of the + /// string. It must also be on the boundary of a UTF-8 code point. The + /// method returns `None` if that’s not the case. + /// + /// The two slices returned go from the start of the string slice to `mid`, + /// and from `mid` to the end of the string slice. + /// + /// To get immutable string slices instead, see the [`split_at_checked`] method. 
+ /// + /// [`split_at_checked`]: str::split_at_checked + /// + /// # Examples + /// + /// ``` + /// #![feature(split_at_checked)] + /// + /// let mut s = "Per Martin-Löf".to_string(); + /// if let Some((first, last)) = s.split_at_mut_checked(3) { + /// first.make_ascii_uppercase(); + /// assert_eq!("PER", first); + /// assert_eq!(" Martin-Löf", last); + /// } + /// assert_eq!("PER Martin-Löf", s); + /// + /// assert_eq!(None, s.split_at_mut_checked(13)); // Inside “ö” + /// assert_eq!(None, s.split_at_mut_checked(16)); // Beyond the string length + /// ``` + #[inline] + #[must_use] + #[unstable(feature = "split_at_checked", reason = "new API", issue = "119128")] + pub fn split_at_mut_checked(&mut self, mid: usize) -> Option<(&mut str, &mut str)> { + // is_char_boundary checks that the index is in [0, .len()] + if self.is_char_boundary(mid) { + // SAFETY: just checked that `mid` is on a char boundary. + Some(unsafe { self.split_at_mut_unchecked(mid) }) + } else { + None + } + } + + /// Divide one string slice into two at an index. + /// + /// # Safety + /// + /// The caller must ensure that `mid` is a valid byte offset from the start + /// of the string and falls on the boundary of a UTF-8 code point. + unsafe fn split_at_mut_unchecked(&mut self, mid: usize) -> (&mut str, &mut str) { + let len = self.len(); + let ptr = self.as_mut_ptr(); + // SAFETY: caller guarantees `mid` is on a char boundary. + unsafe { + ( + from_utf8_unchecked_mut(slice::from_raw_parts_mut(ptr, mid)), + from_utf8_unchecked_mut(slice::from_raw_parts_mut(ptr.add(mid), len - mid)), + ) + } + } + /// Returns an iterator over the [`char`]s of a string slice. /// /// As a string slice consists of valid UTF-8, we can iterate through a