Skip to content

Commit

Permalink
collections: Implement String::drain(range) according to RFC 574
Browse files Browse the repository at this point in the history
`.drain(range)` is unstable and under feature(collections_drain).

This adds a safe way to remove any range of a String as efficiently as
possible.

As noted in the code, this drain iterator has none of the memory safety
issues of the vector version.

RFC tracking issue is rust-lang#23055
  • Loading branch information
Ulrik Sverdrup committed May 1, 2015
1 parent 42bfeec commit 0fd3e8c
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 1 deletion.
116 changes: 115 additions & 1 deletion src/libcollections/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use core::prelude::*;
use core::fmt;
use core::hash;
use core::iter::FromIterator;
use core::marker::PhantomData;
use core::mem;
use core::ops::{self, Deref, Add, Index};
use core::ptr;
Expand All @@ -26,7 +27,8 @@ use rustc_unicode::str as unicode_str;
use rustc_unicode::str::Utf16Item;

use borrow::{Cow, IntoCow};
use str::{self, FromStr, Utf8Error};
use range::RangeArgument;
use str::{self, FromStr, Utf8Error, Chars};
use vec::{DerefVec, Vec, as_vec};

/// A growable string stored as a UTF-8 encoded buffer.
Expand Down Expand Up @@ -695,6 +697,60 @@ impl String {
pub fn clear(&mut self) {
// Clearing the string is delegated entirely to the backing `vec` field.
self.vec.clear()
}

/// Create a draining iterator that removes the specified range in the string
/// and yields the removed chars from start to end. The element range is
/// removed even if the iterator is not consumed until the end.
///
/// The range is expressed in byte offsets. An omitted start defaults to `0`
/// and an omitted end defaults to the length of the string.
///
/// The removal itself is performed when the returned iterator is dropped;
/// if the iterator is leaked instead, the string is left unchanged.
///
/// # Panics
///
/// Panics if the starting point or end point are not on character boundaries,
/// or if they are out of bounds.
///
/// # Examples
///
/// ```
/// # #![feature(collections_drain)]
///
/// let mut s = String::from("α is alpha, β is beta");
/// let beta_offset = s.find('β').unwrap_or(s.len());
///
/// // Remove the range up until the β from the string
/// let t: String = s.drain(..beta_offset).collect();
/// assert_eq!(t, "α is alpha, ");
/// assert_eq!(s, "β is beta");
///
/// // A full range clears the string
/// s.drain(..);
/// assert_eq!(s, "");
/// ```
#[unstable(feature = "collections_drain",
reason = "recently added, matches RFC")]
pub fn drain<R>(&mut self, range: R) -> Drain where R: RangeArgument<usize> {
// Memory safety
//
// The String version of Drain does not have the memory safety issues
// of the vector version. The data is just plain bytes.
// Because the range removal happens in Drop, if the Drain iterator is leaked,
// the removal will not happen.
let len = self.len();
// Unbounded ends of the range fall back to the start / end of the string.
let start = *range.start().unwrap_or(&0);
let end = *range.end().unwrap_or(&len);

// Take out two simultaneous borrows. The &mut String won't be accessed
// until iteration is over, in Drop.
// The raw pointer is taken first so that it does not conflict with the
// shared borrow held by the `Chars` iterator created below.
let self_ptr = self as *mut _;
// slicing does the appropriate bounds checks
let chars_iter = self[start..end].chars();

Drain {
start: start,
tail_start: end,
iter: chars_iter,
string: self_ptr,
_marker: PhantomData,
}
}
}

impl FromUtf8Error {
Expand Down Expand Up @@ -1072,3 +1128,61 @@ impl fmt::Write for String {
Ok(())
}
}

/// A draining iterator for `String`.
///
/// Yields the chars of the drained range. The range itself is removed from
/// the string when this value is dropped.
#[unstable(feature = "collections_drain", reason = "recently added")]
pub struct Drain<'a> {
/// Raw pointer to the string being drained. It is a raw pointer so that it
/// can coexist with the borrow held by `iter`; in the code visible here it
/// is only dereferenced in `Drop`.
string: *mut String,
/// Start of part to remove
start: usize,
/// Index of tail to preserve
tail_start: usize,
/// Current remaining range to remove
iter: Chars<'a>,
/// Records that this value logically holds a `&'a mut String`, since the
/// raw pointer above carries no lifetime of its own.
_marker: PhantomData<&'a mut String>,
}

// `Drain` stores a raw `*mut String`, so `Sync` and `Send` are asserted by hand.
// NOTE(review): presumably sound because the pointer stands in for the
// `&'a mut String` recorded in `_marker`, and the remaining fields are a
// `Chars<'a>` and two `usize` values -- confirm.
unsafe impl<'a> Sync for Drain<'a> {}
unsafe impl<'a> Send for Drain<'a> {}

#[unstable(feature = "collections_drain", reason = "recently added")]
impl<'a> Drop for Drain<'a> {
/// Removes the byte range `start..tail_start` from the source string by
/// moving the preserved tail down to `start` and shrinking the length.
fn drop(&mut self) {
unsafe {
// memmove back untouched tail, then truncate & reset length
//
// NOTE(review): relies on `string` still pointing at the String that
// `drain` borrowed for `'a`, and on `start <= tail_start <= len` as
// checked by the slicing in `drain` -- confirm there is no other
// way to construct a `Drain`.
let self_vec = (*self.string).as_mut_vec();
// Number of bytes after the drained range that must be kept.
let tail_len = self_vec.len() - self.tail_start;
if tail_len > 0 {
let src = self_vec.as_ptr().offset(self.tail_start as isize);
let dst = self_vec.as_mut_ptr().offset(self.start as isize);
// Source and destination overlap whenever the tail is longer than
// the removed range.
ptr::copy(src, dst, tail_len);
}
// New length = bytes before the drained range + preserved tail.
self_vec.set_len(self.start + tail_len);
}
}
}

#[unstable(feature = "collections_drain", reason = "recently added")]
impl<'a> Iterator for Drain<'a> {
    type Item = char;

    /// Yields the next char from the front of the range being drained.
    #[inline]
    fn next(&mut self) -> Option<char> {
        Iterator::next(&mut self.iter)
    }

    /// Reports whatever bounds the inner `Chars` iterator reports.
    fn size_hint(&self) -> (usize, Option<usize>) {
        Iterator::size_hint(&self.iter)
    }
}

#[unstable(feature = "collections_drain", reason = "recently added")]
impl<'a> DoubleEndedIterator for Drain<'a> {
    /// Yields the next char from the back of the range being drained.
    #[inline]
    fn next_back(&mut self) -> Option<char> {
        DoubleEndedIterator::next_back(&mut self.iter)
    }
}

// NOTE(review): `Drain::size_hint` forwards to the inner `Chars` iterator.
// `ExactSizeIterator` requires that hint to be exact, but a char count cannot
// in general be derived from a byte length without decoding. Confirm the hint
// is exact for multi-byte content; otherwise `len()` is unreliable and this
// impl should be removed.
#[unstable(feature = "collections_drain", reason = "recently added")]
impl<'a> ExactSizeIterator for Drain<'a> { }
17 changes: 17 additions & 0 deletions src/libcollectionstest/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,23 @@ fn test_from_iterator() {
assert_eq!(s, d);
}

#[test]
fn test_drain() {
    // Draining a byte range that covers exactly one two-byte char.
    let mut greek = String::from("αβγ");
    let removed: String = greek.drain(2..4).collect();
    assert_eq!(removed, "β");
    assert_eq!(greek, "αγ");

    // Dropping the iterator without consuming it still removes the range.
    let mut ascii = String::from("abcd");

    // Empty prefix: nothing is removed.
    drop(ascii.drain(..0));
    assert_eq!(ascii, "abcd");

    // One-byte prefix.
    drop(ascii.drain(..1));
    assert_eq!(ascii, "bcd");

    // Empty suffix starting at the end of the string.
    drop(ascii.drain(3..));
    assert_eq!(ascii, "bcd");

    // Full range clears the string.
    drop(ascii.drain(..));
    assert_eq!(ascii, "");
}

#[bench]
fn bench_with_capacity(b: &mut Bencher) {
b.iter(|| {
Expand Down
2 changes: 2 additions & 0 deletions src/test/run-pass/sync-send-iterators-in-libcollections.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use collections::{BitSet, BitVec};
use collections::{BTreeMap, BTreeSet};
use collections::EnumSet;
use collections::LinkedList;
use collections::String;
use collections::Vec;
use collections::VecDeque;
use collections::VecMap;
Expand Down Expand Up @@ -99,4 +100,5 @@ fn main() {

all_sync_send!(Vec::<usize>::new(), into_iter);
is_sync_send!(Vec::<usize>::new(), drain(..));
is_sync_send!(String::new(), drain(..));
}

0 comments on commit 0fd3e8c

Please sign in to comment.