Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement feature sort_unstable #40601

Merged
merged 9 commits into from Mar 21, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/libcollections/benches/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#![deny(warnings)]

#![feature(rand)]
#![feature(sort_unstable)]
#![feature(test)]

extern crate test;
Expand Down
110 changes: 61 additions & 49 deletions src/libcollections/benches/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ fn random_inserts(b: &mut Bencher) {
}
})
}

#[bench]
fn random_removes(b: &mut Bencher) {
let mut rng = thread_rng();
Expand Down Expand Up @@ -216,65 +217,76 @@ fn gen_mostly_descending(len: usize) -> Vec<u64> {
v
}

fn gen_big_random(len: usize) -> Vec<[u64; 16]> {
fn gen_strings(len: usize) -> Vec<String> {
let mut rng = thread_rng();
rng.gen_iter().map(|x| [x; 16]).take(len).collect()
}

fn gen_big_ascending(len: usize) -> Vec<[u64; 16]> {
(0..len as u64).map(|x| [x; 16]).take(len).collect()
let mut v = vec![];
for _ in 0..len {
let n = rng.gen::<usize>() % 20 + 1;
v.push(rng.gen_ascii_chars().take(n).collect());
}
v
}

fn gen_big_descending(len: usize) -> Vec<[u64; 16]> {
(0..len as u64).rev().map(|x| [x; 16]).take(len).collect()
fn gen_big_random(len: usize) -> Vec<[u64; 16]> {
let mut rng = thread_rng();
rng.gen_iter().map(|x| [x; 16]).take(len).collect()
}

macro_rules! sort_bench {
($name:ident, $gen:expr, $len:expr) => {
macro_rules! sort {
($f:ident, $name:ident, $gen:expr, $len:expr) => {
#[bench]
fn $name(b: &mut Bencher) {
b.iter(|| $gen($len).sort());
b.iter(|| $gen($len).$f());
b.bytes = $len * mem::size_of_val(&$gen(1)[0]) as u64;
}
}
}

sort_bench!(sort_small_random, gen_random, 10);
sort_bench!(sort_small_ascending, gen_ascending, 10);
sort_bench!(sort_small_descending, gen_descending, 10);

sort_bench!(sort_small_big_random, gen_big_random, 10);
sort_bench!(sort_small_big_ascending, gen_big_ascending, 10);
sort_bench!(sort_small_big_descending, gen_big_descending, 10);

sort_bench!(sort_medium_random, gen_random, 100);
sort_bench!(sort_medium_ascending, gen_ascending, 100);
sort_bench!(sort_medium_descending, gen_descending, 100);

sort_bench!(sort_large_random, gen_random, 10000);
sort_bench!(sort_large_ascending, gen_ascending, 10000);
sort_bench!(sort_large_descending, gen_descending, 10000);
sort_bench!(sort_large_mostly_ascending, gen_mostly_ascending, 10000);
sort_bench!(sort_large_mostly_descending, gen_mostly_descending, 10000);

sort_bench!(sort_large_big_random, gen_big_random, 10000);
sort_bench!(sort_large_big_ascending, gen_big_ascending, 10000);
sort_bench!(sort_large_big_descending, gen_big_descending, 10000);
// Generates a `#[bench]` function named `$name` that builds a vector with `$gen($len)` and
// sorts it by calling the slice method `$f` (e.g. `sort_by`) with a deliberately costly
// comparator: elements are ordered by the cosine of their value converted to `f64`.
//
// Note that the input is generated inside `b.iter`, so generation is part of the measured work.
macro_rules! sort_expensive {
($f:ident, $name:ident, $gen:expr, $len:expr) => {
#[bench]
fn $name(b: &mut Bencher) {
b.iter(|| {
let mut v = $gen($len);
// Tally of comparator invocations made during this iteration.
let mut count = 0;
v.$f(|a: &u64, b: &u64| {
count += 1;
// NOTE(review): presumably this branch exists only so that the tally is observed
// inside the comparator; it is not expected to fire (see the panic message).
if count % 1_000_000_000 == 0 {
panic!("should not happen");
}
// `unwrap` panics if `partial_cmp` returns `None` for the two cosines.
(*a as f64).cos().partial_cmp(&(*b as f64).cos()).unwrap()
});
// NOTE(review): presumably keeps the optimizer from discarding `count` — confirm.
black_box(count);
});
// Input size per iteration in bytes: `$len` elements of `u64`.
b.bytes = $len as u64 * mem::size_of::<u64>() as u64;
}
}
}

#[bench]
fn sort_large_random_expensive(b: &mut Bencher) {
let len = 10000;
b.iter(|| {
let mut v = gen_random(len);
let mut count = 0;
v.sort_by(|a: &u64, b: &u64| {
count += 1;
if count % 1_000_000_000 == 0 {
panic!("should not happen");
}
(*a as f64).cos().partial_cmp(&(*b as f64).cos()).unwrap()
});
black_box(count);
});
b.bytes = len as u64 * mem::size_of::<u64>() as u64;
}
sort!(sort, sort_small_ascending, gen_ascending, 10);
sort!(sort, sort_small_descending, gen_descending, 10);
sort!(sort, sort_small_random, gen_random, 10);
sort!(sort, sort_small_big_random, gen_big_random, 10);
sort!(sort, sort_medium_random, gen_random, 100);
sort!(sort, sort_large_ascending, gen_ascending, 10000);
sort!(sort, sort_large_descending, gen_descending, 10000);
sort!(sort, sort_large_mostly_ascending, gen_mostly_ascending, 10000);
sort!(sort, sort_large_mostly_descending, gen_mostly_descending, 10000);
sort!(sort, sort_large_random, gen_random, 10000);
sort!(sort, sort_large_big_random, gen_big_random, 10000);
sort!(sort, sort_large_strings, gen_strings, 10000);
sort_expensive!(sort_by, sort_large_random_expensive, gen_random, 10000);

sort!(sort_unstable, sort_unstable_small_ascending, gen_ascending, 10);
sort!(sort_unstable, sort_unstable_small_descending, gen_descending, 10);
sort!(sort_unstable, sort_unstable_small_random, gen_random, 10);
sort!(sort_unstable, sort_unstable_small_big_random, gen_big_random, 10);
sort!(sort_unstable, sort_unstable_medium_random, gen_random, 100);
sort!(sort_unstable, sort_unstable_large_ascending, gen_ascending, 10000);
sort!(sort_unstable, sort_unstable_large_descending, gen_descending, 10000);
sort!(sort_unstable, sort_unstable_large_mostly_ascending, gen_mostly_ascending, 10000);
sort!(sort_unstable, sort_unstable_large_mostly_descending, gen_mostly_descending, 10000);
sort!(sort_unstable, sort_unstable_large_random, gen_random, 10000);
sort!(sort_unstable, sort_unstable_large_big_random, gen_big_random, 10000);
sort!(sort_unstable, sort_unstable_large_strings, gen_strings, 10000);
sort_expensive!(sort_unstable_by, sort_unstable_large_random_expensive, gen_random, 10000);
1 change: 1 addition & 0 deletions src/libcollections/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
#![feature(shared)]
#![feature(slice_get_slice)]
#![feature(slice_patterns)]
#![cfg_attr(not(test), feature(sort_unstable))]
#![feature(specialization)]
#![feature(staged_api)]
#![feature(str_internals)]
Expand Down
162 changes: 134 additions & 28 deletions src/libcollections/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1092,6 +1092,39 @@ impl<T> [T] {
merge_sort(self, |a, b| a.lt(b));
}

/// Sorts the slice using `compare` to compare elements.
///
/// This sort is stable (i.e. does not reorder equal elements) and `O(n log n)` worst-case.
///
/// # Current implementation
///
/// The current algorithm is an adaptive, iterative merge sort inspired by
/// [timsort](https://en.wikipedia.org/wiki/Timsort).
/// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
/// two or more sorted sequences concatenated one after another.
///
/// Also, it allocates temporary storage half the size of `self`, but for short slices a
/// non-allocating insertion sort is used instead.
///
/// # Examples
///
/// ```
/// let mut v = [5, 4, 1, 3, 2];
/// v.sort_by(|a, b| a.cmp(b));
/// assert!(v == [1, 2, 3, 4, 5]);
///
/// // reverse sorting
/// v.sort_by(|a, b| b.cmp(a));
/// assert!(v == [5, 4, 3, 2, 1]);
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
#[inline]
pub fn sort_by<F>(&mut self, mut compare: F)
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function is not added or anything. It was just reordered with sort_by_key.
Previous order: sort, sort_by_key, sort_by.
Current order: sort, sort_by, sort_by_key.

where F: FnMut(&T, &T) -> Ordering
{
merge_sort(self, |a, b| compare(a, b) == Less);
}

/// Sorts the slice using `f` to extract a key to compare elements by.
///
/// This sort is stable (i.e. does not reorder equal elements) and `O(n log n)` worst-case.
Expand Down Expand Up @@ -1122,37 +1155,118 @@ impl<T> [T] {
merge_sort(self, |a, b| f(a).lt(&f(b)));
}

/// Sorts the slice using `compare` to compare elements.
/// Sorts the slice, but may not preserve the order of equal elements.
///
/// This sort is stable (i.e. does not reorder equal elements) and `O(n log n)` worst-case.
/// This sort is unstable (i.e. may reorder equal elements), in-place (i.e. does not allocate),
/// and `O(n log n)` worst-case.
///
/// # Current implementation
///
/// The current algorithm is an adaptive, iterative merge sort inspired by
/// [timsort](https://en.wikipedia.org/wiki/Timsort).
/// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
/// two or more sorted sequences concatenated one after another.
/// The current algorithm is based on Orson Peters' [pattern-defeating quicksort][pdqsort],
/// which is a quicksort variant designed to be very fast on certain kinds of patterns,
/// sometimes achieving linear time. It is randomized but deterministic, and falls back to
/// heapsort on degenerate inputs.
///
/// Also, it allocates temporary storage half the size of `self`, but for short slices a
/// non-allocating insertion sort is used instead.
/// It is generally faster than stable sorting, except in a few special cases, e.g. when the
/// slice consists of several concatenated sorted sequences.
///
/// # Examples
///
/// ```
/// #![feature(sort_unstable)]
///
/// let mut v = [-5, 4, 1, -3, 2];
///
/// v.sort_unstable();
/// assert!(v == [-5, -3, 1, 2, 4]);
/// ```
///
/// [pdqsort]: https://github.com/orlp/pdqsort
// FIXME #40585: Mention `sort_unstable` in the documentation for `sort`.
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FIXME because we shouldn't mention unstable functions in the documentation for stable functions.... right?
I assume we'll fix this as soon as the feature gets stabilized.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah that seems reasonable to me, punting on this for now until the function is stabilized.

#[unstable(feature = "sort_unstable", issue = "40585")]
#[inline]
pub fn sort_unstable(&mut self)
where T: Ord
{
core_slice::SliceExt::sort_unstable(self);
}

/// Sorts the slice using `compare` to compare elements, but may not preserve the order of
/// equal elements.
///
/// This sort is unstable (i.e. may reorder equal elements), in-place (i.e. does not allocate),
/// and `O(n log n)` worst-case.
///
/// # Current implementation
///
/// The current algorithm is based on Orson Peters' [pattern-defeating quicksort][pdqsort],
/// which is a quicksort variant designed to be very fast on certain kinds of patterns,
/// sometimes achieving linear time. It is randomized but deterministic, and falls back to
/// heapsort on degenerate inputs.
///
/// It is generally faster than stable sorting, except in a few special cases, e.g. when the
/// slice consists of several concatenated sorted sequences.
///
/// # Examples
///
/// ```
/// #![feature(sort_unstable)]
///
/// let mut v = [5, 4, 1, 3, 2];
/// v.sort_by(|a, b| a.cmp(b));
/// v.sort_unstable_by(|a, b| a.cmp(b));
/// assert!(v == [1, 2, 3, 4, 5]);
///
/// // reverse sorting
/// v.sort_by(|a, b| b.cmp(a));
/// v.sort_unstable_by(|a, b| b.cmp(a));
/// assert!(v == [5, 4, 3, 2, 1]);
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
///
/// [pdqsort]: https://github.com/orlp/pdqsort
// FIXME #40585: Mention `sort_unstable_by` in the documentation for `sort_by`.
#[unstable(feature = "sort_unstable", issue = "40585")]
#[inline]
pub fn sort_by<F>(&mut self, mut compare: F)
pub fn sort_unstable_by<F>(&mut self, compare: F)
where F: FnMut(&T, &T) -> Ordering
{
merge_sort(self, |a, b| compare(a, b) == Less);
core_slice::SliceExt::sort_unstable_by(self, compare);
}

/// Sorts the slice using `f` to extract a key to compare elements by, but may not preserve the
/// order of equal elements.
///
/// This sort is unstable (i.e. may reorder equal elements), in-place (i.e. does not allocate),
/// and `O(n log n)` worst-case.
///
/// # Current implementation
///
/// The current algorithm is based on Orson Peters' [pattern-defeating quicksort][pdqsort],
/// which is a quicksort variant designed to be very fast on certain kinds of patterns,
/// sometimes achieving linear time. It is randomized but deterministic, and falls back to
/// heapsort on degenerate inputs.
///
/// It is generally faster than stable sorting, except in a few special cases, e.g. when the
/// slice consists of several concatenated sorted sequences.
///
/// # Examples
///
/// ```
/// #![feature(sort_unstable)]
///
/// let mut v = [-5i32, 4, 1, -3, 2];
///
/// v.sort_unstable_by_key(|k| k.abs());
/// assert!(v == [1, 2, -3, 4, -5]);
/// ```
///
/// [pdqsort]: https://github.com/orlp/pdqsort
// FIXME #40585: Mention `sort_unstable_by_key` in the documentation for `sort_by_key`.
#[unstable(feature = "sort_unstable", issue = "40585")]
#[inline]
pub fn sort_unstable_by_key<B, F>(&mut self, f: F)
where F: FnMut(&T) -> B,
B: Ord
{
core_slice::SliceExt::sort_unstable_by_key(self, f);
}

/// Copies the elements from `src` into `self`.
Expand Down Expand Up @@ -1553,28 +1667,20 @@ unsafe fn merge<T, F>(v: &mut [T], mid: usize, buf: *mut T, is_less: &mut F)
fn merge_sort<T, F>(v: &mut [T], mut is_less: F)
where F: FnMut(&T, &T) -> bool
{
// Slices of up to this length get sorted using insertion sort.
const MAX_INSERTION: usize = 20;
// Very short runs are extended using insertion sort to span at least this many elements.
const MIN_RUN: usize = 10;

// Sorting has no meaningful behavior on zero-sized types.
if size_of::<T>() == 0 {
return;
}

// FIXME #12092: These numbers are platform-specific and need more extensive testing/tuning.
//
// If `v` has length up to `max_insertion`, simply switch to insertion sort because it is going
// to perform better than merge sort. For bigger types `T`, the threshold is smaller.
//
// Short runs are extended using insertion sort to span at least `min_run` elements, in order
// to improve performance.
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't believe it's worthwhile differentiating between big and small Ts - it doesn't have a large effect.

Instead, it's better to focus on some middle ground that works well all around: big/small Ts and expensive/cheap comparison functions as well. I took a look at a std::stable_sort implementation in C++ and it has similar constants.

let (max_insertion, min_run) = if size_of::<T>() <= 2 * mem::size_of::<usize>() {
(64, 32)
} else {
(32, 16)
};

let len = v.len();

// Short arrays get sorted in-place via insertion sort to avoid allocations.
if len <= max_insertion {
if len <= MAX_INSERTION {
if len >= 2 {
for i in (0..len-1).rev() {
insert_head(&mut v[i..], &mut is_less);
Expand Down Expand Up @@ -1618,7 +1724,7 @@ fn merge_sort<T, F>(v: &mut [T], mut is_less: F)

// Insert some more elements into the run if it's too short. Insertion sort is faster than
// merge sort on short sequences, so this significantly improves performance.
while start > 0 && end - start < min_run {
while start > 0 && end - start < MIN_RUN {
start -= 1;
insert_head(&mut v[start..end], &mut is_less);
}
Expand Down
14 changes: 4 additions & 10 deletions src/libcollectionstest/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -399,9 +399,10 @@ fn test_sort() {
}
}

// shouldn't panic
let mut v: [i32; 0] = [];
v.sort();
// Should not panic.
[0i32; 0].sort();
[(); 10].sort();
[(); 100].sort();

let mut v = [0xDEADBEEFu64];
v.sort();
Expand Down Expand Up @@ -441,13 +442,6 @@ fn test_sort_stability() {
}
}

#[test]
fn test_sort_zero_sized_type() {
// Should not panic.
[(); 10].sort();
[(); 100].sort();
}

#[test]
fn test_concat() {
let v: [Vec<i32>; 0] = [];
Expand Down
Loading