diff --git a/README.md b/README.md
index e5e3b15..5b484b4 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
[![PyPI Latest Release](https://img.shields.io/pypi/v/tsdownsample.svg)](https://pypi.org/project/tsdownsample/)
[![support-version](https://img.shields.io/pypi/pyversions/tsdownsample)](https://img.shields.io/pypi/pyversions/tsdownsample)
-[![Downloads](https://pepy.tech/badge/tsdownsample)](https://pepy.tech/project/tsdownsample)
+[![Downloads](https://static.pepy.tech/badge/tsdownsample)](https://pepy.tech/project/tsdownsample)
[![CodeQL](https://github.com/predict-idlab/tsdownsample/actions/workflows/codeql.yml/badge.svg)](https://github.com/predict-idlab/tsdownsample/actions/workflows/codeql.yml)
[![Testing](https://github.com/predict-idlab/tsdownsample/actions/workflows/ci-downsample_rs.yml/badge.svg)](https://github.com/predict-idlab/tsdownsample/actions/workflows/ci-downsample_rs.yml)
[![Testing](https://github.com/predict-idlab/tsdownsample/actions/workflows/ci-tsdownsample.yml/badge.svg)](https://github.com/predict-idlab/tsdownsample/actions/workflows/ci-tsdownsample.yml)
@@ -109,7 +109,8 @@ The following downsampling algorithms (classes) are implemented:
| `LTTBDownsampler` | performs the [**Largest Triangle Three Buckets**](https://skemman.is/bitstream/1946/15343/3/SS_MSthesis.pdf) algorithm | `n_threads` |
| `MinMaxLTTBDownsampler` | (_new two-step algorithm 🎉_) first selects `n_out` \* `minmax_ratio` **min and max** values, then further reduces these to `n_out` values using the **Largest Triangle Three Buckets** algorithm | `n_threads`, `minmax_ratio`\* |
-\*Default value for `minmax_ratio` is 30, which is empirically proven to be a good default. (More details in our upcoming paper)
+
+\*Default value for `minmax_ratio` is 4, which is empirically proven to be a good default. More details can be found in our paper: https://arxiv.org/abs/2305.00332
### Handling NaNs
diff --git a/downsample_rs/benches/bench_m4.rs b/downsample_rs/benches/bench_m4.rs
index 11b28ed..c40df6a 100644
--- a/downsample_rs/benches/bench_m4.rs
+++ b/downsample_rs/benches/bench_m4.rs
@@ -6,10 +6,7 @@ use dev_utils::{config, utils};
fn m4_f32_random_array_long_single_core(c: &mut Criterion) {
let n = config::ARRAY_LENGTH_LONG;
let data = utils::get_random_array::(n, f32::MIN, f32::MAX);
- c.bench_function("m4_scal_f32", |b| {
- b.iter(|| m4_mod::m4_without_x(black_box(data.as_slice()), black_box(2_000)))
- });
- c.bench_function("m4_simd_f32", |b| {
+ c.bench_function("m4_f32", |b| {
b.iter(|| m4_mod::m4_without_x(black_box(data.as_slice()), black_box(2_000)))
});
}
@@ -18,16 +15,7 @@ fn m4_f32_random_array_long_multi_core(c: &mut Criterion) {
let n = config::ARRAY_LENGTH_LONG;
let data = utils::get_random_array::(n, f32::MIN, f32::MAX);
let all_threads: usize = utils::get_all_threads();
- c.bench_function("m4_scal_p_f32", |b| {
- b.iter(|| {
- m4_mod::m4_without_x_parallel(
- black_box(data.as_slice()),
- black_box(2_000),
- black_box(all_threads),
- )
- })
- });
- c.bench_function("m4_simd_p_f32", |b| {
+ c.bench_function("m4_p_f32", |b| {
b.iter(|| {
m4_mod::m4_without_x_parallel(
black_box(data.as_slice()),
@@ -42,22 +30,10 @@ fn m4_f32_random_array_50M_single_core(c: &mut Criterion) {
let n = 50_000_000;
let data = utils::get_random_array::(n, f32::MIN, f32::MAX);
let x = (0..n).map(|i| i as i32).collect::>();
- c.bench_function("m4_scal_50M_f32", |b| {
+ c.bench_function("m4_50M_f32", |b| {
b.iter(|| m4_mod::m4_without_x(black_box(data.as_slice()), black_box(2_000)))
});
- c.bench_function("m4_simd_50M_f32", |b| {
- b.iter(|| m4_mod::m4_without_x(black_box(data.as_slice()), black_box(2_000)))
- });
- c.bench_function("m4_scalx_50M_f32", |b| {
- b.iter(|| {
- m4_mod::m4_with_x(
- black_box(x.as_slice()),
- black_box(data.as_slice()),
- black_box(2_000),
- )
- })
- });
- c.bench_function("m4_simdx_50M_f32", |b| {
+ c.bench_function("m4_x_50M_f32", |b| {
b.iter(|| {
m4_mod::m4_with_x(
black_box(x.as_slice()),
@@ -73,16 +49,7 @@ fn m4_f32_random_array_50M_multi_core(c: &mut Criterion) {
let data = utils::get_random_array::(n, f32::MIN, f32::MAX);
let x = (0..n).map(|i| i as i32).collect::>();
let all_threads: usize = utils::get_all_threads();
- c.bench_function("m4_scal_p_50M_f32", |b| {
- b.iter(|| {
- m4_mod::m4_without_x_parallel(
- black_box(data.as_slice()),
- black_box(2_000),
- black_box(all_threads),
- )
- })
- });
- c.bench_function("m4_simd_p_50M_f32", |b| {
+ c.bench_function("m4_p_50M_f32", |b| {
b.iter(|| {
m4_mod::m4_without_x_parallel(
black_box(data.as_slice()),
@@ -91,17 +58,7 @@ fn m4_f32_random_array_50M_multi_core(c: &mut Criterion) {
)
})
});
- c.bench_function("m4_scalx_p_50M_f32", |b| {
- b.iter(|| {
- m4_mod::m4_with_x_parallel(
- black_box(x.as_slice()),
- black_box(data.as_slice()),
- black_box(2_000),
- black_box(all_threads),
- )
- })
- });
- c.bench_function("m4_simdx_p_50M_f32", |b| {
+ c.bench_function("m4_x_p_50M_f32", |b| {
b.iter(|| {
m4_mod::m4_with_x_parallel(
black_box(x.as_slice()),
diff --git a/downsample_rs/benches/bench_minmax.rs b/downsample_rs/benches/bench_minmax.rs
index 7311c3c..599cdbd 100644
--- a/downsample_rs/benches/bench_minmax.rs
+++ b/downsample_rs/benches/bench_minmax.rs
@@ -6,10 +6,7 @@ use dev_utils::{config, utils};
fn minmax_f32_random_array_long_single_core(c: &mut Criterion) {
let n = config::ARRAY_LENGTH_LONG;
let data = utils::get_random_array::(n, f32::MIN, f32::MAX);
- c.bench_function("minmax_scal_f32", |b| {
- b.iter(|| minmax_mod::min_max_without_x(black_box(data.as_slice()), black_box(2_000)))
- });
- c.bench_function("minmax_simd_f32", |b| {
+ c.bench_function("minmax_f32", |b| {
b.iter(|| minmax_mod::min_max_without_x(black_box(data.as_slice()), black_box(2_000)))
});
}
@@ -18,16 +15,7 @@ fn minmax_f32_random_array_long_multi_core(c: &mut Criterion) {
let n = config::ARRAY_LENGTH_LONG;
let data = utils::get_random_array::(n, f32::MIN, f32::MAX);
let all_threads: usize = utils::get_all_threads();
- c.bench_function("minmax_scal_p_f32", |b| {
- b.iter(|| {
- minmax_mod::min_max_without_x_parallel(
- black_box(data.as_slice()),
- black_box(2_000),
- black_box(all_threads),
- )
- })
- });
- c.bench_function("minmax_simd_p_f32", |b| {
+ c.bench_function("minmax_p_f32", |b| {
b.iter(|| {
minmax_mod::min_max_without_x_parallel(
black_box(data.as_slice()),
@@ -42,22 +30,10 @@ fn minmax_f32_random_array_50M_single_core(c: &mut Criterion) {
let n = 50_000_000;
let data = utils::get_random_array::(n, f32::MIN, f32::MAX);
let x = (0..n).map(|i| i as i32).collect::>();
- c.bench_function("minmax_scal_50M_f32", |b| {
- b.iter(|| minmax_mod::min_max_without_x(black_box(data.as_slice()), black_box(2_000)))
- });
- c.bench_function("minmax_simd_50M_f32", |b| {
+ c.bench_function("minmax_50M_f32", |b| {
b.iter(|| minmax_mod::min_max_without_x(black_box(data.as_slice()), black_box(2_000)))
});
- c.bench_function("minmax_scalx_50M_f32", |b| {
- b.iter(|| {
- minmax_mod::min_max_with_x(
- black_box(x.as_slice()),
- black_box(data.as_slice()),
- black_box(2_000),
- )
- })
- });
- c.bench_function("minmax_simdx_50M_f32", |b| {
+ c.bench_function("minmax_x_50M_f32", |b| {
b.iter(|| {
minmax_mod::min_max_with_x(
black_box(x.as_slice()),
@@ -67,16 +43,10 @@ fn minmax_f32_random_array_50M_single_core(c: &mut Criterion) {
})
});
- // c.bench_function("minmax_scal_50M_f32", |b| {
+ // c.bench_function("minmax_50M_f32", |b| {
// b.iter(|| minmax_mod::min_max_without_x(black_box(data.as_slice()), black_box(60_000)))
// });
- // c.bench_function("minmax_simd_50M_f32", |b| {
- // b.iter(|| minmax_mod::min_max_without_x(black_box(data.as_slice()), black_box(60_000)))
- // });
- // c.bench_function("minmax_scalx_50M_f32", |b| {
- // b.iter(|| minmax_mod::min_max_with_x(black_box(x.as_slice()), black_box(data.as_slice()), black_box(60_000)))
- // });
- // c.bench_function("minmax_simdx_50M_f32", |b| {
+ // c.bench_function("minmax_x_50M_f32", |b| {
// b.iter(|| minmax_mod::min_max_with_x(black_box(x.as_slice()), black_box(data.as_slice()), black_box(60_000)))
// });
}
@@ -86,7 +56,7 @@ fn minmax_f32_random_array_50M_long_multi_core(c: &mut Criterion) {
let data = utils::get_random_array::(n, f32::MIN, f32::MAX);
let x = (0..n).map(|i| i as i32).collect::>();
let all_threads: usize = utils::get_all_threads();
- c.bench_function("minmax_scal_p_50M_f32", |b| {
+ c.bench_function("minmax_p_50M_f32", |b| {
b.iter(|| {
minmax_mod::min_max_without_x_parallel(
black_box(data.as_slice()),
@@ -95,26 +65,7 @@ fn minmax_f32_random_array_50M_long_multi_core(c: &mut Criterion) {
)
})
});
- c.bench_function("minmax_simd_p_50M_f32", |b| {
- b.iter(|| {
- minmax_mod::min_max_without_x_parallel(
- black_box(data.as_slice()),
- black_box(2_000),
- black_box(all_threads),
- )
- })
- });
- c.bench_function("minmax_scalx_p_50M_f32", |b| {
- b.iter(|| {
- minmax_mod::min_max_with_x_parallel(
- black_box(x.as_slice()),
- black_box(data.as_slice()),
- black_box(2_000),
- black_box(all_threads),
- )
- })
- });
- c.bench_function("minmax_simdx_p_50M_f32", |b| {
+ c.bench_function("minmax_x_p_50M_f32", |b| {
b.iter(|| {
minmax_mod::min_max_with_x_parallel(
black_box(x.as_slice()),
@@ -125,16 +76,10 @@ fn minmax_f32_random_array_50M_long_multi_core(c: &mut Criterion) {
})
});
- // c.bench_function("minmax_scal_p_50M_f32", |b| {
- // b.iter(|| minmax_mod::min_max_without_x_parallel(black_box(data.as_slice()), black_box(60_000)))
- // });
- // c.bench_function("minmax_simd_p_50M_f32", |b| {
+ // c.bench_function("minmax_p_50M_f32", |b| {
// b.iter(|| minmax_mod::min_max_without_x_parallel(black_box(data.as_slice()), black_box(60_000)))
// });
- // c.bench_function("minmax_scalx_p_50M_f32", |b| {
- // b.iter(|| minmax_mod::min_max_with_x_parallel(black_box(x.as_slice()), black_box(data.as_slice()), black_box(60_000)))
- // });
- // c.bench_function("minmax_simdx_p_50M_f32", |b| {
+ // c.bench_function("minmax_x_p_50M_f32", |b| {
// b.iter(|| minmax_mod::min_max_with_x_parallel(black_box(x.as_slice()), black_box(data.as_slice()), black_box(60_000)))
// });
}
diff --git a/downsample_rs/benches/bench_minmaxlttb.rs b/downsample_rs/benches/bench_minmaxlttb.rs
index f8a2610..a0241de 100644
--- a/downsample_rs/benches/bench_minmaxlttb.rs
+++ b/downsample_rs/benches/bench_minmaxlttb.rs
@@ -9,17 +9,7 @@ fn minmaxlttb_f32_random_array_long_single_core(c: &mut Criterion) {
let n = config::ARRAY_LENGTH_LONG;
let x = (0..n).map(|i| i as i32).collect::>();
let y = utils::get_random_array::(n, f32::MIN, f32::MAX);
- c.bench_function("mmlttb_scalx_f32", |b| {
- b.iter(|| {
- minmaxlttb_mod::minmaxlttb_with_x(
- black_box(x.as_slice()),
- black_box(y.as_slice()),
- black_box(2_000),
- black_box(MINMAX_RATIO),
- )
- })
- });
- c.bench_function("mlttb_simdx_f32", |b| {
+ c.bench_function("mlttb_x_f32", |b| {
b.iter(|| {
minmaxlttb_mod::minmaxlttb_with_x(
black_box(x.as_slice()),
@@ -36,18 +26,7 @@ fn minmaxlttb_f32_random_array_long_multi_core(c: &mut Criterion) {
let x = (0..n).map(|i| i as i32).collect::>();
let y = utils::get_random_array::(n, f32::MIN, f32::MAX);
let all_threads: usize = utils::get_all_threads();
- c.bench_function("mmlttb_scalx_p_f32", |b| {
- b.iter(|| {
- minmaxlttb_mod::minmaxlttb_with_x_parallel(
- black_box(x.as_slice()),
- black_box(y.as_slice()),
- black_box(2_000),
- black_box(MINMAX_RATIO),
- black_box(all_threads),
- )
- })
- });
- c.bench_function("mlttb_simdx_p_f32", |b| {
+ c.bench_function("mlttb_x_p_f32", |b| {
b.iter(|| {
minmaxlttb_mod::minmaxlttb_with_x_parallel(
black_box(x.as_slice()),
@@ -64,17 +43,7 @@ fn minmaxlttb_f32_random_array_50M_single_core(c: &mut Criterion) {
let n = 50_000_000;
let x = (0..n).map(|i| i as i32).collect::>();
let y = utils::get_random_array::(n, f32::MIN, f32::MAX);
- c.bench_function("mlttb_scalx_50M_f32", |b| {
- b.iter(|| {
- minmaxlttb_mod::minmaxlttb_with_x(
- black_box(x.as_slice()),
- black_box(y.as_slice()),
- black_box(2_000),
- black_box(MINMAX_RATIO),
- )
- })
- });
- c.bench_function("mlttb_simdx_50M_f32", |b| {
+ c.bench_function("mlttb_x_50M_f32", |b| {
b.iter(|| {
minmaxlttb_mod::minmaxlttb_with_x(
black_box(x.as_slice()),
@@ -91,18 +60,7 @@ fn minmaxlttb_f32_random_array_50M_multi_core(c: &mut Criterion) {
let x = (0..n).map(|i| i as i32).collect::>();
let y = utils::get_random_array::(n, f32::MIN, f32::MAX);
let all_threads: usize = utils::get_all_threads();
- c.bench_function("mlttb_scalx_p_50M_f32", |b| {
- b.iter(|| {
- minmaxlttb_mod::minmaxlttb_with_x_parallel(
- black_box(x.as_slice()),
- black_box(y.as_slice()),
- black_box(2_000),
- black_box(MINMAX_RATIO),
- black_box(all_threads),
- )
- })
- });
- c.bench_function("mlttb_simdx_p_50M_f32", |b| {
+ c.bench_function("mlttb_x_p_50M_f32", |b| {
b.iter(|| {
minmaxlttb_mod::minmaxlttb_with_x_parallel(
black_box(x.as_slice()),
@@ -118,16 +76,7 @@ fn minmaxlttb_f32_random_array_50M_multi_core(c: &mut Criterion) {
fn minmaxlttb_without_x_f32_random_array_50M_single_core(c: &mut Criterion) {
let n = 50_000_000;
let y = utils::get_random_array::(n, f32::MIN, f32::MAX);
- c.bench_function("mlttb_scal_50M_f32", |b| {
- b.iter(|| {
- minmaxlttb_mod::minmaxlttb_without_x(
- black_box(y.as_slice()),
- black_box(2_000),
- black_box(MINMAX_RATIO),
- )
- })
- });
- c.bench_function("mlttb_simd_50M_f32", |b| {
+ c.bench_function("mlttb_50M_f32", |b| {
b.iter(|| {
minmaxlttb_mod::minmaxlttb_without_x(
black_box(y.as_slice()),
@@ -142,17 +91,7 @@ fn minmaxlttb_without_x_f32_random_array_50M_multi_core(c: &mut Criterion) {
let n = 50_000_000;
let y = utils::get_random_array::(n, f32::MIN, f32::MAX);
let all_threads: usize = utils::get_all_threads();
- c.bench_function("mlttb_scal_p_50M_f32", |b| {
- b.iter(|| {
- minmaxlttb_mod::minmaxlttb_without_x_parallel(
- black_box(y.as_slice()),
- black_box(2_000),
- black_box(MINMAX_RATIO),
- black_box(all_threads),
- )
- })
- });
- c.bench_function("mlttb_simd_p_50M_f32", |b| {
+ c.bench_function("mlttb_p_50M_f32", |b| {
b.iter(|| {
minmaxlttb_mod::minmaxlttb_without_x_parallel(
black_box(y.as_slice()),
diff --git a/downsample_rs/src/m4.rs b/downsample_rs/src/m4.rs
index 21ff63c..d64e33b 100644
--- a/downsample_rs/src/m4.rs
+++ b/downsample_rs/src/m4.rs
@@ -104,7 +104,7 @@ m4_without_x_parallel!(m4_without_x_parallel_nan, NaNArgMinMax, |arr| arr
// be the start and end of the bin, which would result in duplicate data in
// the output array. (this is for example the case for monotonic data).
-// ----------------- GENERICS
+// ----------------------------------- GENERICS ------------------------------------
// --------------------- WITHOUT X
diff --git a/downsample_rs/src/minmax.rs b/downsample_rs/src/minmax.rs
index 36df568..840e26f 100644
--- a/downsample_rs/src/minmax.rs
+++ b/downsample_rs/src/minmax.rs
@@ -100,8 +100,8 @@ min_max_without_x_parallel!(min_max_without_x_parallel, ArgMinMax, |arr| arr.arg
min_max_without_x_parallel!(min_max_without_x_parallel_nan, NaNArgMinMax, |arr| arr
.nanargminmax());
-// ----------------- GENERICS
-//
+// ----------------------------------- GENERICS ------------------------------------
+
// --------------------- WITHOUT X
#[inline(always)]
diff --git a/downsample_rs/src/minmaxlttb.rs b/downsample_rs/src/minmaxlttb.rs
index 3f78412..9d8017a 100644
--- a/downsample_rs/src/minmaxlttb.rs
+++ b/downsample_rs/src/minmaxlttb.rs
@@ -260,8 +260,8 @@ where
.map(|i| *y.get_unchecked(*i))
.collect::>()
};
- // Apply lttb on the reduced data
- let index_points_selected = lttb_without_x(y.as_slice(), n_out);
+ // Apply lttb on the reduced data (using the indices of the preselected data as x)
+ let index_points_selected = lttb_with_x(index.as_slice(), y.as_slice(), n_out);
// Return the original index
return index_points_selected
.iter()
diff --git a/tsdownsample/downsamplers.py b/tsdownsample/downsamplers.py
index 9bca439..16c0a9b 100644
--- a/tsdownsample/downsamplers.py
+++ b/tsdownsample/downsamplers.py
@@ -73,7 +73,7 @@ def rust_mod(self):
return _tsdownsample_rs.minmaxlttb
def downsample(
- self, *args, n_out: int, minmax_ratio: int = 30, n_threads: int = 1, **_
+ self, *args, n_out: int, minmax_ratio: int = 4, n_threads: int = 1, **_
):
assert minmax_ratio > 0, "minmax_ratio must be greater than 0"
return super().downsample(