From a9dffc3d09742a52c7f60c6a031f5fa84c25bde3 Mon Sep 17 00:00:00 2001 From: gord chung <5091603+chungg@users.noreply.github.com> Date: Mon, 26 Aug 2024 09:49:12 -0400 Subject: [PATCH] accept any numeric type - stats (#83) --- src/smooth.rs | 33 +++++--- src/statistic/distribution.rs | 142 +++++++++++++++++++++------------- src/statistic/regression.rs | 51 ++++++++---- 3 files changed, 146 insertions(+), 80 deletions(-) diff --git a/src/smooth.rs b/src/smooth.rs index 3ab0a65..d24116c 100644 --- a/src/smooth.rs +++ b/src/smooth.rs @@ -39,7 +39,11 @@ pub enum MaMode { /// /// smooth::ma(&vec![1.0,2.0,3.0,4.0,5.0], 3, smooth::MaMode::SMA).collect::<Vec<f64>>(); /// ``` -pub fn ma(data: &[f64], window: usize, mamode: MaMode) -> Box<dyn Iterator<Item = f64> + '_> { +pub fn ma<N: ToPrimitive>( + data: &[N], + window: usize, + mamode: MaMode, +) -> Box<dyn Iterator<Item = f64> + '_> { match mamode { MaMode::SMA => Box::new(sma(data, window)), MaMode::EWMA => Box::new(ewma(data, window)), @@ -307,22 +311,27 @@ pub fn hull<N: ToPrimitive>(data: &[N], window: usize) -> impl Iterator<Item = f /// /// smooth::vidya(&vec![1.0,2.0,3.0,4.0,5.0], 3).collect::<Vec<f64>>(); /// ``` -pub fn vidya(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ { +pub fn vidya<N: ToPrimitive>(data: &[N], window: usize) -> impl Iterator<Item = f64> + '_ { let alpha = 2.0 / (window + 1) as f64; let std5 = _std_dev(data, 5).collect::<Vec<f64>>(); let std20 = sma(&std5, 20).collect::<Vec<f64>>(); let offset = (5 - 1) + (20 - 1); - iter::repeat(f64::NAN).take(offset).chain( - izip!( - std20.into_iter().skip(20 - 1), - std5.into_iter().skip(20 - 1), - data.iter().skip(offset) + iter::repeat(f64::NAN) + .take(offset) + .chain( + izip!( + std20.into_iter().skip(20 - 1), + std5.into_iter().skip(20 - 1), + data.iter().skip(offset) + ) + .scan(0.0, move |state, (s20, s5, d)| { + *state = alpha * (s5 / s20) * (d.to_f64().unwrap() - *state) + *state; + Some(*state) + }), + // TODO: investigate why faster with collect().iter() than without ) - .scan(0.0, move |state, (s20, s5, d)| { - *state = alpha * (s5 / s20) * (d - *state) + *state; - Some(*state) - }), - ) + .collect::<Vec<f64>>() + .into_iter() } pub(crate) fn _cmo<N: ToPrimitive>(data: &[N], window: usize) -> impl Iterator<Item = f64> + '_ { diff --git a/src/statistic/distribution.rs b/src/statistic/distribution.rs index d74076f..a62cc47 100644 --- a/src/statistic/distribution.rs +++ b/src/statistic/distribution.rs @@ -6,6 +6,8 @@ use std::cmp::Ordering; use std::iter; +use num_traits::cast::ToPrimitive; + /// Variance /// /// A measure of how far a set of numbers is spread out from their average value. Provides @@ -22,19 +24,29 @@ use std::iter; /// /// distribution::variance(&vec![1.0,2.0,3.0,4.0,5.0], 3).collect::<Vec<f64>>(); /// ``` -pub fn variance(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ { +pub fn variance<N: ToPrimitive>(data: &[N], window: usize) -> impl Iterator<Item = f64> + '_ { iter::repeat(f64::NAN) .take(window - 1) .chain(data.windows(window).map(move |w| { - let mean = w.iter().sum::<f64>() / window as f64; - w.iter().fold(0.0, |acc, x| acc + (x - mean).powi(2)) / window as f64 + let mean = w.iter().filter_map(|x| x.to_f64()).sum::<f64>() / window as f64; + w.iter() + .filter_map(|x| x.to_f64()) + .fold(0.0, |acc, x| acc + (x - mean).powi(2)) + / window as f64 })) } -pub(crate) fn _std_dev(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ { +pub(crate) fn _std_dev<N: ToPrimitive>( + data: &[N], + window: usize, +) -> impl Iterator<Item = f64> + '_ { data.windows(window).map(move |w| { - let mean = w.iter().sum::<f64>() / window as f64; - (w.iter().fold(0.0, |acc, x| acc + (x - mean).powi(2)) / window as f64).sqrt() + let mean = w.iter().filter_map(|x| x.to_f64()).sum::<f64>() / window as f64; + (w.iter() + .filter_map(|x| x.to_f64()) + .fold(0.0, |acc, x| acc + (x - mean).powi(2)) + / window as f64) + .sqrt() }) } @@ -53,7 +65,7 @@ pub(crate) fn _std_dev(data: &[f64], window: usize) -> impl Iterator<Item = f64> /// /// distribution::std_dev(&vec![1.0,2.0,3.0,4.0,5.0], 3).collect::<Vec<f64>>(); /// ``` -pub fn std_dev(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ { +pub fn std_dev<N: ToPrimitive>(data: &[N], window: usize) -> impl Iterator<Item = f64> + '_ { iter::repeat(f64::NAN) .take(window - 1) .chain(_std_dev(data, window)) @@ -78,14 +90,18 @@ pub fn std_dev(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ { /// /// distribution::zscore(&vec![1.0,2.0,3.0,4.0,5.0], 3).collect::<Vec<f64>>(); /// ``` -pub fn zscore(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ { +pub fn zscore<N: ToPrimitive>(data: &[N], window: usize) -> impl Iterator<Item = f64> + '_ { iter::repeat(f64::NAN) .take(window - 1) .chain(data.windows(window).map(move |w| { - let mean = w.iter().sum::<f64>() / window as f64; - let stdev = - (w.iter().fold(0.0, |acc, x| acc + (x - mean).powi(2)) / window as f64).sqrt(); - (w[window - 1] - mean) / stdev + let mean = w.iter().filter_map(|x| x.to_f64()).sum::<f64>() / window as f64; + let stdev = (w + .iter() + .filter_map(|x| x.to_f64()) + .fold(0.0, |acc, x| acc + (x - mean).powi(2)) + / window as f64) + .sqrt(); + (w[window - 1].to_f64().unwrap() - mean) / stdev })) } @@ -109,12 +125,15 @@ pub fn zscore(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ { /// /// distribution::mad(&vec![1.0,2.0,3.0,4.0,5.0], 3).collect::<Vec<f64>>(); /// ``` -pub fn mad(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ { +pub fn mad<N: ToPrimitive>(data: &[N], window: usize) -> impl Iterator<Item = f64> + '_ { iter::repeat(f64::NAN) .take(window - 1) .chain(data.windows(window).map(move |w| { - let mean = w.iter().sum::<f64>() / window as f64; - w.iter().fold(0.0, |acc, x| acc + (x - mean).abs()) / window as f64 + let mean = w.iter().filter_map(|x| x.to_f64()).sum::<f64>() / window as f64; + w.iter() + .filter_map(|x| x.to_f64()) + .fold(0.0, |acc, x| acc + (x - mean).abs()) + / window as f64 })) } @@ -139,12 +158,17 @@ pub fn mad(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ { /// /// distribution::cv(&vec![1.0,2.0,3.0,4.0,5.0], 3).collect::<Vec<f64>>(); /// ``` -pub fn cv(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ { +pub fn cv<N: ToPrimitive>(data: &[N], window: usize) -> impl Iterator<Item = f64> + '_ { iter::repeat(f64::NAN) .take(window - 1) .chain(data.windows(window).map(move |w| { - let mean = w.iter().sum::<f64>() / window as f64; - (w.iter().fold(0.0, |acc, x| acc + (x - mean).powi(2)) / window as f64).sqrt() / mean + let mean = w.iter().filter_map(|x| x.to_f64()).sum::<f64>() / window as f64; + (w.iter() + .filter_map(|x| x.to_f64()) + .fold(0.0, |acc, x| acc + (x - mean).powi(2)) + / window as f64) + .sqrt() + / mean })) } @@ -168,15 +192,21 @@ pub fn cv(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ { /// /// distribution::kurtosis(&vec![1.0,2.0,3.0,4.0,5.0], 3).collect::<Vec<f64>>(); /// ``` -pub fn kurtosis(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ { +pub fn kurtosis<N: ToPrimitive>(data: &[N], window: usize) -> impl Iterator<Item = f64> + '_ { let adj1 = ((window + 1) * window * (window - 1)) as f64 / ((window - 2) * (window - 3)) as f64; let adj2 = 3.0 * (window - 1).pow(2) as f64 / ((window - 2) * (window - 3)) as f64; iter::repeat(f64::NAN) .take(window - 1) .chain(data.windows(window).map(move |w| { - let mean = w.iter().sum::<f64>() / window as f64; - let k4 = w.iter().fold(0.0, |acc, x| acc + (x - mean).powi(4)); - let k2 = w.iter().fold(0.0, |acc, x| acc + (x - mean).powi(2)); + let mean = w.iter().filter_map(|x| x.to_f64()).sum::<f64>() / window as f64; + let k4 = w + .iter() + .filter_map(|x| x.to_f64()) + .fold(0.0, |acc, x| acc + (x - mean).powi(4)); + let k2 = w + .iter() + .filter_map(|x| x.to_f64()) + .fold(0.0, |acc, x| acc + (x - mean).powi(2)); adj1 * k4 / k2.powi(2) - adj2 })) } @@ -208,25 +238,33 @@ pub fn kurtosis(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ { /// /// distribution::skew(&vec![1.0,2.0,3.0,4.0,5.0], 3).collect::<Vec<f64>>(); /// ``` -pub fn skew(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ { +pub fn skew<N: ToPrimitive>(data: &[N], window: usize) -> impl Iterator<Item = f64> + '_ { iter::repeat(f64::NAN) .take(window - 1) .chain(data.windows(window).map(move |w| { - let mean = w.iter().sum::<f64>() / window as f64; - let m3 = w.iter().fold(0.0, |acc, x| acc + (x - mean).powi(3)) / window as f64; - let m2 = (w.iter().fold(0.0, |acc, x| acc + (x - mean).powi(2)) / window as f64) + let mean = w.iter().filter_map(|x| x.to_f64()).sum::<f64>() / window as f64; + let m3 = w + .iter() + .filter_map(|x| x.to_f64()) + .fold(0.0, |acc, x| acc + (x - mean).powi(3)) + / window as f64; + let m2 = (w + .iter() + .filter_map(|x| x.to_f64()) + .fold(0.0, |acc, x| acc + (x - mean).powi(2)) + / window as f64) .powf(3.0 / 2.0); ((window * (window - 1)) as f64).sqrt() / (window - 2) as f64 * m3 / m2 })) } -fn quickselect(data: &mut [f64], k: usize) -> f64 { +fn quickselect<N: ToPrimitive + PartialOrd + Clone>(data: &mut [N], k: usize) -> N { // iterative solution is faster than recursive let mut lo = 0; let mut hi = data.len() - 1; while lo < hi { // Lomuto partition - let pivot = data[hi]; + let pivot = data[hi].clone(); let mut i = lo; for j in lo..hi { if data[j] < pivot { @@ -237,12 +275,12 @@ fn quickselect(data: &mut [f64], k: usize) -> f64 { data.swap(i, hi); match i.cmp(&k) { - Ordering::Equal => return data[k], + Ordering::Equal => return data[k].clone(), Ordering::Greater => hi = i - 1, Ordering::Less => lo = i + 1, }; } - data[k] + data[k].clone() } /// Median @@ -257,24 +295,11 @@ fn quickselect(data: &mut [f64], k: usize) -> f64 { /// /// distribution::median(&vec![1.0,2.0,3.0,4.0,5.0], 3).collect::<Vec<f64>>(); /// ``` -pub fn median(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ { - iter::repeat(f64::NAN) - .take(window - 1) - .chain(data.windows(window).map(move |w| { - let mut w = w.to_vec(); - match window { - even if even % 2 == 0 => { - let fst_med = quickselect(&mut w, (even / 2) - 1); - // don't qsort again, just get min of remainder - let snd_med = w[even / 2..] - .iter() - .fold(f64::NAN, |state, &x| state.min(x)); - - (fst_med + snd_med) * 0.5 - } - odd => quickselect(&mut w, odd / 2), - } - })) +pub fn median<N: ToPrimitive + PartialOrd + Clone>( + data: &[N], + window: usize, +) -> impl Iterator<Item = f64> + '_ { + quantile(data, window, 50.0) } /// Quantile @@ -289,20 +314,26 @@ pub fn median(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ { /// /// distribution::quantile(&vec![1.0,2.0,3.0,4.0,5.0], 3, 90.0).collect::<Vec<f64>>(); /// ``` -pub fn quantile(data: &[f64], window: usize, q: f64) -> impl Iterator<Item = f64> + '_ { +pub fn quantile<N: ToPrimitive + PartialOrd + Clone>( + data: &[N], + window: usize, + q: f64, +) -> impl Iterator<Item = f64> + '_ { iter::repeat(f64::NAN) .take(window - 1) .chain(data.windows(window).map(move |w| { let pos = (window - 1) as f64 * (q.clamp(0.0, 100.0) / 100.0); let mut w = w.to_vec(); match pos { - exact if exact.fract() == 0.0 => quickselect(&mut w, exact as usize), + exact if exact.fract() == 0.0 => { + quickselect(&mut w, exact as usize).to_f64().unwrap() + } _ => { - let lower = quickselect(&mut w, pos.floor() as usize); + let lower = quickselect(&mut w, pos.floor() as usize).to_f64().unwrap(); // don't qsort again, just get min of remainder let upper = w[pos.ceil() as usize..] .iter() - .fold(f64::NAN, |state, &x| state.min(x)); + .fold(f64::NAN, |state, x| state.min(x.to_f64().unwrap())); lower * (pos.ceil() - pos) + upper * (pos - pos.floor()) } @@ -376,7 +407,10 @@ impl<T: PartialOrd + Clone> SortExt<T> for [T] { /// Some(distribution::RankMode::Ordinal) /// ).collect::<Vec<_>>(); /// ``` -pub fn rank(data: &[f64], mode: Option<RankMode>) -> impl Iterator<Item = f64> + '_ { +pub fn rank<N: ToPrimitive + PartialOrd + Clone>( + data: &[N], + mode: Option<RankMode>, +) -> impl Iterator<Item = f64> + '_ { let mut result = vec![0.; data.len()]; let indices = data.argsort(); @@ -387,7 +421,7 @@ pub fn rank(data: &[f64], mode: Option<RankMode>) -> impl Iterator<Item = f64> + .for_each(|(i, val)| result[val] = i as f64); } _ => { - let mut sorted = indices.iter().map(|&i| data[i]); + let mut sorted = indices.iter().map(|&i| data[i].clone()); let x1 = sorted.next().unwrap(); let uniq_indices = iter::once(true) .chain(sorted.scan(x1, |state, x| { diff --git a/src/statistic/regression.rs b/src/statistic/regression.rs index deb63cc..1f8e1bc 100644 --- a/src/statistic/regression.rs +++ b/src/statistic/regression.rs @@ -5,6 +5,8 @@ //! determing causal relationships.[[1]](https://en.wikipedia.org/wiki/Regression_analysis) use std::iter; +use num_traits::cast::ToPrimitive; + /// Mean Squared Error /// /// Measures average squared difference between estimated and actual values. @@ -24,12 +26,14 @@ use std::iter; /// /// regression::mse(&vec![1.0,2.0,3.0,4.0,5.0], &vec![1.0,2.0,3.0,4.0,5.0]).collect::<Vec<f64>>(); /// ``` -pub fn mse<'a>(data: &'a [f64], estimate: &'a [f64]) -> impl Iterator<Item = f64> + 'a { +pub fn mse<'a, N: ToPrimitive>(data: &'a [N], estimate: &'a [N]) -> impl Iterator<Item = f64> + 'a { data.iter() .enumerate() .zip(estimate) .scan(0.0, |state, ((cnt, observe), est)| { - *state += (observe - est).powi(2).max(0.0); + *state += (observe.to_f64().unwrap() - est.to_f64().unwrap()) + .powi(2) + .max(0.0); Some(*state / (cnt + 1) as f64) }) } @@ -53,12 +57,17 @@ pub fn mse<'a>(data: &'a [f64], estimate: &'a [f64]) -> impl Iterator<Item = f64 /// /// regression::rmse(&vec![1.0,2.0,3.0,4.0,5.0], &vec![1.0,2.0,3.0,4.0,5.0]).collect::<Vec<f64>>(); /// ``` -pub fn rmse<'a>(data: &'a [f64], estimate: &'a [f64]) -> impl Iterator<Item = f64> + 'a { +pub fn rmse<'a, N: ToPrimitive>( + data: &'a [N], + estimate: &'a [N], +) -> impl Iterator<Item = f64> + 'a { data.iter() .enumerate() .zip(estimate) .scan(0.0, |state, ((cnt, observe), est)| { - *state += (observe - est).powi(2).max(0.0); + *state += (observe.to_f64().unwrap() - est.to_f64().unwrap()) + .powi(2) + .max(0.0); Some((*state / (cnt + 1) as f64).sqrt()) }) } @@ -83,12 +92,14 @@ pub fn rmse<'a>(data: &'a [f64], estimate: &'a [f64]) -> impl Iterator<Item = f6 /// /// regression::mae(&vec![1.0,2.0,3.0,4.0,5.0], &vec![1.0,2.0,3.0,4.0,5.0]).collect::<Vec<f64>>(); /// ``` -pub fn mae<'a>(data: &'a [f64], estimate: &'a [f64]) -> impl Iterator<Item = f64> + 'a { +pub fn mae<'a, N: ToPrimitive>(data: &'a [N], estimate: &'a [N]) -> impl Iterator<Item = f64> + 'a { data.iter() .enumerate() .zip(estimate) .scan(0.0, |state, ((cnt, observe), est)| { - *state += (observe - est).abs().max(0.0); + *state += (observe.to_f64().unwrap() - est.to_f64().unwrap()) + .abs() + .max(0.0); Some(*state / (cnt + 1) as f64) }) } @@ -112,12 +123,18 @@ pub fn mae<'a>(data: &'a [f64], estimate: &'a [f64]) -> impl Iterator<Item = f64 /// /// regression::mape(&vec![1.0,2.0,3.0,4.0,5.0], &vec![1.0,2.0,3.0,4.0,5.0]).collect::<Vec<f64>>(); /// ``` -pub fn mape<'a>(data: &'a [f64], estimate: &'a [f64]) -> impl Iterator<Item = f64> + 'a { +pub fn mape<'a, N: ToPrimitive>( + data: &'a [N], + estimate: &'a [N], +) -> impl Iterator<Item = f64> + 'a { data.iter() .enumerate() .zip(estimate) .scan(0.0, |state, ((cnt, observe), est)| { - *state += ((observe - est) / observe).abs().max(0.0); + *state += ((observe.to_f64().unwrap() - est.to_f64().unwrap()) + / observe.to_f64().unwrap()) + .abs() + .max(0.0); Some(100.0 * *state / (cnt + 1) as f64) }) } @@ -142,12 +159,17 @@ pub fn mape<'a>(data: &'a [f64], estimate: &'a [f64]) -> impl Iterator<Item = f6 /// /// regression::smape(&vec![1.0,2.0,3.0,4.0,5.0], &vec![1.0,2.0,3.0,4.0,5.0]).collect::<Vec<f64>>(); /// ``` -pub fn smape<'a>(data: &'a [f64], estimate: &'a [f64]) -> impl Iterator<Item = f64> + 'a { +pub fn smape<'a, N: ToPrimitive>( + data: &'a [N], + estimate: &'a [N], +) -> impl Iterator<Item = f64> + 'a { data.iter() .enumerate() .zip(estimate) .scan(0.0, |state, ((cnt, observe), est)| { - *state += ((observe - est).abs() / ((observe.abs() + est.abs()) / 2.0)).max(0.0); + *state += ((observe.to_f64().unwrap() - est.to_f64().unwrap()).abs() + / ((observe.to_f64().unwrap().abs() + est.to_f64().unwrap().abs()) / 2.0)) + .max(0.0); Some(100.0 * *state / (cnt + 1) as f64) }) } @@ -172,12 +194,13 @@ pub fn smape<'a>(data: &'a [f64], estimate: &'a [f64]) -> impl Iterator<Item = f /// /// regression::mda(&vec![1.0,2.0,3.0,4.0,5.0], &vec![1.0,2.0,3.0,4.0,5.0]).collect::<Vec<f64>>(); /// ``` -pub fn mda<'a>(data: &'a [f64], estimate: &'a [f64]) -> impl Iterator<Item = f64> + 'a { +pub fn mda<'a, N: ToPrimitive>(data: &'a [N], estimate: &'a [N]) -> impl Iterator<Item = f64> + 'a { iter::once(f64::NAN).chain(data[1..].iter().enumerate().zip(&estimate[1..]).scan( - (0.0, data[0]), + (0.0, data[0].to_f64().unwrap()), |state, ((cnt, observe), est)| { - let dir = ((observe - state.1).signum() == (est - state.1).signum()) as u8 as f64; - *state = (state.0 + dir, *observe); + let dir = ((observe.to_f64().unwrap() - state.1).signum() + == (est.to_f64().unwrap() - state.1).signum()) as u8 as f64; + *state = (state.0 + dir, observe.to_f64().unwrap()); Some(state.0 / (cnt + 1) as f64) }, ))