From a9dffc3d09742a52c7f60c6a031f5fa84c25bde3 Mon Sep 17 00:00:00 2001
From: gord chung <5091603+chungg@users.noreply.github.com>
Date: Mon, 26 Aug 2024 09:49:12 -0400
Subject: [PATCH] accept any numeric type - stats (#83)

---
 src/smooth.rs                 |  33 +++++---
 src/statistic/distribution.rs | 142 +++++++++++++++++++++-------------
 src/statistic/regression.rs   |  51 ++++++++----
 3 files changed, 146 insertions(+), 80 deletions(-)

diff --git a/src/smooth.rs b/src/smooth.rs
index 3ab0a65..d24116c 100644
--- a/src/smooth.rs
+++ b/src/smooth.rs
@@ -39,7 +39,11 @@ pub enum MaMode {
 ///
 /// smooth::ma(&vec![1.0,2.0,3.0,4.0,5.0], 3, smooth::MaMode::SMA).collect::<Vec<f64>>();
 /// ```
-pub fn ma(data: &[f64], window: usize, mamode: MaMode) -> Box<dyn Iterator<Item = f64> + '_> {
+pub fn ma<N: ToPrimitive>(
+    data: &[N],
+    window: usize,
+    mamode: MaMode,
+) -> Box<dyn Iterator<Item = f64> + '_> {
     match mamode {
         MaMode::SMA => Box::new(sma(data, window)),
         MaMode::EWMA => Box::new(ewma(data, window)),
@@ -307,22 +311,27 @@ pub fn hull<N: ToPrimitive>(data: &[N], window: usize) -> impl Iterator<Item = f
 ///
 /// smooth::vidya(&vec![1.0,2.0,3.0,4.0,5.0], 3).collect::<Vec<f64>>();
 /// ```
-pub fn vidya(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ {
+pub fn vidya<N: ToPrimitive>(data: &[N], window: usize) -> impl Iterator<Item = f64> + '_ {
     let alpha = 2.0 / (window + 1) as f64;
     let std5 = _std_dev(data, 5).collect::<Vec<f64>>();
     let std20 = sma(&std5, 20).collect::<Vec<f64>>();
     let offset = (5 - 1) + (20 - 1);
-    iter::repeat(f64::NAN).take(offset).chain(
-        izip!(
-            std20.into_iter().skip(20 - 1),
-            std5.into_iter().skip(20 - 1),
-            data.iter().skip(offset)
+    iter::repeat(f64::NAN)
+        .take(offset)
+        .chain(
+            izip!(
+                std20.into_iter().skip(20 - 1),
+                std5.into_iter().skip(20 - 1),
+                data.iter().skip(offset)
+            )
+            .scan(0.0, move |state, (s20, s5, d)| {
+                *state = alpha * (s5 / s20) * (d.to_f64().unwrap() - *state) + *state;
+                Some(*state)
+            }),
+            // TODO: investigate why faster with collect().iter() than without
         )
-        .scan(0.0, move |state, (s20, s5, d)| {
-            *state = alpha * (s5 / s20) * (d - *state) + *state;
-            Some(*state)
-        }),
-    )
+        .collect::<Vec<f64>>()
+        .into_iter()
 }
 
 pub(crate) fn _cmo<N: ToPrimitive>(data: &[N], window: usize) -> impl Iterator<Item = f64> + '_ {
diff --git a/src/statistic/distribution.rs b/src/statistic/distribution.rs
index d74076f..a62cc47 100644
--- a/src/statistic/distribution.rs
+++ b/src/statistic/distribution.rs
@@ -6,6 +6,8 @@
 use std::cmp::Ordering;
 use std::iter;
 
+use num_traits::cast::ToPrimitive;
+
 /// Variance
 ///
 /// A measure of how far a set of numbers is spread out from their average value. Provides
@@ -22,19 +24,29 @@ use std::iter;
 ///
 /// distribution::variance(&vec![1.0,2.0,3.0,4.0,5.0], 3).collect::<Vec<f64>>();
 /// ```
-pub fn variance(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ {
+pub fn variance<N: ToPrimitive>(data: &[N], window: usize) -> impl Iterator<Item = f64> + '_ {
     iter::repeat(f64::NAN)
         .take(window - 1)
         .chain(data.windows(window).map(move |w| {
-            let mean = w.iter().sum::<f64>() / window as f64;
-            w.iter().fold(0.0, |acc, x| acc + (x - mean).powi(2)) / window as f64
+            let mean = w.iter().filter_map(|x| x.to_f64()).sum::<f64>() / window as f64;
+            w.iter()
+                .filter_map(|x| x.to_f64())
+                .fold(0.0, |acc, x| acc + (x - mean).powi(2))
+                / window as f64
         }))
 }
 
-pub(crate) fn _std_dev(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ {
+pub(crate) fn _std_dev<N: ToPrimitive>(
+    data: &[N],
+    window: usize,
+) -> impl Iterator<Item = f64> + '_ {
     data.windows(window).map(move |w| {
-        let mean = w.iter().sum::<f64>() / window as f64;
-        (w.iter().fold(0.0, |acc, x| acc + (x - mean).powi(2)) / window as f64).sqrt()
+        let mean = w.iter().filter_map(|x| x.to_f64()).sum::<f64>() / window as f64;
+        (w.iter()
+            .filter_map(|x| x.to_f64())
+            .fold(0.0, |acc, x| acc + (x - mean).powi(2))
+            / window as f64)
+            .sqrt()
     })
 }
 
@@ -53,7 +65,7 @@ pub(crate) fn _std_dev(data: &[f64], window: usize) -> impl Iterator<Item = f64>
 ///
 /// distribution::std_dev(&vec![1.0,2.0,3.0,4.0,5.0], 3).collect::<Vec<f64>>();
 /// ```
-pub fn std_dev(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ {
+pub fn std_dev<N: ToPrimitive>(data: &[N], window: usize) -> impl Iterator<Item = f64> + '_ {
     iter::repeat(f64::NAN)
         .take(window - 1)
         .chain(_std_dev(data, window))
@@ -78,14 +90,18 @@ pub fn std_dev(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ {
 ///
 /// distribution::zscore(&vec![1.0,2.0,3.0,4.0,5.0], 3).collect::<Vec<f64>>();
 /// ```
-pub fn zscore(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ {
+pub fn zscore<N: ToPrimitive>(data: &[N], window: usize) -> impl Iterator<Item = f64> + '_ {
     iter::repeat(f64::NAN)
         .take(window - 1)
         .chain(data.windows(window).map(move |w| {
-            let mean = w.iter().sum::<f64>() / window as f64;
-            let stdev =
-                (w.iter().fold(0.0, |acc, x| acc + (x - mean).powi(2)) / window as f64).sqrt();
-            (w[window - 1] - mean) / stdev
+            let mean = w.iter().filter_map(|x| x.to_f64()).sum::<f64>() / window as f64;
+            let stdev = (w
+                .iter()
+                .filter_map(|x| x.to_f64())
+                .fold(0.0, |acc, x| acc + (x - mean).powi(2))
+                / window as f64)
+                .sqrt();
+            (w[window - 1].to_f64().unwrap() - mean) / stdev
         }))
 }
 
@@ -109,12 +125,15 @@ pub fn zscore(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ {
 ///
 /// distribution::mad(&vec![1.0,2.0,3.0,4.0,5.0], 3).collect::<Vec<f64>>();
 /// ```
-pub fn mad(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ {
+pub fn mad<N: ToPrimitive>(data: &[N], window: usize) -> impl Iterator<Item = f64> + '_ {
     iter::repeat(f64::NAN)
         .take(window - 1)
         .chain(data.windows(window).map(move |w| {
-            let mean = w.iter().sum::<f64>() / window as f64;
-            w.iter().fold(0.0, |acc, x| acc + (x - mean).abs()) / window as f64
+            let mean = w.iter().filter_map(|x| x.to_f64()).sum::<f64>() / window as f64;
+            w.iter()
+                .filter_map(|x| x.to_f64())
+                .fold(0.0, |acc, x| acc + (x - mean).abs())
+                / window as f64
         }))
 }
 
@@ -139,12 +158,17 @@ pub fn mad(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ {
 ///
 /// distribution::cv(&vec![1.0,2.0,3.0,4.0,5.0], 3).collect::<Vec<f64>>();
 /// ```
-pub fn cv(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ {
+pub fn cv<N: ToPrimitive>(data: &[N], window: usize) -> impl Iterator<Item = f64> + '_ {
     iter::repeat(f64::NAN)
         .take(window - 1)
         .chain(data.windows(window).map(move |w| {
-            let mean = w.iter().sum::<f64>() / window as f64;
-            (w.iter().fold(0.0, |acc, x| acc + (x - mean).powi(2)) / window as f64).sqrt() / mean
+            let mean = w.iter().filter_map(|x| x.to_f64()).sum::<f64>() / window as f64;
+            (w.iter()
+                .filter_map(|x| x.to_f64())
+                .fold(0.0, |acc, x| acc + (x - mean).powi(2))
+                / window as f64)
+                .sqrt()
+                / mean
         }))
 }
 
@@ -168,15 +192,21 @@ pub fn cv(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ {
 ///
 /// distribution::kurtosis(&vec![1.0,2.0,3.0,4.0,5.0], 3).collect::<Vec<f64>>();
 /// ```
-pub fn kurtosis(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ {
+pub fn kurtosis<N: ToPrimitive>(data: &[N], window: usize) -> impl Iterator<Item = f64> + '_ {
     let adj1 = ((window + 1) * window * (window - 1)) as f64 / ((window - 2) * (window - 3)) as f64;
     let adj2 = 3.0 * (window - 1).pow(2) as f64 / ((window - 2) * (window - 3)) as f64;
     iter::repeat(f64::NAN)
         .take(window - 1)
         .chain(data.windows(window).map(move |w| {
-            let mean = w.iter().sum::<f64>() / window as f64;
-            let k4 = w.iter().fold(0.0, |acc, x| acc + (x - mean).powi(4));
-            let k2 = w.iter().fold(0.0, |acc, x| acc + (x - mean).powi(2));
+            let mean = w.iter().filter_map(|x| x.to_f64()).sum::<f64>() / window as f64;
+            let k4 = w
+                .iter()
+                .filter_map(|x| x.to_f64())
+                .fold(0.0, |acc, x| acc + (x - mean).powi(4));
+            let k2 = w
+                .iter()
+                .filter_map(|x| x.to_f64())
+                .fold(0.0, |acc, x| acc + (x - mean).powi(2));
             adj1 * k4 / k2.powi(2) - adj2
         }))
 }
@@ -208,25 +238,33 @@ pub fn kurtosis(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ {
 ///
 /// distribution::skew(&vec![1.0,2.0,3.0,4.0,5.0], 3).collect::<Vec<f64>>();
 /// ```
-pub fn skew(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ {
+pub fn skew<N: ToPrimitive>(data: &[N], window: usize) -> impl Iterator<Item = f64> + '_ {
     iter::repeat(f64::NAN)
         .take(window - 1)
         .chain(data.windows(window).map(move |w| {
-            let mean = w.iter().sum::<f64>() / window as f64;
-            let m3 = w.iter().fold(0.0, |acc, x| acc + (x - mean).powi(3)) / window as f64;
-            let m2 = (w.iter().fold(0.0, |acc, x| acc + (x - mean).powi(2)) / window as f64)
+            let mean = w.iter().filter_map(|x| x.to_f64()).sum::<f64>() / window as f64;
+            let m3 = w
+                .iter()
+                .filter_map(|x| x.to_f64())
+                .fold(0.0, |acc, x| acc + (x - mean).powi(3))
+                / window as f64;
+            let m2 = (w
+                .iter()
+                .filter_map(|x| x.to_f64())
+                .fold(0.0, |acc, x| acc + (x - mean).powi(2))
+                / window as f64)
                 .powf(3.0 / 2.0);
             ((window * (window - 1)) as f64).sqrt() / (window - 2) as f64 * m3 / m2
         }))
 }
 
-fn quickselect(data: &mut [f64], k: usize) -> f64 {
+fn quickselect<N: ToPrimitive + PartialOrd + Clone>(data: &mut [N], k: usize) -> N {
     // iterative solution is faster than recursive
     let mut lo = 0;
     let mut hi = data.len() - 1;
     while lo < hi {
         // Lomuto partition
-        let pivot = data[hi];
+        let pivot = data[hi].clone();
         let mut i = lo;
         for j in lo..hi {
             if data[j] < pivot {
@@ -237,12 +275,12 @@ fn quickselect(data: &mut [f64], k: usize) -> f64 {
         data.swap(i, hi);
 
         match i.cmp(&k) {
-            Ordering::Equal => return data[k],
+            Ordering::Equal => return data[k].clone(),
             Ordering::Greater => hi = i - 1,
             Ordering::Less => lo = i + 1,
         };
     }
-    data[k]
+    data[k].clone()
 }
 
 /// Median
@@ -257,24 +295,11 @@ fn quickselect(data: &mut [f64], k: usize) -> f64 {
 ///
 /// distribution::median(&vec![1.0,2.0,3.0,4.0,5.0], 3).collect::<Vec<f64>>();
 /// ```
-pub fn median(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ {
-    iter::repeat(f64::NAN)
-        .take(window - 1)
-        .chain(data.windows(window).map(move |w| {
-            let mut w = w.to_vec();
-            match window {
-                even if even % 2 == 0 => {
-                    let fst_med = quickselect(&mut w, (even / 2) - 1);
-                    // don't qsort again, just get min of remainder
-                    let snd_med = w[even / 2..]
-                        .iter()
-                        .fold(f64::NAN, |state, &x| state.min(x));
-
-                    (fst_med + snd_med) * 0.5
-                }
-                odd => quickselect(&mut w, odd / 2),
-            }
-        }))
+pub fn median<N: ToPrimitive + PartialOrd + Clone>(
+    data: &[N],
+    window: usize,
+) -> impl Iterator<Item = f64> + '_ {
+    quantile(data, window, 50.0)
 }
 
 /// Quantile
@@ -289,20 +314,26 @@ pub fn median(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ {
 ///
 /// distribution::quantile(&vec![1.0,2.0,3.0,4.0,5.0], 3, 90.0).collect::<Vec<f64>>();
 /// ```
-pub fn quantile(data: &[f64], window: usize, q: f64) -> impl Iterator<Item = f64> + '_ {
+pub fn quantile<N: ToPrimitive + PartialOrd + Clone>(
+    data: &[N],
+    window: usize,
+    q: f64,
+) -> impl Iterator<Item = f64> + '_ {
     iter::repeat(f64::NAN)
         .take(window - 1)
         .chain(data.windows(window).map(move |w| {
             let pos = (window - 1) as f64 * (q.clamp(0.0, 100.0) / 100.0);
             let mut w = w.to_vec();
             match pos {
-                exact if exact.fract() == 0.0 => quickselect(&mut w, exact as usize),
+                exact if exact.fract() == 0.0 => {
+                    quickselect(&mut w, exact as usize).to_f64().unwrap()
+                }
                 _ => {
-                    let lower = quickselect(&mut w, pos.floor() as usize);
+                    let lower = quickselect(&mut w, pos.floor() as usize).to_f64().unwrap();
                     // don't qsort again, just get min of remainder
                     let upper = w[pos.ceil() as usize..]
                         .iter()
-                        .fold(f64::NAN, |state, &x| state.min(x));
+                        .fold(f64::NAN, |state, x| state.min(x.to_f64().unwrap()));
 
                     lower * (pos.ceil() - pos) + upper * (pos - pos.floor())
                 }
@@ -376,7 +407,10 @@ impl<T: PartialOrd + Clone> SortExt<T> for [T] {
 ///     Some(distribution::RankMode::Ordinal)
 /// ).collect::<Vec<_>>();
 /// ```
-pub fn rank(data: &[f64], mode: Option<RankMode>) -> impl Iterator<Item = f64> + '_ {
+pub fn rank<N: ToPrimitive + PartialOrd + Clone>(
+    data: &[N],
+    mode: Option<RankMode>,
+) -> impl Iterator<Item = f64> + '_ {
     let mut result = vec![0.; data.len()];
     let indices = data.argsort();
 
@@ -387,7 +421,7 @@ pub fn rank(data: &[f64], mode: Option<RankMode>) -> impl Iterator<Item = f64> +
                 .for_each(|(i, val)| result[val] = i as f64);
         }
         _ => {
-            let mut sorted = indices.iter().map(|&i| data[i]);
+            let mut sorted = indices.iter().map(|&i| data[i].clone());
             let x1 = sorted.next().unwrap();
             let uniq_indices = iter::once(true)
                 .chain(sorted.scan(x1, |state, x| {
diff --git a/src/statistic/regression.rs b/src/statistic/regression.rs
index deb63cc..1f8e1bc 100644
--- a/src/statistic/regression.rs
+++ b/src/statistic/regression.rs
@@ -5,6 +5,8 @@
 //! determing causal relationships.[[1]](https://en.wikipedia.org/wiki/Regression_analysis)
 use std::iter;
 
+use num_traits::cast::ToPrimitive;
+
 /// Mean Squared Error
 ///
 /// Measures average squared difference between estimated and actual values.
@@ -24,12 +26,14 @@ use std::iter;
 ///
 /// regression::mse(&vec![1.0,2.0,3.0,4.0,5.0], &vec![1.0,2.0,3.0,4.0,5.0]).collect::<Vec<f64>>();
 /// ```
-pub fn mse<'a>(data: &'a [f64], estimate: &'a [f64]) -> impl Iterator<Item = f64> + 'a {
+pub fn mse<'a, N: ToPrimitive>(data: &'a [N], estimate: &'a [N]) -> impl Iterator<Item = f64> + 'a {
     data.iter()
         .enumerate()
         .zip(estimate)
         .scan(0.0, |state, ((cnt, observe), est)| {
-            *state += (observe - est).powi(2).max(0.0);
+            *state += (observe.to_f64().unwrap() - est.to_f64().unwrap())
+                .powi(2)
+                .max(0.0);
             Some(*state / (cnt + 1) as f64)
         })
 }
@@ -53,12 +57,17 @@ pub fn mse<'a>(data: &'a [f64], estimate: &'a [f64]) -> impl Iterator<Item = f64
 ///
 /// regression::rmse(&vec![1.0,2.0,3.0,4.0,5.0], &vec![1.0,2.0,3.0,4.0,5.0]).collect::<Vec<f64>>();
 /// ```
-pub fn rmse<'a>(data: &'a [f64], estimate: &'a [f64]) -> impl Iterator<Item = f64> + 'a {
+pub fn rmse<'a, N: ToPrimitive>(
+    data: &'a [N],
+    estimate: &'a [N],
+) -> impl Iterator<Item = f64> + 'a {
     data.iter()
         .enumerate()
         .zip(estimate)
         .scan(0.0, |state, ((cnt, observe), est)| {
-            *state += (observe - est).powi(2).max(0.0);
+            *state += (observe.to_f64().unwrap() - est.to_f64().unwrap())
+                .powi(2)
+                .max(0.0);
             Some((*state / (cnt + 1) as f64).sqrt())
         })
 }
@@ -83,12 +92,14 @@ pub fn rmse<'a>(data: &'a [f64], estimate: &'a [f64]) -> impl Iterator<Item = f6
 ///
 /// regression::mae(&vec![1.0,2.0,3.0,4.0,5.0], &vec![1.0,2.0,3.0,4.0,5.0]).collect::<Vec<f64>>();
 /// ```
-pub fn mae<'a>(data: &'a [f64], estimate: &'a [f64]) -> impl Iterator<Item = f64> + 'a {
+pub fn mae<'a, N: ToPrimitive>(data: &'a [N], estimate: &'a [N]) -> impl Iterator<Item = f64> + 'a {
     data.iter()
         .enumerate()
         .zip(estimate)
         .scan(0.0, |state, ((cnt, observe), est)| {
-            *state += (observe - est).abs().max(0.0);
+            *state += (observe.to_f64().unwrap() - est.to_f64().unwrap())
+                .abs()
+                .max(0.0);
             Some(*state / (cnt + 1) as f64)
         })
 }
@@ -112,12 +123,18 @@ pub fn mae<'a>(data: &'a [f64], estimate: &'a [f64]) -> impl Iterator<Item = f64
 ///
 /// regression::mape(&vec![1.0,2.0,3.0,4.0,5.0], &vec![1.0,2.0,3.0,4.0,5.0]).collect::<Vec<f64>>();
 /// ```
-pub fn mape<'a>(data: &'a [f64], estimate: &'a [f64]) -> impl Iterator<Item = f64> + 'a {
+pub fn mape<'a, N: ToPrimitive>(
+    data: &'a [N],
+    estimate: &'a [N],
+) -> impl Iterator<Item = f64> + 'a {
     data.iter()
         .enumerate()
         .zip(estimate)
         .scan(0.0, |state, ((cnt, observe), est)| {
-            *state += ((observe - est) / observe).abs().max(0.0);
+            *state += ((observe.to_f64().unwrap() - est.to_f64().unwrap())
+                / observe.to_f64().unwrap())
+            .abs()
+            .max(0.0);
             Some(100.0 * *state / (cnt + 1) as f64)
         })
 }
@@ -142,12 +159,17 @@ pub fn mape<'a>(data: &'a [f64], estimate: &'a [f64]) -> impl Iterator<Item = f6
 ///
 /// regression::smape(&vec![1.0,2.0,3.0,4.0,5.0], &vec![1.0,2.0,3.0,4.0,5.0]).collect::<Vec<f64>>();
 /// ```
-pub fn smape<'a>(data: &'a [f64], estimate: &'a [f64]) -> impl Iterator<Item = f64> + 'a {
+pub fn smape<'a, N: ToPrimitive>(
+    data: &'a [N],
+    estimate: &'a [N],
+) -> impl Iterator<Item = f64> + 'a {
     data.iter()
         .enumerate()
         .zip(estimate)
         .scan(0.0, |state, ((cnt, observe), est)| {
-            *state += ((observe - est).abs() / ((observe.abs() + est.abs()) / 2.0)).max(0.0);
+            *state += ((observe.to_f64().unwrap() - est.to_f64().unwrap()).abs()
+                / ((observe.to_f64().unwrap().abs() + est.to_f64().unwrap().abs()) / 2.0))
+                .max(0.0);
             Some(100.0 * *state / (cnt + 1) as f64)
         })
 }
@@ -172,12 +194,13 @@ pub fn smape<'a>(data: &'a [f64], estimate: &'a [f64]) -> impl Iterator<Item = f
 ///
 /// regression::mda(&vec![1.0,2.0,3.0,4.0,5.0], &vec![1.0,2.0,3.0,4.0,5.0]).collect::<Vec<f64>>();
 /// ```
-pub fn mda<'a>(data: &'a [f64], estimate: &'a [f64]) -> impl Iterator<Item = f64> + 'a {
+pub fn mda<'a, N: ToPrimitive>(data: &'a [N], estimate: &'a [N]) -> impl Iterator<Item = f64> + 'a {
     iter::once(f64::NAN).chain(data[1..].iter().enumerate().zip(&estimate[1..]).scan(
-        (0.0, data[0]),
+        (0.0, data[0].to_f64().unwrap()),
         |state, ((cnt, observe), est)| {
-            let dir = ((observe - state.1).signum() == (est - state.1).signum()) as u8 as f64;
-            *state = (state.0 + dir, *observe);
+            let dir = ((observe.to_f64().unwrap() - state.1).signum()
+                == (est.to_f64().unwrap() - state.1).signum()) as u8 as f64;
+            *state = (state.0 + dir, observe.to_f64().unwrap());
             Some(state.0 / (cnt + 1) as f64)
         },
     ))