Skip to content

Commit

Permalink
add statistical functions
Browse files Browse the repository at this point in the history
  • Loading branch information
chungg committed Jul 10, 2024
1 parent 0c9996c commit e6d9069
Show file tree
Hide file tree
Showing 9 changed files with 739 additions and 13 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ encouraged.
- cargo test
- cargo bench
- cargo run --example file_json
-

## todo
- handle div by zero scenarios
- allow other numeric types rather than just f64
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

pub mod momentum;
pub mod smooth;
pub mod statistic;
pub mod trend;
pub mod volatility;
pub mod volume;
11 changes: 3 additions & 8 deletions src/smooth.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ use std::collections::VecDeque;
use std::f64::consts::PI;
use std::iter;

use crate::statistic::distribution::_std_dev;

/// Moving average types
pub enum MaMode {
SMA,
Expand Down Expand Up @@ -278,13 +280,6 @@ pub fn hull(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ {
.into_iter()
}

pub(crate) fn std_dev(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ {
data.windows(window).map(move |w| {
let mean = w.iter().sum::<f64>() / window as f64;
(w.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / window as f64).sqrt()
})
}

/// Volatility Index Dynamic Average (VIDYA)
///
/// A type of moving average that uses a combination of short-term and long-term
Expand All @@ -301,7 +296,7 @@ pub(crate) fn std_dev(data: &[f64], window: usize) -> impl Iterator<Item = f64>
/// ```
pub fn vidya(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ {
let alpha = 2.0 / (window + 1) as f64;
let std5 = std_dev(data, 5).collect::<Vec<f64>>();
let std5 = _std_dev(data, 5).collect::<Vec<f64>>();
let std20 = sma(&std5, 20).collect::<Vec<f64>>();
let offset = (5 - 1) + (20 - 1);
iter::repeat(f64::NAN).take(offset).chain(
Expand Down
206 changes: 206 additions & 0 deletions src/statistic/distribution.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
//! Distribution Functions
//!
//! Provides functions which describe the distribution of a dataset. This can relate to the
//! shape, centre, or dispersion of the
//! distribution[[1]](https://en.wikipedia.org/wiki/Probability_distribution)
use std::cmp::Ordering;
use std::iter;

pub(crate) fn _std_dev(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ {
data.windows(window).map(move |w| {
let mean = w.iter().sum::<f64>() / window as f64;
(w.iter().fold(0.0, |acc, x| acc + (x - mean).powi(2)) / window as f64).sqrt()
})
}

/// Standard Deviation
///
/// A measure of the amount of variation of a random variable expected about its mean.
///
/// ## Sources
///
/// [[1]](https://en.wikipedia.org/wiki/Standard_deviation)
///
/// # Examples
///
/// ```
/// use traquer::statistic::distribution;
///
/// distribution::std_dev(&vec![1.0,2.0,3.0,4.0,5.0], 3).collect::<Vec<f64>>();
/// ```
pub fn std_dev(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ {
iter::repeat(f64::NAN)
.take(window - 1)
.chain(_std_dev(data, window))
}

/// Kurtosis
///
/// A measure of the "tailedness" of the probability distribution of a real-valued random
/// variable. Computes the standard unbiased estimator.
///
/// ```math
/// \frac{(n+1)\,n\,(n-1)}{(n-2)\,(n-3)} \; \frac{\sum_{i=1}^n (x_i - \bar{x})^4}{\left(\sum_{i=1}^n (x_i - \bar{x})^2\right)^2} - 3\,\frac{(n-1)^2}{(n-2)\,(n-3)} \\[6pt]
/// ```
///
/// ## Sources
///
/// [[1]](https://en.wikipedia.org/wiki/Kurtosis)
///
/// # Examples
///
/// ```
/// use traquer::statistic::distribution;
///
/// distribution::kurtosis(&vec![1.0,2.0,3.0,4.0,5.0], 3).collect::<Vec<f64>>();
/// ```
pub fn kurtosis(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ {
let adj1 = ((window + 1) * window * (window - 1)) as f64 / ((window - 2) * (window - 3)) as f64;
let adj2 = 3.0 * (window - 1).pow(2) as f64 / ((window - 2) * (window - 3)) as f64;
iter::repeat(f64::NAN)
.take(window - 1)
.chain(data.windows(window).map(move |w| {
let mean = w.iter().sum::<f64>() / window as f64;
let k4 = w.iter().fold(0.0, |acc, x| acc + (x - mean).powi(4));
let k2 = w.iter().fold(0.0, |acc, x| acc + (x - mean).powi(2));
adj1 * k4 / k2.powi(2) - adj2
}))
}

/// Skew
///
/// The degree of asymmetry observed in a probability distribution. When data points on a
/// bell curve are not distributed symmetrically to the left and right sides of the median,
/// the bell curve is skewed. Distributions can be positive and right-skewed, or negative
/// and left-skewed.
///
/// Computes unbiased adjusted Fisher–Pearson standardized moment coefficient G1
///
/// ```math
/// g_1 = \frac{m_3}{m_2^{3/2}}
/// = \frac{\tfrac{1}{n} \sum_{i=1}^n (x_i-\overline{x})^3}{\left[\tfrac{1}{n} \sum_{i=1}^n (x_i-\overline{x})^2 \right]^{3/2}},
///
/// G_1 = \frac{k_3}{k_2^{3/2}} = \frac{n^2}{(n-1)(n-2)}\; b_1 = \frac{\sqrt{n(n-1)}}{n-2}\; g_1
/// ```
///
/// ## Sources
///
/// [[1]](https://en.wikipedia.org/wiki/Skewness)
///
/// # Examples
///
/// ```
/// use traquer::statistic::distribution;
///
/// distribution::skew(&vec![1.0,2.0,3.0,4.0,5.0], 3).collect::<Vec<f64>>();
/// ```
pub fn skew(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ {
iter::repeat(f64::NAN)
.take(window - 1)
.chain(data.windows(window).map(move |w| {
let mean = w.iter().sum::<f64>() / window as f64;
let m3 = w.iter().fold(0.0, |acc, x| acc + (x - mean).powi(3)) / window as f64;
let m2 = (w.iter().fold(0.0, |acc, x| acc + (x - mean).powi(2)) / window as f64)
.powf(3.0 / 2.0);
((window * (window - 1)) as f64).sqrt() / (window - 2) as f64 * m3 / m2
}))
}

fn quickselect(data: &[f64], k: usize) -> Option<f64> {
// https://rust-lang-nursery.github.io/rust-cookbook/science/mathematics/statistics.html
// TODO: implement median of medians to get pivot
let part = match data.len() {
0 => None,
_ => {
let (pivot_slice, tail) = data.split_at(1);
let pivot = pivot_slice[0];
let (left, right) = tail.iter().fold((vec![], vec![]), |mut splits, next| {
{
let (ref mut left, ref mut right) = &mut splits;
if next < &pivot {
left.push(*next);
} else {
right.push(*next);
}
}
splits
});

Some((left, pivot, right))
}
};

match part {
None => None,
Some((left, pivot, right)) => {
let pivot_idx = left.len();

match pivot_idx.cmp(&k) {
Ordering::Equal => Some(pivot),
Ordering::Greater => quickselect(&left, k),
Ordering::Less => quickselect(&right, k - (pivot_idx + 1)),
}
}
}
}

/// Median
///
/// The value separating the higher half from the lower half of a data sample, a population, or a
/// probability distribution within a window.
///
/// # Examples
///
/// ```
/// use traquer::statistic::distribution;
///
/// distribution::median(&vec![1.0,2.0,3.0,4.0,5.0], 3).collect::<Vec<f64>>();
/// ```
pub fn median(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ {
iter::repeat(f64::NAN)
.take(window - 1)
.chain(data.windows(window).map(move |w| match window {
even if even % 2 == 0 => {
let fst_med = quickselect(w, (even / 2) - 1);
let snd_med = quickselect(w, even / 2);

match (fst_med, snd_med) {
(Some(fst), Some(snd)) => (fst + snd) * 0.5,
_ => f64::NAN,
}
}
odd => quickselect(w, odd / 2).unwrap(),
}))
}

/// Quantile
///
/// Partition a finite set of values into q subsets of (nearly) equal sizes. 50th percentile is
/// equivalent to median.
///
/// # Examples
///
/// ```
/// use traquer::statistic::distribution;
///
/// distribution::quantile(&vec![1.0,2.0,3.0,4.0,5.0], 3, 90.0).collect::<Vec<f64>>();
/// ```
pub fn quantile(data: &[f64], window: usize, q: f64) -> impl Iterator<Item = f64> + '_ {
iter::repeat(f64::NAN)
.take(window - 1)
.chain(data.windows(window).map(move |w| {
let pos = (window - 1) as f64 * (q.clamp(0.0, 100.0) / 100.0);
match pos {
exact if exact.fract() == 0.0 => quickselect(w, exact as usize).unwrap(),
_ => {
let lower = quickselect(w, pos.floor() as usize);
let upper = quickselect(w, pos.ceil() as usize);

match (lower, upper) {
(Some(l), Some(u)) => l * (pos.ceil() - pos) + u * (pos - pos.floor()),
_ => f64::NAN,
}
}
}
}))
}
2 changes: 2 additions & 0 deletions src/statistic/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
pub mod distribution;
pub mod regression;
Loading

0 comments on commit e6d9069

Please sign in to comment.