-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
739 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ | |
|
||
pub mod momentum; | ||
pub mod smooth; | ||
pub mod statistic; | ||
pub mod trend; | ||
pub mod volatility; | ||
pub mod volume; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,206 @@ | ||
//! Distribution Functions | ||
//! | ||
//! Provides functions which describe the distribution of a dataset. This can relate to the | ||
//! shape, centre, or dispersion of the | ||
//! distribution[[1]](https://en.wikipedia.org/wiki/Probability_distribution) | ||
use std::cmp::Ordering; | ||
use std::iter; | ||
|
||
pub(crate) fn _std_dev(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ { | ||
data.windows(window).map(move |w| { | ||
let mean = w.iter().sum::<f64>() / window as f64; | ||
(w.iter().fold(0.0, |acc, x| acc + (x - mean).powi(2)) / window as f64).sqrt() | ||
}) | ||
} | ||
|
||
/// Standard Deviation | ||
/// | ||
/// A measure of the amount of variation of a random variable expected about its mean. | ||
/// | ||
/// ## Sources | ||
/// | ||
/// [[1]](https://en.wikipedia.org/wiki/Standard_deviation) | ||
/// | ||
/// # Examples | ||
/// | ||
/// ``` | ||
/// use traquer::statistic::distribution; | ||
/// | ||
/// distribution::std_dev(&vec![1.0,2.0,3.0,4.0,5.0], 3).collect::<Vec<f64>>(); | ||
/// ``` | ||
pub fn std_dev(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ { | ||
iter::repeat(f64::NAN) | ||
.take(window - 1) | ||
.chain(_std_dev(data, window)) | ||
} | ||
|
||
/// Kurtosis | ||
/// | ||
/// A measure of the "tailedness" of the probability distribution of a real-valued random | ||
/// variable. Computes the standard unbiased estimator. | ||
/// | ||
/// ```math | ||
/// \frac{(n+1)\,n\,(n-1)}{(n-2)\,(n-3)} \; \frac{\sum_{i=1}^n (x_i - \bar{x})^4}{\left(\sum_{i=1}^n (x_i - \bar{x})^2\right)^2} - 3\,\frac{(n-1)^2}{(n-2)\,(n-3)} \\[6pt] | ||
/// ``` | ||
/// | ||
/// ## Sources | ||
/// | ||
/// [[1]](https://en.wikipedia.org/wiki/Kurtosis) | ||
/// | ||
/// # Examples | ||
/// | ||
/// ``` | ||
/// use traquer::statistic::distribution; | ||
/// | ||
/// distribution::kurtosis(&vec![1.0,2.0,3.0,4.0,5.0], 3).collect::<Vec<f64>>(); | ||
/// ``` | ||
pub fn kurtosis(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ { | ||
let adj1 = ((window + 1) * window * (window - 1)) as f64 / ((window - 2) * (window - 3)) as f64; | ||
let adj2 = 3.0 * (window - 1).pow(2) as f64 / ((window - 2) * (window - 3)) as f64; | ||
iter::repeat(f64::NAN) | ||
.take(window - 1) | ||
.chain(data.windows(window).map(move |w| { | ||
let mean = w.iter().sum::<f64>() / window as f64; | ||
let k4 = w.iter().fold(0.0, |acc, x| acc + (x - mean).powi(4)); | ||
let k2 = w.iter().fold(0.0, |acc, x| acc + (x - mean).powi(2)); | ||
adj1 * k4 / k2.powi(2) - adj2 | ||
})) | ||
} | ||
|
||
/// Skew | ||
/// | ||
/// The degree of asymmetry observed in a probability distribution. When data points on a | ||
/// bell curve are not distributed symmetrically to the left and right sides of the median, | ||
/// the bell curve is skewed. Distributions can be positive and right-skewed, or negative | ||
/// and left-skewed. | ||
/// | ||
/// Computes unbiased adjusted Fisher–Pearson standardized moment coefficient G1 | ||
/// | ||
/// ```math | ||
/// g_1 = \frac{m_3}{m_2^{3/2}} | ||
/// = \frac{\tfrac{1}{n} \sum_{i=1}^n (x_i-\overline{x})^3}{\left[\tfrac{1}{n} \sum_{i=1}^n (x_i-\overline{x})^2 \right]^{3/2}}, | ||
/// | ||
/// G_1 = \frac{k_3}{k_2^{3/2}} = \frac{n^2}{(n-1)(n-2)}\; b_1 = \frac{\sqrt{n(n-1)}}{n-2}\; g_1 | ||
/// ``` | ||
/// | ||
/// ## Sources | ||
/// | ||
/// [[1]](https://en.wikipedia.org/wiki/Skewness) | ||
/// | ||
/// # Examples | ||
/// | ||
/// ``` | ||
/// use traquer::statistic::distribution; | ||
/// | ||
/// distribution::skew(&vec![1.0,2.0,3.0,4.0,5.0], 3).collect::<Vec<f64>>(); | ||
/// ``` | ||
pub fn skew(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ { | ||
iter::repeat(f64::NAN) | ||
.take(window - 1) | ||
.chain(data.windows(window).map(move |w| { | ||
let mean = w.iter().sum::<f64>() / window as f64; | ||
let m3 = w.iter().fold(0.0, |acc, x| acc + (x - mean).powi(3)) / window as f64; | ||
let m2 = (w.iter().fold(0.0, |acc, x| acc + (x - mean).powi(2)) / window as f64) | ||
.powf(3.0 / 2.0); | ||
((window * (window - 1)) as f64).sqrt() / (window - 2) as f64 * m3 / m2 | ||
})) | ||
} | ||
|
||
fn quickselect(data: &[f64], k: usize) -> Option<f64> { | ||
// https://rust-lang-nursery.github.io/rust-cookbook/science/mathematics/statistics.html | ||
// TODO: implement median of medians to get pivot | ||
let part = match data.len() { | ||
0 => None, | ||
_ => { | ||
let (pivot_slice, tail) = data.split_at(1); | ||
let pivot = pivot_slice[0]; | ||
let (left, right) = tail.iter().fold((vec![], vec![]), |mut splits, next| { | ||
{ | ||
let (ref mut left, ref mut right) = &mut splits; | ||
if next < &pivot { | ||
left.push(*next); | ||
} else { | ||
right.push(*next); | ||
} | ||
} | ||
splits | ||
}); | ||
|
||
Some((left, pivot, right)) | ||
} | ||
}; | ||
|
||
match part { | ||
None => None, | ||
Some((left, pivot, right)) => { | ||
let pivot_idx = left.len(); | ||
|
||
match pivot_idx.cmp(&k) { | ||
Ordering::Equal => Some(pivot), | ||
Ordering::Greater => quickselect(&left, k), | ||
Ordering::Less => quickselect(&right, k - (pivot_idx + 1)), | ||
} | ||
} | ||
} | ||
} | ||
|
||
/// Median | ||
/// | ||
/// The value separating the higher half from the lower half of a data sample, a population, or a | ||
/// probability distribution within a window. | ||
/// | ||
/// # Examples | ||
/// | ||
/// ``` | ||
/// use traquer::statistic::distribution; | ||
/// | ||
/// distribution::median(&vec![1.0,2.0,3.0,4.0,5.0], 3).collect::<Vec<f64>>(); | ||
/// ``` | ||
pub fn median(data: &[f64], window: usize) -> impl Iterator<Item = f64> + '_ { | ||
iter::repeat(f64::NAN) | ||
.take(window - 1) | ||
.chain(data.windows(window).map(move |w| match window { | ||
even if even % 2 == 0 => { | ||
let fst_med = quickselect(w, (even / 2) - 1); | ||
let snd_med = quickselect(w, even / 2); | ||
|
||
match (fst_med, snd_med) { | ||
(Some(fst), Some(snd)) => (fst + snd) * 0.5, | ||
_ => f64::NAN, | ||
} | ||
} | ||
odd => quickselect(w, odd / 2).unwrap(), | ||
})) | ||
} | ||
|
||
/// Quantile | ||
/// | ||
/// Partition a finite set of values into q subsets of (nearly) equal sizes. 50th percentile is | ||
/// equivalent to median. | ||
/// | ||
/// # Examples | ||
/// | ||
/// ``` | ||
/// use traquer::statistic::distribution; | ||
/// | ||
/// distribution::quantile(&vec![1.0,2.0,3.0,4.0,5.0], 3, 90.0).collect::<Vec<f64>>(); | ||
/// ``` | ||
pub fn quantile(data: &[f64], window: usize, q: f64) -> impl Iterator<Item = f64> + '_ { | ||
iter::repeat(f64::NAN) | ||
.take(window - 1) | ||
.chain(data.windows(window).map(move |w| { | ||
let pos = (window - 1) as f64 * (q.clamp(0.0, 100.0) / 100.0); | ||
match pos { | ||
exact if exact.fract() == 0.0 => quickselect(w, exact as usize).unwrap(), | ||
_ => { | ||
let lower = quickselect(w, pos.floor() as usize); | ||
let upper = quickselect(w, pos.ceil() as usize); | ||
|
||
match (lower, upper) { | ||
(Some(l), Some(u)) => l * (pos.ceil() - pos) + u * (pos - pos.floor()), | ||
_ => f64::NAN, | ||
} | ||
} | ||
} | ||
})) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
pub mod distribution; | ||
pub mod regression; |
Oops, something went wrong.