From 6e9d7671d6acd9798fca9a06f7d0629318141293 Mon Sep 17 00:00:00 2001
From: Benjamin Lieser <benjamin.lieser@mpinat.mpg.de>
Date: Mon, 16 Sep 2024 15:31:11 +0200
Subject: [PATCH 01/13] cdf testing prototype

---
 rand_distr/tests/cdf.rs | 132 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 132 insertions(+)
 create mode 100644 rand_distr/tests/cdf.rs
diff --git a/rand_distr/tests/cdf.rs b/rand_distr/tests/cdf.rs
new file mode 100644
index 0000000000..32284c81ad
--- /dev/null
+++ b/rand_distr/tests/cdf.rs
@@ -0,0 +1,132 @@
+// Copyright 2021 Developers of the Rand project.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use rand::SeedableRng;
+use rand_distr::{Distribution, Normal};
+use statrs::distribution::ContinuousCDF;
+use statrs::distribution::DiscreteCDF;
+
+/// Empirical Cumulative Distribution Function (ECDF)
+struct ECDF {
+    sorted_samples: Vec<f64>,
+}
+
+impl ECDF {
+    fn new(mut samples: Vec<f64>) -> Self {
+        samples.sort_by(|a, b| a.partial_cmp(b).unwrap());
+        Self {
+            sorted_samples: samples,
+        }
+    }
+
+    /// Returns the step points of the ECDF
+    /// The ECDF is a step function that increases by 1/n at each sample point
+    /// The function is continoius from the right, so we give the bigger value at the step points
+    /// First point is (-inf, 0.0), last point is (max(samples), 1.0)
+    fn step_points(&self) -> Vec<(f64, f64)> {
+        let mut points = Vec::with_capacity(self.sorted_samples.len() + 1);
+        let mut last = f64::NEG_INFINITY;
+        let mut count = 0;
+        let n = self.sorted_samples.len() as f64;
+        for &x in &self.sorted_samples {
+            if x != last {
+                points.push((last, count as f64 / n));
+                last = x;
+            }
+            count += 1;
+        }
+        points.push((last, count as f64 / n));
+        points
+    }
+}
+
+fn kolmo_smirnov_statistic_continuous(ecdf: ECDF, cdf: impl Fn(f64) -> f64) -> f64 {
+    let mut max_diff = 0.;
+    // The maximum will always be at a step point, because the cdf is continious monotonic increasing
+    let step_points = ecdf.step_points();
+    for i in 0..step_points.len() - 1 {
+        // This shift is because we want the value of the ecdf at (x_{i + 1} - epsilon) = ecdf(x_i) and comare it to cdf(x_{i + 1})
+        // cdf(x_{i + 1} - epsilon) = cdf(x_{i+1}) because cdf is continious
+        let x = step_points[i + 1].0;
+        let diff = (step_points[i].1 - cdf(x)).abs();
+
+        if diff > max_diff {
+            max_diff = diff;
+        }
+    }
+    max_diff
+}
+
+fn kolmo_smirnov_statistic_discrete(ecdf: ECDF, cdf: impl Fn(i64) -> f64) -> f64 {
+    let mut max_diff = 0.;
+
+    // The maximum will always be at a step point, but we have to be careful because the cdf is not continious
+    // It is actually easier because both are right continious step functions
+    let step_points = ecdf.step_points();
+    for i in 1..step_points.len() {
+        let x = step_points[i].0;
+        let diff = (step_points[i].1 - cdf(x as i64)).abs();
+        if diff > max_diff {
+            max_diff = diff;
+        }
+    }
+    max_diff
+}
+#[test]
+fn normal() {
+    const N_SAMPLES: u64 = 1_000_000;
+    const MEAN: f64 = 2.;
+    const STD_DEV: f64 = 50.0;
+
+    let dist = Normal::new(MEAN, STD_DEV).unwrap();
+    let mut rng = rand::rngs::SmallRng::seed_from_u64(1);
+    let samples = (0..N_SAMPLES).map(|_| dist.sample(&mut rng)).collect();
+    let ecdf = ECDF::new(samples);
+
+    let cdf = |x| {
+        statrs::distribution::Normal::new(MEAN, STD_DEV)
+            .unwrap()
+            .cdf(x)
+    };
+
+    let ks_statistic = kolmo_smirnov_statistic_continuous(ecdf, cdf);
+
+    let critical_value = 1.36 / (N_SAMPLES as f64).sqrt();
+
+    println!("KS statistic: {}", ks_statistic);
+    println!("Critical value: {}", critical_value);
+    assert!(ks_statistic < critical_value);
+}
+
+#[test]
+fn binomial() {
+    const N_SAMPLES: u64 = 10_000_000;
+    const N: u64 = 10000000;
+    const P: f64 = 0.001;
+
+    let dist = rand_distr::Binomial::new(N, P).unwrap();
+    let mut rng = rand::rngs::SmallRng::seed_from_u64(1);
+    let samples = (0..N_SAMPLES)
+        .map(|_| dist.sample(&mut rng) as f64)
+        .collect();
+    let ecdf = ECDF::new(samples);
+
+    let cdf = |x| {
+        statrs::distribution::Binomial::new(P, N)
+            .unwrap()
+            .cdf(x as u64)
+    };
+
+    let ks_statistic = kolmo_smirnov_statistic_discrete(ecdf, cdf);
+
+    let critical_value = 1.36 / (N_SAMPLES as f64).sqrt();
+
+    println!("KS statistic: {}", ks_statistic);
+    println!("Critical value: {}", critical_value);
+    assert!(ks_statistic < critical_value);
+}

From 166b94548bfe65b808210f28dd934b52329cd2c7 Mon Sep 17 00:00:00 2001
From: Benjamin Lieser <benjamin.lieser@mpinat.mpg.de>
Date: Tue, 17 Sep 2024 17:56:41 +0200
Subject: [PATCH 02/13] abstraction ove the test

---
 rand_distr/tests/cdf.rs | 57 +++++++++++++++++++++--------------------
 1 file changed, 29 insertions(+), 28 deletions(-)

diff --git a/rand_distr/tests/cdf.rs b/rand_distr/tests/cdf.rs
index 32284c81ad..e1a8b4bf94 100644
--- a/rand_distr/tests/cdf.rs
+++ b/rand_distr/tests/cdf.rs
@@ -77,25 +77,17 @@ fn kolmo_smirnov_statistic_discrete(ecdf: ECDF, cdf: impl Fn(i64) -> f64) -> f64
     }
     max_diff
 }
-#[test]
-fn normal() {
-    const N_SAMPLES: u64 = 1_000_000;
-    const MEAN: f64 = 2.;
-    const STD_DEV: f64 = 50.0;
 
-    let dist = Normal::new(MEAN, STD_DEV).unwrap();
+#[cfg(test)]
+fn test_continious(dist: impl Distribution<f64>, cdf: impl Fn(f64) -> f64) {
+    const N_SAMPLES: u64 = 1_000_000;
     let mut rng = rand::rngs::SmallRng::seed_from_u64(1);
     let samples = (0..N_SAMPLES).map(|_| dist.sample(&mut rng)).collect();
     let ecdf = ECDF::new(samples);
 
-    let cdf = |x| {
-        statrs::distribution::Normal::new(MEAN, STD_DEV)
-            .unwrap()
-            .cdf(x)
-    };
-
-    let ks_statistic = kolmo_smirnov_statistic_continuous(ecdf, cdf);
+    let ks_statistic = kolmo_smirnov_statistic_continuous(ecdf, |x| cdf(x));
 
+    // It is a heuristic value, we want to prove that the distributions match, so p values don't help us
     let critical_value = 1.36 / (N_SAMPLES as f64).sqrt();
 
     println!("KS statistic: {}", ks_statistic);
@@ -103,26 +95,19 @@ fn normal() {
     assert!(ks_statistic < critical_value);
 }
 
-#[test]
-fn binomial() {
-    const N_SAMPLES: u64 = 10_000_000;
-    const N: u64 = 10000000;
-    const P: f64 = 0.001;
-
-    let dist = rand_distr::Binomial::new(N, P).unwrap();
+#[cfg(test)]
+fn test_discrete<I: TryInto<i64>>(dist: impl Distribution<I>, cdf: impl Fn(i64) -> f64)
+where
+    <I as TryInto<i64>>::Error: std::fmt::Debug,
+{
+    const N_SAMPLES: u64 = 1_000_000;
     let mut rng = rand::rngs::SmallRng::seed_from_u64(1);
     let samples = (0..N_SAMPLES)
-        .map(|_| dist.sample(&mut rng) as f64)
+        .map(|_| dist.sample(&mut rng).try_into().unwrap() as f64)
         .collect();
     let ecdf = ECDF::new(samples);
 
-    let cdf = |x| {
-        statrs::distribution::Binomial::new(P, N)
-            .unwrap()
-            .cdf(x as u64)
-    };
-
-    let ks_statistic = kolmo_smirnov_statistic_discrete(ecdf, cdf);
+    let ks_statistic = kolmo_smirnov_statistic_discrete(ecdf, |x| cdf(x));
 
     let critical_value = 1.36 / (N_SAMPLES as f64).sqrt();
 
@@ -130,3 +115,19 @@ fn binomial() {
     println!("Critical value: {}", critical_value);
     assert!(ks_statistic < critical_value);
 }
+
+#[test]
+fn normal() {
+    test_continious(Normal::new(0.0, 1.0).unwrap(), |x| {
+        statrs::distribution::Normal::new(0.0, 1.0).unwrap().cdf(x)
+    });
+}
+
+#[test]
+fn binomial() {
+    test_discrete(rand_distr::Binomial::new(10, 0.5).unwrap(), |x| {
+        statrs::distribution::Binomial::new(0.5, 10)
+            .unwrap()
+            .cdf(x as u64)
+    });
+}

From e937ef63d6a907ae516763b2d8d4edab9dcb17cb Mon Sep 17 00:00:00 2001
From: Benjamin Lieser <benjamin.lieser@mpinat.mpg.de>
Date: Tue, 17 Sep 2024 18:10:24 +0200
Subject: [PATCH 03/13] fix internal dependency on rand_distr

---
 rand_distr/tests/cdf.rs | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/rand_distr/tests/cdf.rs b/rand_distr/tests/cdf.rs
index e1a8b4bf94..63d0a405b7 100644
--- a/rand_distr/tests/cdf.rs
+++ b/rand_distr/tests/cdf.rs
@@ -12,11 +12,11 @@ use statrs::distribution::ContinuousCDF;
 use statrs::distribution::DiscreteCDF;
 
 /// Empirical Cumulative Distribution Function (ECDF)
-struct ECDF {
+struct Ecdf {
     sorted_samples: Vec<f64>,
 }
 
-impl ECDF {
+impl Ecdf {
     fn new(mut samples: Vec<f64>) -> Self {
         samples.sort_by(|a, b| a.partial_cmp(b).unwrap());
         Self {
@@ -45,7 +45,7 @@ impl ECDF {
     }
 }
 
-fn kolmo_smirnov_statistic_continuous(ecdf: ECDF, cdf: impl Fn(f64) -> f64) -> f64 {
+fn kolmo_smirnov_statistic_continuous(ecdf: Ecdf, cdf: impl Fn(f64) -> f64) -> f64 {
     let mut max_diff = 0.;
     // The maximum will always be at a step point, because the cdf is continious monotonic increasing
     let step_points = ecdf.step_points();
@@ -62,15 +62,14 @@ fn kolmo_smirnov_statistic_continuous(ecdf: ECDF, cdf: impl Fn(f64) -> f64) -> f
     max_diff
 }
 
-fn kolmo_smirnov_statistic_discrete(ecdf: ECDF, cdf: impl Fn(i64) -> f64) -> f64 {
+fn kolmo_smirnov_statistic_discrete(ecdf: Ecdf, cdf: impl Fn(i64) -> f64) -> f64 {
     let mut max_diff = 0.;
 
     // The maximum will always be at a step point, but we have to be careful because the cdf is not continious
     // It is actually easier because both are right continious step functions
     let step_points = ecdf.step_points();
-    for i in 1..step_points.len() {
-        let x = step_points[i].0;
-        let diff = (step_points[i].1 - cdf(x as i64)).abs();
+    for (x, y) in step_points[1..].iter() {
+        let diff = (*y - cdf(*x as i64)).abs();
         if diff > max_diff {
             max_diff = diff;
         }
@@ -83,9 +82,9 @@ fn test_continious(dist: impl Distribution<f64>, cdf: impl Fn(f64) -> f64) {
     const N_SAMPLES: u64 = 1_000_000;
     let mut rng = rand::rngs::SmallRng::seed_from_u64(1);
     let samples = (0..N_SAMPLES).map(|_| dist.sample(&mut rng)).collect();
-    let ecdf = ECDF::new(samples);
+    let ecdf = Ecdf::new(samples);
 
-    let ks_statistic = kolmo_smirnov_statistic_continuous(ecdf, |x| cdf(x));
+    let ks_statistic = kolmo_smirnov_statistic_continuous(ecdf, cdf);
 
     // It is a heuristic value, we want to prove that the distributions match, so p values don't help us
     let critical_value = 1.36 / (N_SAMPLES as f64).sqrt();
@@ -105,9 +104,9 @@ where
     let samples = (0..N_SAMPLES)
         .map(|_| dist.sample(&mut rng).try_into().unwrap() as f64)
         .collect();
-    let ecdf = ECDF::new(samples);
+    let ecdf = Ecdf::new(samples);
 
-    let ks_statistic = kolmo_smirnov_statistic_discrete(ecdf, |x| cdf(x));
+    let ks_statistic = kolmo_smirnov_statistic_discrete(ecdf, cdf);
 
     let critical_value = 1.36 / (N_SAMPLES as f64).sqrt();
 

From 886ca3493dd6526ab9a4386dfad56040ffa83cbb Mon Sep 17 00:00:00 2001
From: Benjamin Lieser <benjamin.lieser@mpinat.mpg.de>
Date: Thu, 19 Sep 2024 14:42:56 +0200
Subject: [PATCH 04/13] higher critical value, the old one would fail in 1 of
 20 cases even if everything works correctly

---
 rand_distr/tests/cdf.rs | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/rand_distr/tests/cdf.rs b/rand_distr/tests/cdf.rs
index 63d0a405b7..bffa6eb638 100644
--- a/rand_distr/tests/cdf.rs
+++ b/rand_distr/tests/cdf.rs
@@ -80,14 +80,14 @@ fn kolmo_smirnov_statistic_discrete(ecdf: Ecdf, cdf: impl Fn(i64) -> f64) -> f64
 #[cfg(test)]
 fn test_continious(dist: impl Distribution<f64>, cdf: impl Fn(f64) -> f64) {
     const N_SAMPLES: u64 = 1_000_000;
-    let mut rng = rand::rngs::SmallRng::seed_from_u64(1);
+    let mut rng = rand::rngs::SmallRng::seed_from_u64(6);
     let samples = (0..N_SAMPLES).map(|_| dist.sample(&mut rng)).collect();
     let ecdf = Ecdf::new(samples);
 
     let ks_statistic = kolmo_smirnov_statistic_continuous(ecdf, cdf);
 
-    // It is a heuristic value, we want to prove that the distributions match, so p values don't help us
-    let critical_value = 1.36 / (N_SAMPLES as f64).sqrt();
+    // If the sampler is correct, we expect less than 0.001 false positives (alpha = 0.001). Passing this does not prove that the sampler is correct but is a good indication.
+    let critical_value = 1.95 / (N_SAMPLES as f64).sqrt();
 
     println!("KS statistic: {}", ks_statistic);
     println!("Critical value: {}", critical_value);
@@ -100,7 +100,7 @@ where
     <I as TryInto<i64>>::Error: std::fmt::Debug,
 {
     const N_SAMPLES: u64 = 1_000_000;
-    let mut rng = rand::rngs::SmallRng::seed_from_u64(1);
+    let mut rng = rand::rngs::SmallRng::seed_from_u64(2);
     let samples = (0..N_SAMPLES)
         .map(|_| dist.sample(&mut rng).try_into().unwrap() as f64)
         .collect();
@@ -108,7 +108,8 @@ where
 
     let ks_statistic = kolmo_smirnov_statistic_discrete(ecdf, cdf);
 
-    let critical_value = 1.36 / (N_SAMPLES as f64).sqrt();
+    // If the sampler is correct, we expect less than 0.001 false positives (alpha = 0.001). Passing this does not prove that the sampler is correct but is a good indication.
+    let critical_value = 1.95 / (N_SAMPLES as f64).sqrt();
 
     println!("KS statistic: {}", ks_statistic);
     println!("Critical value: {}", critical_value);

From 954fd5db96178c0bb71b3296cd1e3083f435a8d3 Mon Sep 17 00:00:00 2001
From: Benjamin Lieser <benjamin.lieser@mpinat.mpg.de>
Date: Wed, 25 Sep 2024 09:24:31 +0200
Subject: [PATCH 05/13] test 20 seeds

---
 rand_distr/tests/cdf.rs | 31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/rand_distr/tests/cdf.rs b/rand_distr/tests/cdf.rs
index bffa6eb638..d9b1127d1b 100644
--- a/rand_distr/tests/cdf.rs
+++ b/rand_distr/tests/cdf.rs
@@ -78,15 +78,16 @@ fn kolmo_smirnov_statistic_discrete(ecdf: Ecdf, cdf: impl Fn(i64) -> f64) -> f64
 }
 
 #[cfg(test)]
-fn test_continious(dist: impl Distribution<f64>, cdf: impl Fn(f64) -> f64) {
+fn test_continious(seed: u64, dist: impl Distribution<f64>, cdf: impl Fn(f64) -> f64) {
     const N_SAMPLES: u64 = 1_000_000;
-    let mut rng = rand::rngs::SmallRng::seed_from_u64(6);
+    let mut rng = rand::rngs::SmallRng::seed_from_u64(seed);
     let samples = (0..N_SAMPLES).map(|_| dist.sample(&mut rng)).collect();
     let ecdf = Ecdf::new(samples);
 
     let ks_statistic = kolmo_smirnov_statistic_continuous(ecdf, cdf);
 
-    // If the sampler is correct, we expect less than 0.001 false positives (alpha = 0.001). Passing this does not prove that the sampler is correct but is a good indication.
+    // If the sampler is correct, we expect less than 0.001 false positives (alpha = 0.001).
+    // Passing this does not prove that the sampler is correct but is a good indication.
     let critical_value = 1.95 / (N_SAMPLES as f64).sqrt();
 
     println!("KS statistic: {}", ks_statistic);
@@ -95,12 +96,12 @@ fn test_continious(dist: impl Distribution<f64>, cdf: impl Fn(f64) -> f64) {
 }
 
 #[cfg(test)]
-fn test_discrete<I: TryInto<i64>>(dist: impl Distribution<I>, cdf: impl Fn(i64) -> f64)
+fn test_discrete<I: TryInto<i64>>(seed: u64, dist: impl Distribution<I>, cdf: impl Fn(i64) -> f64)
 where
     <I as TryInto<i64>>::Error: std::fmt::Debug,
 {
     const N_SAMPLES: u64 = 1_000_000;
-    let mut rng = rand::rngs::SmallRng::seed_from_u64(2);
+    let mut rng = rand::rngs::SmallRng::seed_from_u64(seed);
     let samples = (0..N_SAMPLES)
         .map(|_| dist.sample(&mut rng).try_into().unwrap() as f64)
         .collect();
@@ -118,16 +119,20 @@ where
 
 #[test]
 fn normal() {
-    test_continious(Normal::new(0.0, 1.0).unwrap(), |x| {
-        statrs::distribution::Normal::new(0.0, 1.0).unwrap().cdf(x)
-    });
+    for seed in 1..20 {
+        test_continious(seed, Normal::new(0.0, 1.0).unwrap(), |x| {
+            statrs::distribution::Normal::new(0.0, 1.0).unwrap().cdf(x)
+        });
+    }
 }
 
 #[test]
 fn binomial() {
-    test_discrete(rand_distr::Binomial::new(10, 0.5).unwrap(), |x| {
-        statrs::distribution::Binomial::new(0.5, 10)
-            .unwrap()
-            .cdf(x as u64)
-    });
+    for seed in 1..20 {
+        test_discrete(seed, rand_distr::Binomial::new(10, 0.5).unwrap(), |x| {
+            statrs::distribution::Binomial::new(0.5, 10)
+                .unwrap()
+                .cdf(x as u64)
+        });
+    }
 }

From 2367b8a5a31220901ea64afae98a3fdaa2a6c5fe Mon Sep 17 00:00:00 2001
From: Benjamin Lieser <benjamin.lieser@mpinat.mpg.de>
Date: Mon, 30 Sep 2024 16:17:55 +0200
Subject: [PATCH 06/13] fixed both KS statistic functions

---
 rand_distr/tests/cdf.rs | 62 ++++++++++++++++++++++-------------------
 1 file changed, 34 insertions(+), 28 deletions(-)

diff --git a/rand_distr/tests/cdf.rs b/rand_distr/tests/cdf.rs
index d9b1127d1b..bb96803eb7 100644
--- a/rand_distr/tests/cdf.rs
+++ b/rand_distr/tests/cdf.rs
@@ -11,6 +11,10 @@ use rand_distr::{Distribution, Normal};
 use statrs::distribution::ContinuousCDF;
 use statrs::distribution::DiscreteCDF;
 
+// [1] Nonparametric Goodness-of-Fit Tests for Discrete Null Distributions
+//     by Taylor B. Arnold and John W. Emerson
+//     http://www.stat.yale.edu/~jay/EmersonMaterials/DiscreteGOF.pdf
+
 /// Empirical Cumulative Distribution Function (ECDF)
 struct Ecdf {
     sorted_samples: Vec<f64>,
@@ -26,7 +30,7 @@ impl Ecdf {
 
     /// Returns the step points of the ECDF
     /// The ECDF is a step function that increases by 1/n at each sample point
-    /// The function is continoius from the right, so we give the bigger value at the step points
+    /// The function is continuous from the right, so we give the bigger value at the step points
     /// First point is (-inf, 0.0), last point is (max(samples), 1.0)
     fn step_points(&self) -> Vec<(f64, f64)> {
         let mut points = Vec::with_capacity(self.sorted_samples.len() + 1);
@@ -45,46 +49,48 @@ impl Ecdf {
     }
 }
 
-fn kolmo_smirnov_statistic_continuous(ecdf: Ecdf, cdf: impl Fn(f64) -> f64) -> f64 {
-    let mut max_diff = 0.;
-    // The maximum will always be at a step point, because the cdf is continious monotonic increasing
-    let step_points = ecdf.step_points();
-    for i in 0..step_points.len() - 1 {
-        // This shift is because we want the value of the ecdf at (x_{i + 1} - epsilon) = ecdf(x_i) and comare it to cdf(x_{i + 1})
-        // cdf(x_{i + 1} - epsilon) = cdf(x_{i+1}) because cdf is continious
-        let x = step_points[i + 1].0;
-        let diff = (step_points[i].1 - cdf(x)).abs();
-
-        if diff > max_diff {
-            max_diff = diff;
-        }
+fn kolmogorov_smirnov_statistic_continuous(ecdf: Ecdf, cdf: impl Fn(f64) -> f64) -> f64 {
+    // We implement equation (3) from [1]
+
+    let mut max_diff: f64 = 0.;
+
+    let step_points = ecdf.step_points(); // x_i in the paper
+    for i in 1..step_points.len() {
+        let (x_i, f_i) = step_points[i];
+        let (_, f_i_1) = step_points[i - 1];
+        let max_1 = (cdf(x_i) - f_i).abs();
+        let max_2 = (cdf(x_i) - f_i_1).abs();
+
+        max_diff = max_diff.max(max_1).max(max_2);
     }
     max_diff
 }
 
-fn kolmo_smirnov_statistic_discrete(ecdf: Ecdf, cdf: impl Fn(i64) -> f64) -> f64 {
-    let mut max_diff = 0.;
+fn kolmogorov_smirnov_statistic_discrete(ecdf: Ecdf, cdf: impl Fn(i64) -> f64) -> f64 {
+    // We implement equation (4) from [1]
+    
+    let mut max_diff: f64 = 0.;
 
-    // The maximum will always be at a step point, but we have to be careful because the cdf is not continious
-    // It is actually easier because both are right continious step functions
-    let step_points = ecdf.step_points();
-    for (x, y) in step_points[1..].iter() {
-        let diff = (*y - cdf(*x as i64)).abs();
-        if diff > max_diff {
-            max_diff = diff;
-        }
+    let step_points = ecdf.step_points(); // x_i in the paper
+    for i in 1..step_points.len() {
+        let (x_i, f_i) = step_points[i];
+        let (_, f_i_1) = step_points[i - 1];
+        let max_1 = (cdf(x_i as i64) - f_i).abs();
+        let max_2 = (cdf(x_i as i64 - 1) - f_i_1).abs(); // -1 is the same as -epsilon, because we have integer support
+
+        max_diff = max_diff.max(max_1).max(max_2);
     }
     max_diff
 }
 
 #[cfg(test)]
-fn test_continious(seed: u64, dist: impl Distribution<f64>, cdf: impl Fn(f64) -> f64) {
+fn test_continuous(seed: u64, dist: impl Distribution<f64>, cdf: impl Fn(f64) -> f64) {
     const N_SAMPLES: u64 = 1_000_000;
     let mut rng = rand::rngs::SmallRng::seed_from_u64(seed);
     let samples = (0..N_SAMPLES).map(|_| dist.sample(&mut rng)).collect();
     let ecdf = Ecdf::new(samples);
 
-    let ks_statistic = kolmo_smirnov_statistic_continuous(ecdf, cdf);
+    let ks_statistic = kolmogorov_smirnov_statistic_continuous(ecdf, cdf);
 
     // If the sampler is correct, we expect less than 0.001 false positives (alpha = 0.001).
     // Passing this does not prove that the sampler is correct but is a good indication.
@@ -107,7 +113,7 @@ where
         .collect();
     let ecdf = Ecdf::new(samples);
 
-    let ks_statistic = kolmo_smirnov_statistic_discrete(ecdf, cdf);
+    let ks_statistic = kolmogorov_smirnov_statistic_discrete(ecdf, cdf);
 
     // If the sampler is correct, we expect less than 0.001 false positives (alpha = 0.001). Passing this does not prove that the sampler is correct but is a good indication.
     let critical_value = 1.95 / (N_SAMPLES as f64).sqrt();
@@ -120,7 +126,7 @@ where
 #[test]
 fn normal() {
     for seed in 1..20 {
-        test_continious(seed, Normal::new(0.0, 1.0).unwrap(), |x| {
+        test_continuous(seed, Normal::new(0.0, 1.0).unwrap(), |x| {
             statrs::distribution::Normal::new(0.0, 1.0).unwrap().cdf(x)
         });
     }

From da35ab2be28dc3dc3aefa6243ac8066a04e0145f Mon Sep 17 00:00:00 2001
From: Benjamin Lieser <benjamin.lieser@mpinat.mpg.de>
Date: Mon, 30 Sep 2024 16:28:05 +0200
Subject: [PATCH 07/13] fix binomial cdf, to give 0 for negative numbers

---
 rand_distr/tests/cdf.rs | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/rand_distr/tests/cdf.rs b/rand_distr/tests/cdf.rs
index bb96803eb7..af31a53f60 100644
--- a/rand_distr/tests/cdf.rs
+++ b/rand_distr/tests/cdf.rs
@@ -68,7 +68,7 @@ fn kolmogorov_smirnov_statistic_continuous(ecdf: Ecdf, cdf: impl Fn(f64) -> f64)
 
 fn kolmogorov_smirnov_statistic_discrete(ecdf: Ecdf, cdf: impl Fn(i64) -> f64) -> f64 {
     // We implement equation (4) from [1]
-    
+
     let mut max_diff: f64 = 0.;
 
     let step_points = ecdf.step_points(); // x_i in the paper
@@ -136,9 +136,13 @@ fn normal() {
 fn binomial() {
     for seed in 1..20 {
         test_discrete(seed, rand_distr::Binomial::new(10, 0.5).unwrap(), |x| {
-            statrs::distribution::Binomial::new(0.5, 10)
-                .unwrap()
-                .cdf(x as u64)
+            if x < 0 {
+                0.0
+            } else {
+                statrs::distribution::Binomial::new(0.5, 10)
+                    .unwrap()
+                    .cdf(x as u64)
+            }
         });
     }
 }

From 72955e30c4776bf17635c800fbd53ec51d2877d9 Mon Sep 17 00:00:00 2001
From: Benjamin Lieser <benjamin.lieser@mpinat.mpg.de>
Date: Mon, 30 Sep 2024 17:04:46 +0200
Subject: [PATCH 08/13] diverse parameters, removed some duplicated code

---
 rand_distr/tests/cdf.rs | 83 +++++++++++++++++++++++++++--------------
 1 file changed, 55 insertions(+), 28 deletions(-)

diff --git a/rand_distr/tests/cdf.rs b/rand_distr/tests/cdf.rs
index af31a53f60..153b42cf18 100644
--- a/rand_distr/tests/cdf.rs
+++ b/rand_distr/tests/cdf.rs
@@ -6,6 +6,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
+use num_traits::AsPrimitive;
 use rand::SeedableRng;
 use rand_distr::{Distribution, Normal};
 use statrs::distribution::ContinuousCDF;
@@ -83,40 +84,46 @@ fn kolmogorov_smirnov_statistic_discrete(ecdf: Ecdf, cdf: impl Fn(i64) -> f64) -
     max_diff
 }
 
-#[cfg(test)]
-fn test_continuous(seed: u64, dist: impl Distribution<f64>, cdf: impl Fn(f64) -> f64) {
-    const N_SAMPLES: u64 = 1_000_000;
-    let mut rng = rand::rngs::SmallRng::seed_from_u64(seed);
-    let samples = (0..N_SAMPLES).map(|_| dist.sample(&mut rng)).collect();
-    let ecdf = Ecdf::new(samples);
-
-    let ks_statistic = kolmogorov_smirnov_statistic_continuous(ecdf, cdf);
+const SAMPLE_SIZE: u64 = 1_000_000;
 
+fn critical_value() -> f64 {
     // If the sampler is correct, we expect less than 0.001 false positives (alpha = 0.001).
     // Passing this does not prove that the sampler is correct but is a good indication.
-    let critical_value = 1.95 / (N_SAMPLES as f64).sqrt();
-
-    println!("KS statistic: {}", ks_statistic);
-    println!("Critical value: {}", critical_value);
-    assert!(ks_statistic < critical_value);
+    1.95 / (SAMPLE_SIZE as f64).sqrt()
 }
 
-#[cfg(test)]
-fn test_discrete<I: TryInto<i64>>(seed: u64, dist: impl Distribution<I>, cdf: impl Fn(i64) -> f64)
+fn sample_ecdf<T>(seed: u64, dist: impl Distribution<T>) -> Ecdf
 where
-    <I as TryInto<i64>>::Error: std::fmt::Debug,
+    T: AsPrimitive<f64>,
 {
-    const N_SAMPLES: u64 = 1_000_000;
     let mut rng = rand::rngs::SmallRng::seed_from_u64(seed);
-    let samples = (0..N_SAMPLES)
-        .map(|_| dist.sample(&mut rng).try_into().unwrap() as f64)
+    let samples = (0..SAMPLE_SIZE)
+        .map(|_| dist.sample(&mut rng).as_())
         .collect();
-    let ecdf = Ecdf::new(samples);
+    Ecdf::new(samples)
+}
+
+fn test_continuous(seed: u64, dist: impl Distribution<f64>, cdf: impl Fn(f64) -> f64) {
+    let ecdf = sample_ecdf(seed, dist);
+    let ks_statistic = kolmogorov_smirnov_statistic_continuous(ecdf, cdf);
+
+    let critical_value = critical_value();
+
+    println!("KS statistic: {}", ks_statistic);
+    println!("Critical value: {}", critical_value);
+    assert!(ks_statistic < critical_value);
+}
 
+fn test_discrete<I: AsPrimitive<f64>>(
+    seed: u64,
+    dist: impl Distribution<I>,
+    cdf: impl Fn(i64) -> f64,
+) {
+    let ecdf = sample_ecdf(seed, dist);
     let ks_statistic = kolmogorov_smirnov_statistic_discrete(ecdf, cdf);
 
-    // If the sampler is correct, we expect less than 0.001 false positives (alpha = 0.001). Passing this does not prove that the sampler is correct but is a good indication.
-    let critical_value = 1.95 / (N_SAMPLES as f64).sqrt();
+    // This critical value is bigger than it could be for discrete distributions, but because of large sample sizes this should not matter too much
+    let critical_value = critical_value();
 
     println!("KS statistic: {}", ks_statistic);
     println!("Critical value: {}", critical_value);
@@ -125,21 +132,41 @@ where
 
 #[test]
 fn normal() {
-    for seed in 1..20 {
-        test_continuous(seed, Normal::new(0.0, 1.0).unwrap(), |x| {
-            statrs::distribution::Normal::new(0.0, 1.0).unwrap().cdf(x)
+    let parameters = [
+        (0.0, 1.0),
+        (0.0, 0.1),
+        (1.0, 10.0),
+        (1.0, 100.0),
+        (-1.0, 0.00001),
+        (-1.0, 0.0000001),
+    ];
+
+    for (seed, (mean, std_dev)) in parameters.into_iter().enumerate() {
+        test_continuous(seed as u64, Normal::new(mean, std_dev).unwrap(), |x| {
+            statrs::distribution::Normal::new(mean, std_dev)
+                .unwrap()
+                .cdf(x)
         });
     }
 }
 
 #[test]
 fn binomial() {
-    for seed in 1..20 {
-        test_discrete(seed, rand_distr::Binomial::new(10, 0.5).unwrap(), |x| {
+    let parameters = [
+        (0.5, 10),
+        (0.5, 100),
+        (0.1, 10),
+        (0.0000001, 1000000),
+        (0.0000001, 10),
+        (0.9999, 2),
+    ];
+
+    for (seed, (p, n)) in parameters.into_iter().enumerate() {
+        test_discrete(seed as u64, rand_distr::Binomial::new(n, p).unwrap(), |x| {
             if x < 0 {
                 0.0
             } else {
-                statrs::distribution::Binomial::new(0.5, 10)
+                statrs::distribution::Binomial::new(p, n)
                     .unwrap()
                     .cdf(x as u64)
             }

From ea38d1409c2e7c6009ddeec1cab8a85a9a634a3e Mon Sep 17 00:00:00 2001
From: Benjamin Lieser <benjamin.lieser@mpinat.mpg.de>
Date: Thu, 3 Oct 2024 16:19:36 +0200
Subject: [PATCH 09/13] own impl of cdf not depending on statrs

---
 rand_distr/CHANGELOG.md |  1 +
 rand_distr/tests/cdf.rs | 44 +++++++++++++++++++++++++++++------------
 2 files changed, 32 insertions(+), 13 deletions(-)

diff --git a/rand_distr/CHANGELOG.md b/rand_distr/CHANGELOG.md
index 51bde39e86..fd13e76a4d 100644
--- a/rand_distr/CHANGELOG.md
+++ b/rand_distr/CHANGELOG.md
@@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## Unreleased
 - The `serde1` feature has been renamed `serde` (#1477)
+- Add Kolmogorov Smirnov test for sampling of `Normal` and `Binomial` (#1494)
 
 ### Added
 - Add plots for `rand_distr` distributions to documentation (#1434)
diff --git a/rand_distr/tests/cdf.rs b/rand_distr/tests/cdf.rs
index 153b42cf18..3013693171 100644
--- a/rand_distr/tests/cdf.rs
+++ b/rand_distr/tests/cdf.rs
@@ -6,11 +6,13 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
+use core::f64;
+
 use num_traits::AsPrimitive;
 use rand::SeedableRng;
 use rand_distr::{Distribution, Normal};
-use statrs::distribution::ContinuousCDF;
-use statrs::distribution::DiscreteCDF;
+use special::Beta;
+use special::Primitive;
 
 // [1] Nonparametric Goodness-of-Fit Tests for Discrete Null Distributions
 //     by Taylor B. Arnold and John W. Emerson
@@ -130,6 +132,10 @@ fn test_discrete<I: AsPrimitive<f64>>(
     assert!(ks_statistic < critical_value);
 }
 
+fn normal_cdf(x: f64, mean: f64, std_dev: f64) -> f64 {
+    0.5 * (1.0 + ((x - mean) / (std_dev * f64::consts::SQRT_2)).erf())
+}
+
 #[test]
 fn normal() {
     let parameters = [
@@ -143,13 +149,31 @@ fn normal() {
 
     for (seed, (mean, std_dev)) in parameters.into_iter().enumerate() {
         test_continuous(seed as u64, Normal::new(mean, std_dev).unwrap(), |x| {
-            statrs::distribution::Normal::new(mean, std_dev)
-                .unwrap()
-                .cdf(x)
+            normal_cdf(x, mean, std_dev)
         });
     }
 }
 
+fn binomial_cdf(k: i64, p: f64, n: u64) -> f64 {
+    if k < 0 {
+        return 0.0;
+    }
+    if k >= n as i64 {
+        return 1.0;
+    }
+
+    let a = n as f64 - k as f64;
+    let b = k as f64 + 1.0;
+
+    let q = 1.0 - p;
+
+    let ln_beta_ab = a.ln_beta(b);
+    
+    let reg_incomplete_beta = q.inc_beta(a, b, ln_beta_ab);
+
+    return reg_incomplete_beta;
+}    
+
 #[test]
 fn binomial() {
     let parameters = [
@@ -162,14 +186,8 @@ fn binomial() {
     ];
 
     for (seed, (p, n)) in parameters.into_iter().enumerate() {
-        test_discrete(seed as u64, rand_distr::Binomial::new(n, p).unwrap(), |x| {
-            if x < 0 {
-                0.0
-            } else {
-                statrs::distribution::Binomial::new(p, n)
-                    .unwrap()
-                    .cdf(x as u64)
-            }
+        test_discrete(seed as u64, rand_distr::Binomial::new(n, p).unwrap(), |k| {
+            binomial_cdf(k, p, n)
         });
     }
 }

From 17f3565d4c0e4c4eb5aebf0fe2a6f0b8739d30e2 Mon Sep 17 00:00:00 2001
From: Benjamin Lieser <benjamin.lieser@mpinat.mpg.de>
Date: Thu, 3 Oct 2024 16:22:52 +0200
Subject: [PATCH 10/13] fmt + clippy

---
 rand_distr/tests/cdf.rs | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/rand_distr/tests/cdf.rs b/rand_distr/tests/cdf.rs
index 3013693171..2624b2f3e3 100644
--- a/rand_distr/tests/cdf.rs
+++ b/rand_distr/tests/cdf.rs
@@ -168,11 +168,9 @@ fn binomial_cdf(k: i64, p: f64, n: u64) -> f64 {
     let q = 1.0 - p;
 
     let ln_beta_ab = a.ln_beta(b);
-    
-    let reg_incomplete_beta = q.inc_beta(a, b, ln_beta_ab);
 
-    return reg_incomplete_beta;
-}    
+    q.inc_beta(a, b, ln_beta_ab)
+}
 
 #[test]
 fn binomial() {

From 86e8d81ab569f8a896344e5dac9fd16ed2e30fbb Mon Sep 17 00:00:00 2001
From: Benjamin Lieser <benjamin.lieser@mpinat.mpg.de>
Date: Mon, 7 Oct 2024 15:33:04 +0200
Subject: [PATCH 11/13] review

---
 rand_distr/tests/cdf.rs | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/rand_distr/tests/cdf.rs b/rand_distr/tests/cdf.rs
index 2624b2f3e3..438b71ec93 100644
--- a/rand_distr/tests/cdf.rs
+++ b/rand_distr/tests/cdf.rs
@@ -61,8 +61,9 @@ fn kolmogorov_smirnov_statistic_continuous(ecdf: Ecdf, cdf: impl Fn(f64) -> f64)
     for i in 1..step_points.len() {
         let (x_i, f_i) = step_points[i];
         let (_, f_i_1) = step_points[i - 1];
-        let max_1 = (cdf(x_i) - f_i).abs();
-        let max_2 = (cdf(x_i) - f_i_1).abs();
+        let cdf_i = cdf(x_i);
+        let max_1 = (cdf_i - f_i).abs();
+        let max_2 = (cdf_i - f_i_1).abs();
 
         max_diff = max_diff.max(max_1).max(max_2);
     }
@@ -105,7 +106,9 @@ where
     Ecdf::new(samples)
 }
 
-fn test_continuous(seed: u64, dist: impl Distribution<f64>, cdf: impl Fn(f64) -> f64) {
+/// Tests a distribution against an analytical CDF.
+/// The cdf has to be continuous.
+pub fn test_continuous(seed: u64, dist: impl Distribution<f64>, cdf: impl Fn(f64) -> f64) {
     let ecdf = sample_ecdf(seed, dist);
     let ks_statistic = kolmogorov_smirnov_statistic_continuous(ecdf, cdf);
 
@@ -116,7 +119,9 @@ fn test_continuous(seed: u64, dist: impl Distribution<f64>, cdf: impl Fn(f64) ->
     assert!(ks_statistic < critical_value);
 }
 
-fn test_discrete<I: AsPrimitive<f64>>(
+/// Tests a distribution over integers against an analytical CDF.
+/// The analytical CDF must not have jump points which are not integers.
+pub fn test_discrete<I: AsPrimitive<f64>>(
     seed: u64,
     dist: impl Distribution<I>,
     cdf: impl Fn(i64) -> f64,

From 32be84f356aa184a305c1c1209f8b442088b379b Mon Sep 17 00:00:00 2001
From: Benjamin Lieser <benjamin.lieser@mpinat.mpg.de>
Date: Mon, 7 Oct 2024 15:34:17 +0200
Subject: [PATCH 12/13] capitalize CDF consistency in doc

---
 rand_distr/tests/cdf.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rand_distr/tests/cdf.rs b/rand_distr/tests/cdf.rs
index 438b71ec93..fc74636c1e 100644
--- a/rand_distr/tests/cdf.rs
+++ b/rand_distr/tests/cdf.rs
@@ -107,7 +107,7 @@ where
 }
 
 /// Tests a distribution against an analytical CDF.
-/// The cdf has to be continuous.
+/// The CDF has to be continuous.
 pub fn test_continuous(seed: u64, dist: impl Distribution<f64>, cdf: impl Fn(f64) -> f64) {
     let ecdf = sample_ecdf(seed, dist);
     let ks_statistic = kolmogorov_smirnov_statistic_continuous(ecdf, cdf);

From 62dd2be19977202bb414174b16c1119323d76963 Mon Sep 17 00:00:00 2001
From: Benjamin Lieser <benjamin.lieser@mpinat.mpg.de>
Date: Tue, 8 Oct 2024 10:04:21 +0200
Subject: [PATCH 13/13] review

---
 rand_distr/tests/cdf.rs | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/rand_distr/tests/cdf.rs b/rand_distr/tests/cdf.rs
index fc74636c1e..71b808d241 100644
--- a/rand_distr/tests/cdf.rs
+++ b/rand_distr/tests/cdf.rs
@@ -138,7 +138,7 @@ pub fn test_discrete<I: AsPrimitive<f64>>(
 }
 
 fn normal_cdf(x: f64, mean: f64, std_dev: f64) -> f64 {
-    0.5 * (1.0 + ((x - mean) / (std_dev * f64::consts::SQRT_2)).erf())
+    0.5 * ((mean - x) / (std_dev * f64::consts::SQRT_2)).erfc()
 }
 
 #[test]
@@ -163,11 +163,12 @@ fn binomial_cdf(k: i64, p: f64, n: u64) -> f64 {
     if k < 0 {
         return 0.0;
     }
-    if k >= n as i64 {
+    let k = k as u64;
+    if k >= n {
         return 1.0;
     }
 
-    let a = n as f64 - k as f64;
+    let a = (n - k) as f64;
     let b = k as f64 + 1.0;
 
     let q = 1.0 - p;