From d9d6b6f6ec7dfdd959678e25ba8799bd05972403 Mon Sep 17 00:00:00 2001
From: Andreas Busjahn <abusjahn@healthtwist.de>
Date: Thu, 1 Aug 2024 12:53:24 +0200
Subject: [PATCH] lilliefors correction in ksnormal test

---
 DESCRIPTION                     |   9 ++--
 NEWS.md                         |   6 ++-
 R/tests.R                       |  83 ++++++++++++++++++--------------
 man/ksnormal.Rd                 |   6 ++-
 tests/testthat/ksnormal_out.rda | Bin 96 -> 96 bytes
 tests/testthat/test-ksnormal.R  |   8 +--
 6 files changed, 64 insertions(+), 48 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index c175b7b..224be8f 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -10,8 +10,8 @@ Description: The main functionalities of 'wrappedtools' are:
     descriptive statistics and p-values; creating specialized plots for 
     correlation matrices. Functions were mainly written for my own daily work or 
     teaching, but may be of use to others as well.
-Version: 0.9.5
-Date: 2024-03-16
+Version: 0.9.6
+Date: 2024-07-30
 Authors@R: c(
     person(given = "Andreas",
            family = "Busjahn",
@@ -46,11 +46,12 @@ Imports:
     broom,
     rlist,
     DescTools,
-    flextable
+    flextable,
+    nortest
 Depends: 
     R (>= 4.2)
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
 LazyData: true
 VignetteBuilder: knitr
 Suggests: rmarkdown, testthat, ggrepel
diff --git a/NEWS.md b/NEWS.md
index ac6e55e..19557e3 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,4 +1,8 @@
-#wrappedtools 0.9.5 combines all updates from minor releases 9.4.x
+#wrappedtools 0.9.6
+- function ksnormal now uses Lilliefors test by default
+
+#wrappedtools 0.9.5 
+- combines all updates from minor releases 9.4.x
 
 #wrappedtools 0.9.4
 - ksnormal can handle input of 0 data points
diff --git a/R/tests.R b/R/tests.R
index 312639a..1e5b0c0 100644
--- a/R/tests.R
+++ b/R/tests.R
@@ -184,6 +184,7 @@ pairwise_ordcat_test <- function(dep_var, indep_var, adjmethod = "fdr", plevel =
 #' If less than 2 values are provided, NA is returned.
 #'
 #' @param x Vector of data to test.
+#' @param lillie Logical, should the Lilliefors test be used? Defaults to TRUE
 #'
 #' @return p.value from \link{ks.test}.
 #'
@@ -194,14 +195,22 @@ pairwise_ordcat_test <- function(dep_var, indep_var, adjmethod = "fdr", plevel =
 #'   sd = sd(mtcars$wt, na.rm = TRUE)
 #' )
 #' # wrapped version:
-#' ksnormal(x = mtcars$wt)
+#' ksnormal(x = mtcars$wt, lillie=FALSE)
 #' @export
-ksnormal <- function(x) {
+ksnormal <- function(x, lillie=TRUE) {
   if(length(na.omit(x))>1){
-  suppressWarnings(
-    assign("ksout",ks.test(x, "pnorm", mean(x, na.rm = TRUE), sd(x, na.rm = TRUE),
-                     exact = FALSE
-    )$p.value))
+    if(lillie){
+    assign("ksout",
+           nortest::lillie.test(x)$p.value)  
+    } else{
+      suppressWarnings(
+        assign("ksout",
+               ks.test(x, "pnorm", mean(x, na.rm = TRUE), 
+                       sd(x, na.rm = TRUE),
+                       exact = FALSE
+               )$p.value))
+    }
+    
   }else{
     ksout <- NA
   }
@@ -619,14 +628,14 @@ compare2qualvars <- function(data, dep_vars, indep_var,
           data |> 
           select(all_of(c(indep_var,var_i))) |> 
           mutate(testvar=forcats::fct_collapse(!!sym(var_i),
-                                      check=subgroups[sg_i],
-                                      other_level = 'other')) |> 
+                                               check=subgroups[sg_i],
+                                               other_level = 'other')) |> 
           select(all_of(indep_var),'testvar') |> table()
         if(ncol(testdata)>1) {
           p_sg <- fisher.test(testdata,
-                            simulate.p.value = TRUE,
-                            B = 10^5)$p.value |> 
-          formatP(mark = mark, pretext = pretext)
+                              simulate.p.value = TRUE,
+                              B = 10^5)$p.value |> 
+            formatP(mark = mark, pretext = pretext)
         } else{
           p_sg <- ''
         }
@@ -634,7 +643,7 @@ compare2qualvars <- function(data, dep_vars, indep_var,
           freqBYgroup[[var_i]]$p <- 
             paste(na.omit(freqBYgroup[[var_i]]$p),p_sg) |> 
             str_squish()} else {
-        freqBYgroup[[var_i]]$p[sg_i] <- p_sg
+              freqBYgroup[[var_i]]$p[sg_i] <- p_sg
             }
       }
     }
@@ -650,19 +659,19 @@ compare2qualvars <- function(data, dep_vars, indep_var,
   for (var_i in seq_along(dep_vars)) {
     if (!singleline) {
       out_tmp <- add_row(out[0,],
-                     Variable = c(
-                       dep_vars[var_i],
-                       glue::glue(
-                         "{indentor}{levels[[var_i]][[1]]}"
-                       )
-                     ),
-                     desc_all = c(spacer, freq[[var_i]][[1]]),
-                     g1 = c(spacer, freqBYgroup[[var_i]][[1]]),
-                     g2 = c(spacer, freqBYgroup[[var_i]][[2]]),
-                     p = c(
-                       p[[var_i]][[1]],
-                       rep(spacer, nrow(freqBYgroup[[var_i]]))
-                     )
+                         Variable = c(
+                           dep_vars[var_i],
+                           glue::glue(
+                             "{indentor}{levels[[var_i]][[1]]}"
+                           )
+                         ),
+                         desc_all = c(spacer, freq[[var_i]][[1]]),
+                         g1 = c(spacer, freqBYgroup[[var_i]][[1]]),
+                         g2 = c(spacer, freqBYgroup[[var_i]][[2]]),
+                         p = c(
+                           p[[var_i]][[1]],
+                           rep(spacer, nrow(freqBYgroup[[var_i]]))
+                         )
       )
       if(p_subgroups){
         out_tmp$pSubgroup <- c(spacer,freqBYgroup[[var_i]]$p)
@@ -670,16 +679,16 @@ compare2qualvars <- function(data, dep_vars, indep_var,
       out <- rbind(out,out_tmp)
     } else {
       out_tmp <- add_row(out[0,],
-                     Variable = paste(
-                       dep_vars[var_i],
-                       # rep(spacer,
-                       #     nrow(freqBYgroup[[var_i]])-1)),
-                       levels[[var_i]][[1]]
-                     ),
-                     desc_all = freq[[var_i]][[1]],
-                     g1 = freqBYgroup[[var_i]][[1]],
-                     g2 = freqBYgroup[[var_i]][[2]],
-                     p = p[[var_i]][[1]]
+                         Variable = paste(
+                           dep_vars[var_i],
+                           # rep(spacer,
+                           #     nrow(freqBYgroup[[var_i]])-1)),
+                           levels[[var_i]][[1]]
+                         ),
+                         desc_all = freq[[var_i]][[1]],
+                         g1 = freqBYgroup[[var_i]][[1]],
+                         g2 = freqBYgroup[[var_i]][[2]],
+                         p = p[[var_i]][[1]]
       )
       if(p_subgroups){
         out_tmp$pSubgroup <- freqBYgroup[[var_i]]$p
@@ -1142,7 +1151,7 @@ compare_n_numvars <- function(.data = rawdata,
       collapse = ";")) |>
         pivot_longer(everything(),names_to = 'Variable', 
                      values_to = 'p between groups')) |> 
-        # gather(key = "Variable", value = "p between groups")) |>
+    # gather(key = "Variable", value = "p between groups")) |>
     full_join(purrr::reduce(t$p_wcox_t_out, rbind) |>
                 matrix(nrow = length(dep_vars), byrow = FALSE) |>
                 as_tibble(.name_repair = "unique") |>
@@ -1155,7 +1164,7 @@ compare_n_numvars <- function(.data = rawdata,
     )) |>
       pivot_longer(everything(),names_to = 'Variable', 
                    values_to = 'p vs.ref'))  
-      # gather(key = "Variable", value = "p vs.ref"))
+  # gather(key = "Variable", value = "p vs.ref"))
   results <- cbind(
     results,
     purrr::map2_df(
diff --git a/man/ksnormal.Rd b/man/ksnormal.Rd
index 5a8c432..94e8828 100644
--- a/man/ksnormal.Rd
+++ b/man/ksnormal.Rd
@@ -4,10 +4,12 @@
 \alias{ksnormal}
 \title{Kolmogorov-Smirnov-Test against Normal distribution}
 \usage{
-ksnormal(x)
+ksnormal(x, lillie = TRUE)
 }
 \arguments{
 \item{x}{Vector of data to test.}
+
+\item{lillie}{Logical, should the Lilliefors test be used? Defaults to TRUE}
 }
 \value{
 p.value from \link{ks.test}.
@@ -24,5 +26,5 @@ ks.test(
   sd = sd(mtcars$wt, na.rm = TRUE)
 )
 # wrapped version:
-ksnormal(x = mtcars$wt)
+ksnormal(x = mtcars$wt, lillie=FALSE)
 }
diff --git a/tests/testthat/ksnormal_out.rda b/tests/testthat/ksnormal_out.rda
index b84e91f80eefd67a8a4d14d408392a280cd8cae7..9fc9c42d36e3ad634242dbb005a960e2bd89753a 100644
GIT binary patch
delta 35
lcmYdD5RvcZU;qQQ?gvQ;2?=QliHQ?MTsXovxusPAB><Z12_OIf

delta 35
mcmYdD5RvcZU;qQQ?gvQ;2?=QliAfVhTsW=@g&nE@N&o<yv<Yhf

diff --git a/tests/testthat/test-ksnormal.R b/tests/testthat/test-ksnormal.R
index d3fda68..a0cb50c 100644
--- a/tests/testthat/test-ksnormal.R
+++ b/tests/testthat/test-ksnormal.R
@@ -1,13 +1,13 @@
-# out1 <- ksnormal(x = mtcars$wt)
+# out1 <- ksnormal(x = mtcars$wt,lillie=FALSE)
 # out2 <- suppressWarnings(ks.test(
 #   x = mtcars$wt, pnorm, mean = mean(mtcars$wt, na.rm = TRUE),
 #   sd = sd(mtcars$wt, na.rm = TRUE))$p.value
 # )
 # saveRDS(list(out1=out1, out2=out2),file = 'tests/testthat/ksnormal_out.rda')
 
-test_that("ksnormal() with defaults", {
+test_that("ksnormal() without Lilliefors", {
   expected <- readRDS('ksnormal_out.rda')
-  expect_equal(ksnormal(x = mtcars$wt), expected[[1]])
-  expect_equal(ksnormal(x = mtcars$wt),expected[[2]])
+  expect_equal(ksnormal(x = mtcars$wt,lillie = FALSE), expected[[1]])
+  expect_equal(ksnormal(x = mtcars$wt, lillie = FALSE),expected[[2]])
 })