Merge pull request #251 from tidymodels/develop

Develop
tidymodels · Oct 1, 2019 · 6021338 · 6021338
2 parents d48bacb + 1b7f4aa
commit 6021338
Show file tree

Hide file tree

Showing 91 changed files with 8,230 additions and 932 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -10,7 +10,6 @@
 ^docs*
 ^CONDUCT\.md$
 ^README\.md$
-^NEWS\.md$
 ^cran-comments\.md$
 ^_build\.sh$
 ^appveyor\.yml$

diff --git a/.travis.yml b/.travis.yml
@@ -11,6 +11,7 @@ latex: false
 env:
  global:
    - CRAN: http://cran.rstudio.com
+   - VDIFFR_RUN_TESTS: false
 
 notifications:
   email:

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: infer
 Type: Package
 Title: Tidy Statistical Inference
-Version: 0.4.1
+Version: 0.5.0
 Authors@R: c(
     person("Andrew", "Bray", email = "abray@reed.edu", role = c("aut", "cre")),
     person("Chester", "Ismay", email = "chester.ismay@gmail.com", role = "aut"),
@@ -28,7 +28,8 @@ Imports:
     ggplot2,
     magrittr,
     glue (>= 1.3.0),
-    grDevices
+    grDevices,
+    purrr
 Depends:
     R (>= 3.1.2)
 Suggests: 
@@ -39,7 +40,8 @@ Suggests:
     nycflights13,
     stringr,
     testthat,
-    covr
+    covr,
+    vdiffr
 URL: https://github.com/tidymodels/infer
 BugReports: https://github.com/tidymodels/infer/issues
 Roxygen: list(markdown = TRUE)

diff --git a/NAMESPACE b/NAMESPACE
@@ -46,14 +46,17 @@ importFrom(ggplot2,ylab)
 importFrom(glue,glue_collapse)
 importFrom(magrittr,"%>%")
 importFrom(methods,hasArg)
+importFrom(purrr,compact)
 importFrom(rlang,"!!")
 importFrom(rlang,":=")
 importFrom(rlang,enquo)
 importFrom(rlang,eval_tidy)
 importFrom(rlang,f_lhs)
 importFrom(rlang,f_rhs)
+importFrom(rlang,get_expr)
 importFrom(rlang,quo)
 importFrom(rlang,sym)
+importFrom(stats,as.formula)
 importFrom(stats,dchisq)
 importFrom(stats,df)
 importFrom(stats,dnorm)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,18 @@
+# infer 0.5.0
+
+## Breaking changes
+
+- `shade_confidence_interval()` now plots vertical lines starting from zero (previously - from the bottom of a plot) (#234).
+- `shade_p_value()` now uses "area under the curve" approach to shading (#229).
+
+## Other
+
+- Updated `chisq_test()` to take arguments in a response/explanatory format, perform goodness of fit tests, and default to the approximation approach (#241).
+- Updated `chisq_stat()` to do goodness of fit (#241).
+- Make interface to `hypothesize()` clearer by adding the options for the point null parameters to the function signature (#242).
+- Manage `infer` class more systematically (#219).
+- Use `vdiffr` for plot testing (#221).
+
 # infer 0.4.1
 
 - Added Evgeni Chasnovski as author for his incredible work on refactoring the package and providing excellent support.

diff --git a/R/calculate.R b/R/calculate.R
@@ -108,7 +108,7 @@ calculate <- function(x,
     )
   }
 #   else {
-#     class(result) <- append("infer", class(result))
+#     result <- append_infer_class(result)
 #   }
 
   result <- copy_attrs(to = result, from = x)
@@ -232,12 +232,12 @@ calc_impl.Chisq <- function(type, x, order, ...) {
       p_levels <- get_par_levels(x)
       x %>%
         dplyr::summarize(
-          stat = stats::chisq.test(
+          stat = suppressWarnings(stats::chisq.test(
             # Ensure correct ordering of parameters
             table(!!(attr(x, "response")))[p_levels],
             p = attr(x, "params")
           )$stat
-        )
+        ))
     } else {
       # Straight from `specify()`
       stop_glue(

diff --git a/R/generate.R b/R/generate.R
@@ -145,9 +145,7 @@ bootstrap <- function(x, reps = 1, ...) {
   result <- rep_sample_n(x, size = nrow(x), replace = TRUE, reps = reps)
   result <- copy_attrs(to = result, from = x)
 
-  class(result) <- append("infer", class(result))
-
-  result
+  append_infer_class(result)
 }
 
 #' @importFrom dplyr bind_rows group_by
@@ -159,9 +157,7 @@ permute <- function(x, reps = 1, ...) {
 
   df_out <- copy_attrs(to = df_out, from = x)
 
-  class(df_out) <- append("infer", class(df_out))
-
-  df_out
+  append_infer_class(df_out)
 }
 
 permute_once <- function(x, ...) {
@@ -195,7 +191,7 @@ simulate <- function(x, reps = 1, ...) {
 
   rep_tbl <- copy_attrs(to = rep_tbl, from = x)
 
-  class(rep_tbl) <- append("infer", class(rep_tbl))
-
-  dplyr::group_by(rep_tbl, replicate)
+  rep_tbl <- dplyr::group_by(rep_tbl, replicate)
+  
+  append_infer_class(rep_tbl)
 }
diff --git a/R/hypothesize.R b/R/hypothesize.R
@@ -3,7 +3,14 @@
 #' @param x A data frame that can be coerced into a [tibble][tibble::tibble].
 #' @param null The null hypothesis. Options include `"independence"` and
 #'   `"point"`.
-#' @param ... Arguments passed to downstream functions.
+#' @param p The true proportion of successes (a number between 0 and 1). To be used with point null hypotheses when the specified response
+#' variable is categorical.
+#' @param mu The true mean (any numerical value). To be used with point null
+#' hypotheses when the specified response variable is continuous.
+#' @param med The true median (any numerical value). To be used with point null
+#' hypotheses when the specified response variable is continuous.
+#' @param sigma The true standard deviation (any numerical value). To be used with
+#' point null hypotheses.
 #'
 #' @return A tibble containing the response (and explanatory, if specified)
 #'   variable data with parameter information stored as well.
@@ -17,71 +24,43 @@
 #'   generate(reps = 100, type = "permute") %>%
 #'   calculate(stat = "F")
 #'
+#' @importFrom purrr compact
 #' @export
-hypothesize <- function(x, null, ...) {
-  hypothesize_checks(x, null)
+hypothesize <- function(x, null, p = NULL, mu = NULL, med = NULL, sigma = NULL) {
 
+  # Custom logic, because using match.arg() would give a default value when
+  # the user didn't specify anything.
+  null <- match_null_hypothesis(null)
   attr(x, "null") <- null
 
-  dots <- list(...)
-
-  if ((null == "point") && (length(dots) == 0)) {
-    stop_glue(
-      "Provide a parameter and a value to check such as `mu = 30` for the ",
-      "point hypothesis."
-    )
-  }
-
-  if ((null == "independence") && (length(dots) > 0)) {
-    warning_glue(
-      "Parameter values are not specified when testing that two variables are ",
-      "independent."
-    )
-  }
-
-  if ((length(dots) > 0) && (null == "point")) {
-    params <- parse_params(dots, x)
-    attr(x, "params") <- params
-
-    if (any(grepl("p.", attr(attr(x, "params"), "names")))) {
-     # simulate instead of bootstrap based on the value of `p` provided
-      attr(x, "type") <- "simulate"
-    } else {
-      attr(x, "type") <- "bootstrap"
-    }
+  hypothesize_checks(x, null)
 
-  }
+  dots <- compact(list(p = p, mu = mu, med = med, sigma = sigma))
 
-  if (!is.null(null) && (null == "independence")) {
-    attr(x, "type") <- "permute"
-  }
+  switch(
+    null,
+    independence =  {
+      params <- sanitize_hypothesis_params_independence(dots)
+      attr(x, "type") <- "permute"
+    },
+    point = {
+      params <- sanitize_hypothesis_params_point(dots, x)
+      attr(x, "params") <- unlist(params)
 
-  # Check one proportion test set up correctly
-  if (null == "point") {
-    if (is.factor(response_variable(x))) {
-      if (!any(grepl("p", attr(attr(x, "params"), "names")))) {
-        stop_glue(
-          'Testing one categorical variable requires `p` to be used as a ',
-          'parameter.'
-        )
+      if (!is.null(params$p)) {
+        # simulate instead of bootstrap based on the value of `p` provided
+        attr(x, "type") <- "simulate"
+      } else {
+        # Check one proportion test set up correctly
+        if (is.factor(response_variable(x))) {
+          stop_glue(
+            'Testing one categorical variable requires `p` to be used as a ',
+            'parameter.'
+          )
+        }
+        attr(x, "type") <- "bootstrap"
       }
     }
-  }
-
-  # Check one numeric test set up correctly
-  ## Not currently able to reach in testing as other checks
-  ## already produce errors
-  # if (null == "point") {
-  #   if (
-  #     !is.factor(response_variable(x))
-  #     & !any(grepl("mu|med|sigma", attr(attr(x, "params"), "names")))
-  #   ) {
-  #     stop_glue(
-  #       'Testing one numerical variable requires one of ',
-  #       '`mu`, `med`, or `sd` to be used as a parameter.'
-  #     )
-  #   }
-  # }
-
-  tibble::as_tibble(x)
+  )
+  append_infer_class(tibble::as_tibble(x))
 }
diff --git a/R/infer.R b/R/infer.R
@@ -17,7 +17,7 @@ NULL
 if (getRversion() >= "2.15.1") {
   utils::globalVariables(
     c(
-      "prop", "stat", "value", "x", "..density..", "statistic", ".",
+      "prop", "stat", "value", "x", "y", "..density..", "statistic", ".",
       "parameter", "p.value", "xmin", "x_min", "xmax", "x_max", "density",
       "denom", "diff_prop", "group_num", "n1", "n2", "num_suc", "p_hat",
       "total_suc", "explan", "probs", "conf.low", "conf.high"

diff --git a/R/shade_confidence_interval.R b/R/shade_confidence_interval.R
@@ -0,0 +1,76 @@
+#' Add information about confidence interval
+#'
+#' `shade_confidence_interval()` plots confidence interval region on top of the
+#' [visualize()] output. It should be used as \\{ggplot2\\} layer function (see
+#' examples). `shade_ci()` is its alias.
+#'
+#' @param endpoints A 2 element vector or a 1 x 2 data frame containing the
+#'   lower and upper values to be plotted. Most useful for visualizing
+#'   conference intervals.
+#' @param color A character or hex string specifying the color of the
+#'   end points as a vertical lines on the plot.
+#' @param fill A character or hex string specifying the color to shade the
+#'   confidence interval. If `NULL` then no shading is actually done.
+#' @param ... Other arguments passed along to \\{ggplot2\\} functions.
+#' @return A list of \\{ggplot2\\} objects to be added to the `visualize()`
+#'   output.
+#'
+#' @seealso [shade_p_value()] to add information about p-value region.
+#'
+#' @examples
+#' viz_plot <- mtcars %>%
+#'   dplyr::mutate(am = factor(am)) %>%
+#'   specify(mpg ~ am) %>% # alt: response = mpg, explanatory = am
+#'   hypothesize(null = "independence") %>%
+#'   generate(reps = 100, type = "permute") %>%
+#'   calculate(stat = "t", order = c("1", "0")) %>%
+#'   visualize(method = "both")
+#'
+#' viz_plot + shade_confidence_interval(c(-1.5, 1.5))
+#' viz_plot + shade_confidence_interval(c(-1.5, 1.5), fill = NULL)
+#'
+#' @name shade_confidence_interval
+NULL
+
+#' @rdname shade_confidence_interval
+#' @export
+shade_confidence_interval <- function(endpoints, color = "mediumaquamarine",
+                                      fill = "turquoise", ...) {
+  endpoints <- impute_endpoints(endpoints)
+  check_shade_confidence_interval_args(color, fill)
+
+  res <- list()
+  if (is.null(endpoints)) {
+    return(res)
+  }
+
+  if (!is.null(fill)) {
+    res <- c(
+      res, list(
+        ggplot2::geom_rect(
+          data = data.frame(endpoints[1]),
+          fill = fill, alpha = 0.6,
+          aes(xmin = endpoints[1], xmax = endpoints[2], ymin = 0, ymax = Inf),
+          inherit.aes = FALSE,
+          ...
+        )
+      )
+    )
+  }
+
+  c(
+    res,
+    list(
+      ggplot2::geom_segment(
+        data = data.frame(x = endpoints),
+        aes(x = x, xend = x, y = 0, yend = Inf),
+        colour = color, size = 2,
+        inherit.aes = FALSE
+      )
+    )
+  )
+}
+
+#' @rdname shade_confidence_interval
+#' @export
+shade_ci <- shade_confidence_interval
-Original file line number
+Diff line change
@@ Expand Up / @@ -11,6 +11,7 @@ latex: false @@
     env:
      global:
        - CRAN: http://cran.rstudio.com
+       - VDIFFR_RUN_TESTS: false
     notifications:
       email:
@@ Expand Down @@