Skip to content

Commit

Permalink
Merge pull request #251 from tidymodels/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
andrewpbray authored Oct 1, 2019
2 parents d48bacb + 1b7f4aa commit 6021338
Show file tree
Hide file tree
Showing 91 changed files with 8,230 additions and 932 deletions.
1 change: 0 additions & 1 deletion .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
^docs*
^CONDUCT\.md$
^README\.md$
^NEWS\.md$
^cran-comments\.md$
^_build\.sh$
^appveyor\.yml$
Expand Down
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ latex: false
env:
global:
- CRAN: http://cran.rstudio.com
- VDIFFR_RUN_TESTS: false

notifications:
email:
Expand Down
8 changes: 5 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: infer
Type: Package
Title: Tidy Statistical Inference
Version: 0.4.1
Version: 0.5.0
Authors@R: c(
person("Andrew", "Bray", email = "abray@reed.edu", role = c("aut", "cre")),
person("Chester", "Ismay", email = "chester.ismay@gmail.com", role = "aut"),
Expand All @@ -28,7 +28,8 @@ Imports:
ggplot2,
magrittr,
glue (>= 1.3.0),
grDevices
grDevices,
purrr
Depends:
R (>= 3.1.2)
Suggests:
Expand All @@ -39,7 +40,8 @@ Suggests:
nycflights13,
stringr,
testthat,
covr
covr,
vdiffr
URL: https://github.com/tidymodels/infer
BugReports: https://github.com/tidymodels/infer/issues
Roxygen: list(markdown = TRUE)
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,17 @@ importFrom(ggplot2,ylab)
importFrom(glue,glue_collapse)
importFrom(magrittr,"%>%")
importFrom(methods,hasArg)
importFrom(purrr,compact)
importFrom(rlang,"!!")
importFrom(rlang,":=")
importFrom(rlang,enquo)
importFrom(rlang,eval_tidy)
importFrom(rlang,f_lhs)
importFrom(rlang,f_rhs)
importFrom(rlang,get_expr)
importFrom(rlang,quo)
importFrom(rlang,sym)
importFrom(stats,as.formula)
importFrom(stats,dchisq)
importFrom(stats,df)
importFrom(stats,dnorm)
Expand Down
15 changes: 15 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
# infer 0.5.0

## Breaking changes

- `shade_confidence_interval()` now plots vertical lines starting from zero (previously - from the bottom of a plot) (#234).
- `shade_p_value()` now uses "area under the curve" approach to shading (#229).

## Other

- Updated `chisq_test()` to take arguments in a response/explanatory format, perform goodness of fit tests, and default to the approximation approach (#241).
- Updated `chisq_stat()` to do goodness of fit (#241).
- Make interface to `hypothesize()` clearer by adding the options for the point null parameters to the function signature (#242).
- Manage `infer` class more systematically (#219).
- Use `vdiffr` for plot testing (#221).

# infer 0.4.1

- Added Evgeni Chasnovski as author for his incredible work on refactoring the package and providing excellent support.
Expand Down
6 changes: 3 additions & 3 deletions R/calculate.R
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ calculate <- function(x,
)
}
# else {
# class(result) <- append("infer", class(result))
# result <- append_infer_class(result)
# }

result <- copy_attrs(to = result, from = x)
Expand Down Expand Up @@ -232,12 +232,12 @@ calc_impl.Chisq <- function(type, x, order, ...) {
p_levels <- get_par_levels(x)
x %>%
dplyr::summarize(
stat = stats::chisq.test(
stat = suppressWarnings(stats::chisq.test(
# Ensure correct ordering of parameters
table(!!(attr(x, "response")))[p_levels],
p = attr(x, "params")
)$stat
)
))
} else {
# Straight from `specify()`
stop_glue(
Expand Down
14 changes: 5 additions & 9 deletions R/generate.R
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,7 @@ bootstrap <- function(x, reps = 1, ...) {
result <- rep_sample_n(x, size = nrow(x), replace = TRUE, reps = reps)
result <- copy_attrs(to = result, from = x)

class(result) <- append("infer", class(result))

result
append_infer_class(result)
}

#' @importFrom dplyr bind_rows group_by
Expand All @@ -159,9 +157,7 @@ permute <- function(x, reps = 1, ...) {

df_out <- copy_attrs(to = df_out, from = x)

class(df_out) <- append("infer", class(df_out))

df_out
append_infer_class(df_out)
}

permute_once <- function(x, ...) {
Expand Down Expand Up @@ -195,7 +191,7 @@ simulate <- function(x, reps = 1, ...) {

rep_tbl <- copy_attrs(to = rep_tbl, from = x)

class(rep_tbl) <- append("infer", class(rep_tbl))

dplyr::group_by(rep_tbl, replicate)
rep_tbl <- dplyr::group_by(rep_tbl, replicate)
append_infer_class(rep_tbl)
}
97 changes: 38 additions & 59 deletions R/hypothesize.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,14 @@
#' @param x A data frame that can be coerced into a [tibble][tibble::tibble].
#' @param null The null hypothesis. Options include `"independence"` and
#' `"point"`.
#' @param ... Arguments passed to downstream functions.
#' @param p The true proportion of successes (a number between 0 and 1). To be used with point null hypotheses when the specified response
#' variable is categorical.
#' @param mu The true mean (any numerical value). To be used with point null
#' hypotheses when the specified response variable is continuous.
#' @param med The true median (any numerical value). To be used with point null
#' hypotheses when the specified response variable is continuous.
#' @param sigma The true standard deviation (any numerical value). To be used with
#' point null hypotheses.
#'
#' @return A tibble containing the response (and explanatory, if specified)
#' variable data with parameter information stored as well.
Expand All @@ -17,71 +24,43 @@
#' generate(reps = 100, type = "permute") %>%
#' calculate(stat = "F")
#'
#' @importFrom purrr compact
#' @export
hypothesize <- function(x, null, ...) {
hypothesize_checks(x, null)
hypothesize <- function(x, null, p = NULL, mu = NULL, med = NULL, sigma = NULL) {

# Custom logic, because using match.arg() would give a default value when
# the user didn't specify anything.
null <- match_null_hypothesis(null)
attr(x, "null") <- null

dots <- list(...)

if ((null == "point") && (length(dots) == 0)) {
stop_glue(
"Provide a parameter and a value to check such as `mu = 30` for the ",
"point hypothesis."
)
}

if ((null == "independence") && (length(dots) > 0)) {
warning_glue(
"Parameter values are not specified when testing that two variables are ",
"independent."
)
}

if ((length(dots) > 0) && (null == "point")) {
params <- parse_params(dots, x)
attr(x, "params") <- params

if (any(grepl("p.", attr(attr(x, "params"), "names")))) {
# simulate instead of bootstrap based on the value of `p` provided
attr(x, "type") <- "simulate"
} else {
attr(x, "type") <- "bootstrap"
}
hypothesize_checks(x, null)

}
dots <- compact(list(p = p, mu = mu, med = med, sigma = sigma))

if (!is.null(null) && (null == "independence")) {
attr(x, "type") <- "permute"
}
switch(
null,
independence = {
params <- sanitize_hypothesis_params_independence(dots)
attr(x, "type") <- "permute"
},
point = {
params <- sanitize_hypothesis_params_point(dots, x)
attr(x, "params") <- unlist(params)

# Check one proportion test set up correctly
if (null == "point") {
if (is.factor(response_variable(x))) {
if (!any(grepl("p", attr(attr(x, "params"), "names")))) {
stop_glue(
'Testing one categorical variable requires `p` to be used as a ',
'parameter.'
)
if (!is.null(params$p)) {
# simulate instead of bootstrap based on the value of `p` provided
attr(x, "type") <- "simulate"
} else {
# Check one proportion test set up correctly
if (is.factor(response_variable(x))) {
stop_glue(
'Testing one categorical variable requires `p` to be used as a ',
'parameter.'
)
}
attr(x, "type") <- "bootstrap"
}
}
}

# Check one numeric test set up correctly
## Not currently able to reach in testing as other checks
## already produce errors
# if (null == "point") {
# if (
# !is.factor(response_variable(x))
# & !any(grepl("mu|med|sigma", attr(attr(x, "params"), "names")))
# ) {
# stop_glue(
# 'Testing one numerical variable requires one of ',
# '`mu`, `med`, or `sd` to be used as a parameter.'
# )
# }
# }

tibble::as_tibble(x)
)
append_infer_class(tibble::as_tibble(x))
}
2 changes: 1 addition & 1 deletion R/infer.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ NULL
if (getRversion() >= "2.15.1") {
utils::globalVariables(
c(
"prop", "stat", "value", "x", "..density..", "statistic", ".",
"prop", "stat", "value", "x", "y", "..density..", "statistic", ".",
"parameter", "p.value", "xmin", "x_min", "xmax", "x_max", "density",
"denom", "diff_prop", "group_num", "n1", "n2", "num_suc", "p_hat",
"total_suc", "explan", "probs", "conf.low", "conf.high"
Expand Down
76 changes: 76 additions & 0 deletions R/shade_confidence_interval.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#' Add information about confidence interval
#'
#' `shade_confidence_interval()` plots confidence interval region on top of the
#' [visualize()] output. It should be used as \\{ggplot2\\} layer function (see
#' examples). `shade_ci()` is its alias.
#'
#' @param endpoints A 2 element vector or a 1 x 2 data frame containing the
#' lower and upper values to be plotted. Most useful for visualizing
#' conference intervals.
#' @param color A character or hex string specifying the color of the
#' end points as a vertical lines on the plot.
#' @param fill A character or hex string specifying the color to shade the
#' confidence interval. If `NULL` then no shading is actually done.
#' @param ... Other arguments passed along to \\{ggplot2\\} functions.
#' @return A list of \\{ggplot2\\} objects to be added to the `visualize()`
#' output.
#'
#' @seealso [shade_p_value()] to add information about p-value region.
#'
#' @examples
#' viz_plot <- mtcars %>%
#' dplyr::mutate(am = factor(am)) %>%
#' specify(mpg ~ am) %>% # alt: response = mpg, explanatory = am
#' hypothesize(null = "independence") %>%
#' generate(reps = 100, type = "permute") %>%
#' calculate(stat = "t", order = c("1", "0")) %>%
#' visualize(method = "both")
#'
#' viz_plot + shade_confidence_interval(c(-1.5, 1.5))
#' viz_plot + shade_confidence_interval(c(-1.5, 1.5), fill = NULL)
#'
#' @name shade_confidence_interval
NULL

#' @rdname shade_confidence_interval
#' @export
shade_confidence_interval <- function(endpoints, color = "mediumaquamarine",
fill = "turquoise", ...) {
endpoints <- impute_endpoints(endpoints)
check_shade_confidence_interval_args(color, fill)

res <- list()
if (is.null(endpoints)) {
return(res)
}

if (!is.null(fill)) {
res <- c(
res, list(
ggplot2::geom_rect(
data = data.frame(endpoints[1]),
fill = fill, alpha = 0.6,
aes(xmin = endpoints[1], xmax = endpoints[2], ymin = 0, ymax = Inf),
inherit.aes = FALSE,
...
)
)
)
}

c(
res,
list(
ggplot2::geom_segment(
data = data.frame(x = endpoints),
aes(x = x, xend = x, y = 0, yend = Inf),
colour = color, size = 2,
inherit.aes = FALSE
)
)
)
}

#' @rdname shade_confidence_interval
#' @export
shade_ci <- shade_confidence_interval
Loading

0 comments on commit 6021338

Please sign in to comment.