Skip to content

Commit

Permalink
handle missing values when calculating confidence intervals (#521)
Browse files Browse the repository at this point in the history
  • Loading branch information
simonpcouch authored Jan 31, 2024
1 parent e5095f0 commit 3866325
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 5 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# infer v1.0.5.9000 (development version)

* Fixed bug where `get_confidence_interval()` would error uninformatively when the supplied distribution of estimates contained missing values. The function will now warn and return a confidence interval calculated using the non-missing estimates.

* Updated infrastructure for errors, warnings, and messages (#513). Most of these changes will not be visible to users, though:
- Many longer error messages are now broken up into several lines.
- For references to help-files, users can now click on the error message's text to navigate to the cited documentation.
Expand Down
28 changes: 23 additions & 5 deletions R/get_confidence_interval.R
Original file line number Diff line number Diff line change
Expand Up @@ -227,9 +227,23 @@ switch_ci <- function(type, x, level, point_estimate) {
)
}

# Drop missing values from a vector of estimates, warning when any are found.
#
# estimates: a vector of statistics/estimates (may contain NA).
#
# Returns `estimates` with every element for which `is.na()` is TRUE removed.
# When at least one element is dropped, a warning reporting the count is
# signalled via `cli_warn()`; otherwise the function is silent.
remove_missing_estimates <- function(estimates) {
  na_estimates <- is.na(estimates)
  na_estimates_n <- sum(na_estimates)

  if (na_estimates_n > 0) {
    # cli pluralization markup keeps the message grammatical for a single
    # missing value ("1 estimate was ...") while leaving the wording for
    # two or more ("4 estimates were ...") unchanged.
    cli_warn(
      "{na_estimates_n} estimate{?s} {?was/were} missing and {?was/were} \\
       removed when calculating the confidence interval."
    )
  }

  estimates[!na_estimates]
}

# Percentile-method confidence interval.
#
# x: data frame whose last column holds the stat/estimate values.
# level: confidence level; the interval endpoints are the
#   (1 - level) / 2 and (1 + level) / 2 quantiles of the estimates.
#
# Returns the result of `make_ci_df()` applied to the two quantiles.
ci_percentile <- function(x, level) {
  # x[[ncol(x)]] pulls out the stat or estimate column; missing estimates are
  # dropped (with a warning) *before* any quantile is taken, because
  # stats::quantile() errors on NA input when na.rm = FALSE.
  estimates <- remove_missing_estimates(x[[ncol(x)]])

  ci_vec <- stats::quantile(estimates, probs = (1 + c(-level, level)) / 2)

  make_ci_df(ci_vec)
}
Expand All @@ -247,7 +261,9 @@ ci_se <- function(x, level, point_estimate) {
}
} else {
# x[[ncol(x)]] pulls out the stat or estimate column
se <- stats::sd(x[[ncol(x)]])
estimates <- remove_missing_estimates(x[[ncol(x)]])
se <- stats::sd(estimates)

qfn <- "qnorm"
}

Expand All @@ -269,14 +285,16 @@ ci_bias_corrected <- function(x, level, point_estimate) {
point_estimate <- check_obs_stat(point_estimate)

# x[[ncol(x)]] pulls out the stat or estimate column
p <- mean(x[[ncol(x)]] <= point_estimate)
estimates <- remove_missing_estimates(x[[ncol(x)]])

p <- mean(estimates <= point_estimate)

z0 <- stats::qnorm(p)
# z_alpha_2 is z_(alpha/2)
z_alpha_2 <- stats::qnorm((1 + c(-level, level)) / 2)
new_probs <- stats::pnorm(2 * z0 + z_alpha_2)

# x[[ncol(x)]] pulls out the stat or estimate column
ci_vec <- stats::quantile(x[[ncol(x)]], probs = new_probs)
ci_vec <- stats::quantile(estimates, probs = new_probs)

make_ci_df(ci_vec)
}
Expand Down
8 changes: 8 additions & 0 deletions tests/testthat/_snaps/get_confidence_interval.md
Original file line number Diff line number Diff line change
Expand Up @@ -175,3 +175,11 @@
Error in `get_confidence_interval()`:
! Confidence intervals using a `z` distribution for `stat = mean` are not implemented.

# handles missing values gracefully (#520)

Code
res <- get_confidence_interval(boot_dist, 0.95)
Condition
Warning:
4 estimates were missing and were removed when calculating the confidence interval.

19 changes: 19 additions & 0 deletions tests/testthat/test-get_confidence_interval.R
Original file line number Diff line number Diff line change
Expand Up @@ -471,3 +471,22 @@ test_that("theoretical CIs check arguments properly", {
)
)
})

# Regression test for #520: a distribution of estimates that contains missing
# values should make get_confidence_interval() warn and still return a
# data frame.
test_that("handles missing values gracefully (#520)", {
# Small two-group data set: 10 evenly spaced values in [0, 1], 5 rows per group.
data <- data.frame(
prop = seq(0, 1, length.out = 10),
group = rep(c("a", "b"), each = 5L)
)

# Fix the RNG so the resamples, and therefore the number of missing estimates
# recorded in the snapshot, are reproducible.
set.seed(1)
# NOTE(review): presumably some bootstrap resamples draw rows from only one
# group, leaving that replicate's difference in medians undefined (NA) --
# confirm against calculate().
boot_dist <-
data %>%
specify(prop ~ group) %>%
hypothesize(null = "independence") %>%
generate(reps = 1000, type = "bootstrap") %>%
calculate(stat = "diff in medians", order = c("b", "a"))

# The snapshot captures the warning emitted about removed estimates; the
# assignment to `res` is part of the snapshotted code.
expect_snapshot(res <- get_confidence_interval(boot_dist, .95))

# Despite the missing estimates, a data frame must still be returned.
expect_s3_class(res, "data.frame")
})

0 comments on commit 3866325

Please sign in to comment.