From 8f9d7b7fd7de3ca47f6c336332214319a02f2922 Mon Sep 17 00:00:00 2001
From: "Simon P. Couch" <simonpatrickcouch@gmail.com>
Date: Fri, 24 Jul 2020 14:17:16 -0700
Subject: [PATCH 01/13] possible rep_sample_n() re-implementation (#279)

also adds a new rep_slice_sample wrapper that has a more similar interface to dplyr::slice_sample().

still need to extend unit testing and rewrite examples.
---
 NAMESPACE                          |  2 +-
 R/rep_sample_n.R                   | 54 +++++++++++-------------------
 man/get_confidence_interval.Rd     |  2 +-
 man/rep_sample_n.Rd                | 10 +++++-
 tests/testthat/test-rep_sample_n.R | 15 +++++----
 5 files changed, 39 insertions(+), 44 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index ebcfda1b..b66dea2f 100755
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -17,6 +17,7 @@ export(hypothesize)
 export(p_value)
 export(prop_test)
 export(rep_sample_n)
+export(rep_slice_sample)
 export(shade_ci)
 export(shade_confidence_interval)
 export(shade_p_value)
@@ -28,7 +29,6 @@ export(visualise)
 export(visualize)
 importFrom(dplyr,bind_rows)
 importFrom(dplyr,group_by)
-importFrom(dplyr,inner_join)
 importFrom(dplyr,mutate_if)
 importFrom(dplyr,n)
 importFrom(dplyr,one_of)
diff --git a/R/rep_sample_n.R b/R/rep_sample_n.R
index 19e882c6..e0f6557b 100644
--- a/R/rep_sample_n.R
+++ b/R/rep_sample_n.R
@@ -14,6 +14,11 @@
 #'
 #' @return A tibble of size `rep` times `size` rows corresponding to `rep`
 #'   samples of size n = `size` from `tbl`.
+#'   
+#' @details The [dplyr::sample_n()] function from that `rep_sample_n()` function 
+#' was originally written to supplement has been superseded 
+#' by [dplyr::slice_sample()]. `rep_slice_sample()` provides a light wrapper
+#' around `rep_sample_n()` that has a more similar interface to `slice_sample()`
 #'
 #' @examples
 #' suppressPackageStartupMessages(library(dplyr))
@@ -39,14 +44,8 @@
 #'   geom_density() +
 #'   labs(x = "p_hat", y = "Number of samples",
 #'   title = "Sampling distribution of p_hat from 1000 samples of size 50")
-#'
-#' @importFrom dplyr pull
-#' @importFrom dplyr inner_join
-#' @importFrom dplyr group_by
 #' @export
 rep_sample_n <- function(tbl, size, replace = FALSE, reps = 1, prob = NULL) {
-  n <- nrow(tbl)
-
   check_type(tbl, is.data.frame)
   check_type(size, is.numeric)
   check_type(replace, is.logical)
@@ -55,33 +54,20 @@ rep_sample_n <- function(tbl, size, replace = FALSE, reps = 1, prob = NULL) {
     check_type(prob, is.numeric)
   }
 
-  # assign non-uniform probabilities
-  # there should be a better way!!
-  # prob needs to be nrow(tbl) -- not just number of factor levels
-  if (!is.null(prob)) {
-    if (length(prob) != n) {
-      stop_glue(
-        "The argument `prob` must have length `nrow(tbl)` = {nrow(tbl)}"
-      )
-    }
-
-    prob <- tibble::tibble(vals = levels(dplyr::pull(tbl, 1))) %>%
-      dplyr::mutate(probs = prob) %>%
-      dplyr::inner_join(tbl) %>%
-      dplyr::select(probs) %>%
-      dplyr::pull()
-  }
+  1:reps %>%
+    purrr::map_dfr(
+      ~ tbl %>%
+        dplyr::slice_sample(n = size, weight_by = prob, replace = replace)
+    ) %>%
+    dplyr::mutate(
+      replicate = rep(1:reps, each = size), 
+      .before = dplyr::everything()
+    ) %>%
+    dplyr::group_by(replicate)
+}
 
-  i <- unlist(replicate(
-    reps,
-    sample.int(n, size, replace = replace, prob = prob),
-    simplify = FALSE
-  ))
-  rep_tbl <- cbind(
-    replicate = rep(1:reps, rep(size, reps)),
-    tbl[i, ]
-  )
-  rep_tbl <- tibble::as_tibble(rep_tbl)
-  names(rep_tbl)[-1] <- names(tbl)
-  dplyr::group_by(rep_tbl, replicate)
+#' @rdname rep_sample_n
+#' @export
+rep_slice_sample <- function(tbl, n, replace = FALSE, reps = 1, weight_by = NULL) {
+  rep_sample_n(tbl, n, replace, reps, weight_by)
 }
diff --git a/man/get_confidence_interval.Rd b/man/get_confidence_interval.Rd
index f534b35f..284b30a9 100644
--- a/man/get_confidence_interval.Rd
+++ b/man/get_confidence_interval.Rd
@@ -36,7 +36,7 @@ correspond to lower and upper bounds of the confidence interval.
 }
 \description{
 Compute a confidence interval around a summary statistic. Only
-simulation-based methods are (currently only) supported.
+simulation-based methods are (currently) supported.
 
 Learn more in \code{vignette("infer")}.
 }
diff --git a/man/rep_sample_n.Rd b/man/rep_sample_n.Rd
index 5607d1df..99c96c6f 100644
--- a/man/rep_sample_n.Rd
+++ b/man/rep_sample_n.Rd
@@ -2,9 +2,12 @@
 % Please edit documentation in R/rep_sample_n.R
 \name{rep_sample_n}
 \alias{rep_sample_n}
+\alias{rep_slice_sample}
 \title{Perform repeated sampling}
 \usage{
 rep_sample_n(tbl, size, replace = FALSE, reps = 1, prob = NULL)
+
+rep_slice_sample(tbl, n, replace = FALSE, reps = 1, weight_by = NULL)
 }
 \arguments{
 \item{tbl}{Data frame of population from which to sample.}
@@ -26,6 +29,12 @@ samples of size n = \code{size} from \code{tbl}.
 Perform repeated sampling of samples of size n. Useful for creating sampling
 distributions.
 }
+\details{
+The \code{\link[dplyr:sample_n]{dplyr::sample_n()}} function from that \code{rep_sample_n()} function
+was originally written to supplement has been superseded
+by \code{\link[dplyr:slice]{dplyr::slice_sample()}}. \code{rep_slice_sample()} provides a light wrapper
+around \code{rep_sample_n()} that has a more similar interface to \code{slice_sample()}
+}
 \examples{
 suppressPackageStartupMessages(library(dplyr))
 suppressPackageStartupMessages(library(ggplot2))
@@ -50,5 +59,4 @@ ggplot(p_hats, aes(x = prop_hurricane)) +
   geom_density() +
   labs(x = "p_hat", y = "Number of samples",
   title = "Sampling distribution of p_hat from 1000 samples of size 50")
-
 }
diff --git a/tests/testthat/test-rep_sample_n.R b/tests/testthat/test-rep_sample_n.R
index 9bdbc4b0..0cf860fb 100644
--- a/tests/testthat/test-rep_sample_n.R
+++ b/tests/testthat/test-rep_sample_n.R
@@ -2,25 +2,26 @@ context("rep_sample_n")
 
 N <- 5
 population <- tibble::tibble(
-  ball_ID = 1:N,
+  ball_id = 1:N,
   color = factor(c(rep("red", 3), rep("white", N - 3)))
 )
 
 test_that("rep_sample_n works", {
-  expect_silent(population %>% rep_sample_n(size = 2, reps = 10))
+  expect_silent(test_rep <- population %>% rep_sample_n(size = 2, reps = 10))
+  
   expect_error(
     population %>%
       rep_sample_n(size = 2, reps = 10, prob = rep(x = 1/5, times = 100))
   )
+  
   expect_error(
     population %>%
       rep_sample_n(size = 2, reps = 10, prob = c(1/2, 1/2))
   )
-  expect_error(
-    population %>%
-      rep_sample_n(size = 2, reps = 10, prob = c(0.25, 1/5, 1/5, 1/5, 0.15))
-  )
   
-  test_rep <- population %>% rep_sample_n(size = 2, reps = 10)
   expect_equal(c("replicate", names(population)), names(test_rep))
 })
+
+test_that("rep_slice_sample works", {
+  
+})

From 5882a2be8c4d667ec2dfde9666712025582ef1a1 Mon Sep 17 00:00:00 2001
From: "Simon P. Couch" <simonpatrickcouch@gmail.com>
Date: Fri, 24 Jul 2020 15:02:05 -0700
Subject: [PATCH 02/13] add unit testing, adjust docs

---
 R/rep_sample_n.R                   | 50 ++++++++++---------
 man/rep_sample_n.Rd                | 50 ++++++++++---------
 tests/testthat/test-rep_sample_n.R | 79 ++++++++++++++++++++++++++++--
 3 files changed, 129 insertions(+), 50 deletions(-)

diff --git a/R/rep_sample_n.R b/R/rep_sample_n.R
index e0f6557b..4edffd78 100644
--- a/R/rep_sample_n.R
+++ b/R/rep_sample_n.R
@@ -2,48 +2,50 @@
 #'
 #' @description
 #'
-#' Perform repeated sampling of samples of size n. Useful for creating sampling
-#' distributions.
+#' These functions extend the functionality of [dplyr::sample_n()] and 
+#' [dplyr::slice_sample()] by allowing for repeated sampling of data.
+#' This operation is especially helpful while creating sampling 
+#' distributions—see the examples below!
 #'
 #' @param tbl Data frame of population from which to sample.
 #' @param size Sample size of each sample.
 #' @param replace Should sampling be with replacement?
 #' @param reps Number of samples of size n = `size` to take.
-#' @param prob A vector of probability weights for obtaining the elements of the
-#'   vector being sampled.
+#' @param prob A vector of sampling weights for each of the rows in `tbl`—must
+#'   have length equal to `nrow(tbl)`.
+#' @param n Sample size of each sample.
+#' @param weight_by A vector of sampling weights for each of the rows 
+#'   in `tbl`—must have length equal to `nrow(tbl)`.
 #'
-#' @return A tibble of size `rep` times `size` rows corresponding to `rep`
-#'   samples of size n = `size` from `tbl`.
+#' @return A tibble of size `rep * size` rows corresponding to `reps`
+#'   samples of size `size` from `tbl`, grouped by `replicate`.
 #'   
 #' @details The [dplyr::sample_n()] function from that `rep_sample_n()` function 
 #' was originally written to supplement has been superseded 
 #' by [dplyr::slice_sample()]. `rep_slice_sample()` provides a light wrapper
-#' around `rep_sample_n()` that has a more similar interface to `slice_sample()`
+#' around `rep_sample_n()` that has a more similar interface to `slice_sample()`.
 #'
 #' @examples
-#' suppressPackageStartupMessages(library(dplyr))
-#' suppressPackageStartupMessages(library(ggplot2))
-#'
-#' # A virtual population of N = 10,010, of which 3091 are hurricanes
-#' population <- dplyr::storms %>%
-#'   select(status)
-#'
-#' # Take samples of size n = 50 storms without replacement; do this 1000 times
-#' samples <- population %>%
+#' library(dplyr)
+#' library(ggplot2)
+#' 
+#' # take 1000 samples of size n = 50, without replacement
+#' resamples <- gss %>%
 #'   rep_sample_n(size = 50, reps = 1000)
-#' samples
+#'   
+#' resamples
 #'
-#' # Compute p_hats for all 1000 samples = proportion hurricanes
-#' p_hats <- samples %>%
+#' # compute the proportion of respondents with a college
+#' # degree in each replicate
+#' p_hats <- resamples %>%
 #'   group_by(replicate) %>%
-#'   summarize(prop_hurricane = mean(status == "hurricane"))
-#' p_hats
+#'   summarize(prop_college = mean(college == "degree")) 
 #'
-#' # Plot sampling distribution
-#' ggplot(p_hats, aes(x = prop_hurricane)) +
+#' # plot sampling distribution
+#' ggplot(p_hats, aes(x = prop_college)) +
 #'   geom_density() +
 #'   labs(x = "p_hat", y = "Number of samples",
-#'   title = "Sampling distribution of p_hat from 1000 samples of size 50")
+#'   title = "Sampling distribution of p_hat")
 #' @export
 rep_sample_n <- function(tbl, size, replace = FALSE, reps = 1, prob = NULL) {
   check_type(tbl, is.data.frame)
diff --git a/man/rep_sample_n.Rd b/man/rep_sample_n.Rd
index 99c96c6f..460493f6 100644
--- a/man/rep_sample_n.Rd
+++ b/man/rep_sample_n.Rd
@@ -18,45 +18,49 @@ rep_slice_sample(tbl, n, replace = FALSE, reps = 1, weight_by = NULL)
 
 \item{reps}{Number of samples of size n = \code{size} to take.}
 
-\item{prob}{A vector of probability weights for obtaining the elements of the
-vector being sampled.}
+\item{prob}{A vector of sampling weights for each of the rows in \code{tbl}—must
+have length equal to \code{nrow(tbl)}.}
+
+\item{n}{Sample size of each sample.}
+
+\item{weight_by}{A vector of sampling weights for each of the rows
+in \code{tbl}—must have length equal to \code{nrow(tbl)}.}
 }
 \value{
-A tibble of size \code{rep} times \code{size} rows corresponding to \code{rep}
-samples of size n = \code{size} from \code{tbl}.
+A tibble of size \code{rep * size} rows corresponding to \code{reps}
+samples of size \code{size} from \code{tbl}, grouped by \code{replicate}.
 }
 \description{
-Perform repeated sampling of samples of size n. Useful for creating sampling
-distributions.
+These functions extend the functionality of \code{\link[dplyr:sample_n]{dplyr::sample_n()}} and
+\code{\link[dplyr:slice]{dplyr::slice_sample()}} by allowing for repeated sampling of data.
+This operation is especially helpful while creating sampling
+distributions—see the examples below!
 }
 \details{
 The \code{\link[dplyr:sample_n]{dplyr::sample_n()}} function from that \code{rep_sample_n()} function
 was originally written to supplement has been superseded
 by \code{\link[dplyr:slice]{dplyr::slice_sample()}}. \code{rep_slice_sample()} provides a light wrapper
-around \code{rep_sample_n()} that has a more similar interface to \code{slice_sample()}
+around \code{rep_sample_n()} that has a more similar interface to \code{slice_sample()}.
 }
 \examples{
-suppressPackageStartupMessages(library(dplyr))
-suppressPackageStartupMessages(library(ggplot2))
-
-# A virtual population of N = 10,010, of which 3091 are hurricanes
-population <- dplyr::storms \%>\%
-  select(status)
+library(dplyr)
+library(ggplot2)
 
-# Take samples of size n = 50 storms without replacement; do this 1000 times
-samples <- population \%>\%
+# take 1000 samples of size n = 50, without replacement
+resamples <- gss \%>\%
   rep_sample_n(size = 50, reps = 1000)
-samples
+  
+resamples
 
-# Compute p_hats for all 1000 samples = proportion hurricanes
-p_hats <- samples \%>\%
+# compute the proportion of respondents with a college
+# degree in each replicate
+p_hats <- resamples \%>\%
   group_by(replicate) \%>\%
-  summarize(prop_hurricane = mean(status == "hurricane"))
-p_hats
+  summarize(prop_college = mean(college == "degree")) 
 
-# Plot sampling distribution
-ggplot(p_hats, aes(x = prop_hurricane)) +
+# plot sampling distribution
+ggplot(p_hats, aes(x = prop_college)) +
   geom_density() +
   labs(x = "p_hat", y = "Number of samples",
-  title = "Sampling distribution of p_hat from 1000 samples of size 50")
+  title = "Sampling distribution of p_hat")
 }
diff --git a/tests/testthat/test-rep_sample_n.R b/tests/testthat/test-rep_sample_n.R
index 0cf860fb..fb02d145 100644
--- a/tests/testthat/test-rep_sample_n.R
+++ b/tests/testthat/test-rep_sample_n.R
@@ -6,9 +6,68 @@ population <- tibble::tibble(
   color = factor(c(rep("red", 3), rep("white", N - 3)))
 )
 
-test_that("rep_sample_n works", {
-  expect_silent(test_rep <- population %>% rep_sample_n(size = 2, reps = 10))
+test_that("rep_sample_n is sensitive to the size argument", {
+  set.seed(1)
+  reps <- 10
+  s1 <- 2
+  s2 <- 3
   
+  res1 <- population %>% rep_sample_n(size = s1, reps = reps)
+  res2 <- population %>% rep_sample_n(size = s2, reps = reps)
+
+  expect_equal(ncol(res1), ncol(res2))
+  expect_equal(ncol(res1), 3)
+  
+  expect_equal(nrow(res1) / s1, nrow(res2) / s2)
+  expect_equal(nrow(res1), reps * s1)
+})
+
+test_that("rep_sample_n is sensitive to the reps argument", {
+  set.seed(1)
+  r1 <- 10
+  r2 <- 5
+  size <- 2
+  
+  res1 <- population %>% rep_sample_n(size = size, reps = r1)
+  res2 <- population %>% rep_sample_n(size = size, reps = r2)
+  
+  expect_equal(ncol(res1), ncol(res2))
+  expect_equal(ncol(res1), 3)
+  
+  expect_equal(nrow(res1) / r1, nrow(res2) / r2)
+  expect_equal(nrow(res1), reps * s1)
+})
+
+test_that("rep_sample_n is sensitive to the replace argument", {
+  set.seed(1)
+  res1 <- population %>% rep_sample_n(size = 5, reps = 100, replace = TRUE)
+  
+  set.seed(1)
+  res2 <- population %>% rep_sample_n(size = 5, reps = 100, replace = FALSE)
+  
+  expect_true(all(res1$replicate == res2$replicate))
+  expect_false(all(res1$ball_id == res2$ball_id))
+  expect_false(all(res1$color == res2$color))
+  
+  expect_equal(ncol(res1), ncol(res2))
+  expect_equal(ncol(res1), 3)
+})
+
+test_that("rep_sample_n is sensitive to the prob argument", {
+  set.seed(1)
+  res1 <- population %>% 
+    rep_sample_n(
+      size = 5, 
+      reps = 100, 
+      replace = TRUE,
+      prob = c(1, rep(0, 4))
+    )
+  
+  expect_true(all(res1$ball_id == 1))
+  expect_true(all(res1$color == "red"))
+})
+
+test_that("rep_sample_n errors with bad arguments", {
   expect_error(
     population %>%
       rep_sample_n(size = 2, reps = 10, prob = rep(x = 1/5, times = 100))
@@ -19,9 +78,23 @@ test_that("rep_sample_n works", {
       rep_sample_n(size = 2, reps = 10, prob = c(1/2, 1/2))
   )
   
-  expect_equal(c("replicate", names(population)), names(test_rep))
+  expect_error(
+    population %>%
+      rep_sample_n(size = "a lot", reps = 10)
+  )
+  
+  expect_error(
+    population %>%
+      rep_sample_n(size = 2, reps = "a lot")
+  )
 })
 
 test_that("rep_slice_sample works", {
+  set.seed(1)
+  res1 <- rep_sample_n(population, size = 2, reps = 5, prob = rep(1/N, N))
+  
+  set.seed(1)
+  res2 <- rep_slice_sample(population, n = 2, reps = 5, weight_by = rep(1/N, N))
   
+  expect_equal(res1, res2)
 })

From 96048bf7ce9876422a1c528e9a6ffed9fe3d9cbc Mon Sep 17 00:00:00 2001
From: "Simon P. Couch" <simonpatrickcouch@gmail.com>
Date: Fri, 24 Jul 2020 16:23:02 -0700
Subject: [PATCH 03/13] fix test failure

---
 tests/testthat/test-rep_sample_n.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/testthat/test-rep_sample_n.R b/tests/testthat/test-rep_sample_n.R
index fb02d145..cab65d51 100644
--- a/tests/testthat/test-rep_sample_n.R
+++ b/tests/testthat/test-rep_sample_n.R
@@ -35,7 +35,7 @@ test_that("rep_sample_n is sensitive to the reps argument", {
   expect_equal(ncol(res1), 3)
   
   expect_equal(nrow(res1) / r1, nrow(res2) / r2)
-  expect_equal(nrow(res1), reps * s1)
+  expect_equal(nrow(res1), r1 * size)
 })
 
 test_that("rep_sample_n is sensitive to the replace argument", {

From bd7e72ff0c8348e66be07e065ec31a4d4d2e2bfa Mon Sep 17 00:00:00 2001
From: "Simon P. Couch" <simonpatrickcouch@gmail.com>
Date: Fri, 24 Jul 2020 16:52:19 -0700
Subject: [PATCH 04/13] rename/order arguments re: dplyr::slice_sample()

---
 R/rep_sample_n.R    | 5 +++--
 man/rep_sample_n.Rd | 4 +++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/R/rep_sample_n.R b/R/rep_sample_n.R
index 4edffd78..0ebcd9f2 100644
--- a/R/rep_sample_n.R
+++ b/R/rep_sample_n.R
@@ -13,6 +13,7 @@
 #' @param reps Number of samples of size n = `size` to take.
 #' @param prob A vector of sampling weights for each of the rows in `tbl`—must
 #'   have length equal to `nrow(tbl)`.
+#' @param .data Data frame of population from which to sample.
 #' @param n Sample size of each sample.
 #' @param weight_by A vector of sampling weights for each of the rows 
 #'   in `tbl`—must have length equal to `nrow(tbl)`.
@@ -70,6 +71,6 @@ rep_sample_n <- function(tbl, size, replace = FALSE, reps = 1, prob = NULL) {
 
 #' @rdname rep_sample_n
 #' @export
-rep_slice_sample <- function(tbl, n, replace = FALSE, reps = 1, weight_by = NULL) {
-  rep_sample_n(tbl, n, replace, reps, weight_by)
+rep_slice_sample <- function(.data, n = 1, replace = FALSE, weight_by = NULL, reps = 1) {
+  rep_sample_n(.data, n, replace, reps, weight_by)
 }
diff --git a/man/rep_sample_n.Rd b/man/rep_sample_n.Rd
index 460493f6..7c7cad32 100644
--- a/man/rep_sample_n.Rd
+++ b/man/rep_sample_n.Rd
@@ -7,7 +7,7 @@
 \usage{
 rep_sample_n(tbl, size, replace = FALSE, reps = 1, prob = NULL)
 
-rep_slice_sample(tbl, n, replace = FALSE, reps = 1, weight_by = NULL)
+rep_slice_sample(.data, n = 1, replace = FALSE, weight_by = NULL, reps = 1)
 }
 \arguments{
 \item{tbl}{Data frame of population from which to sample.}
@@ -21,6 +21,8 @@ rep_slice_sample(tbl, n, replace = FALSE, reps = 1, weight_by = NULL)
 \item{prob}{A vector of sampling weights for each of the rows in \code{tbl}—must
 have length equal to \code{nrow(tbl)}.}
 
+\item{.data}{Data frame of population from which to sample.}
+
 \item{n}{Sample size of each sample.}
 
 \item{weight_by}{A vector of sampling weights for each of the rows

From 149bb132079881cfb206e2d5d393619d92432a7d Mon Sep 17 00:00:00 2001
From: "Simon P. Couch" <simonpatrickcouch@gmail.com>
Date: Fri, 24 Jul 2020 17:06:57 -0700
Subject: [PATCH 05/13] update NEWS

---
 NEWS.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index 96772352..76aab3a2 100755
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,9 @@
 # infer 0.5.3.9000 (development version)
 
-To be released as 0.5.4.
+- `rep_sample_n()` no longer errors when supplied a `prob` argument (#279)
+- Added `rep_slice_sample()`, a light wrapper around `rep_sample_n()`, that
+more closely resembles `dplyr::slice_sample()` (the function that supersedes)
+`dplyr::sample_n()` (#325)
 
 # infer 0.5.3
 

From ede7bcc7593cf0eee8943d58f77e757970c1c3ec Mon Sep 17 00:00:00 2001
From: "Simon P. Couch" <simonpatrickcouch@gmail.com>
Date: Fri, 24 Jul 2020 19:21:49 -0700
Subject: [PATCH 06/13] rename resamples -> slices, adjust docs

document argument aliases in the same param rather than duplicating descriptions
---
 R/rep_sample_n.R    | 16 ++++++----------
 man/rep_sample_n.Rd | 19 ++++++-------------
 2 files changed, 12 insertions(+), 23 deletions(-)

diff --git a/R/rep_sample_n.R b/R/rep_sample_n.R
index 0ebcd9f2..e5c18f64 100644
--- a/R/rep_sample_n.R
+++ b/R/rep_sample_n.R
@@ -7,16 +7,12 @@
 #' This operation is especially helpful while creating sampling 
 #' distributions—see the examples below!
 #'
-#' @param tbl Data frame of population from which to sample.
-#' @param size Sample size of each sample.
+#' @param tbl,.data Data frame of population from which to sample.
+#' @param size,n Sample size of each sample.
 #' @param replace Should sampling be with replacement?
 #' @param reps Number of samples of size n = `size` to take.
-#' @param prob A vector of sampling weights for each of the rows in `tbl`—must
+#' @param prob,weight_by A vector of sampling weights for each of the rows in `tbl`—must
 #'   have length equal to `nrow(tbl)`.
-#' @param .data Data frame of population from which to sample.
-#' @param n Sample size of each sample.
-#' @param weight_by A vector of sampling weights for each of the rows 
-#'   in `tbl`—must have length equal to `nrow(tbl)`.
 #'
 #' @return A tibble of size `rep * size` rows corresponding to `reps`
 #'   samples of size `size` from `tbl`, grouped by `replicate`.
@@ -31,14 +27,14 @@
 #' library(ggplot2)
 #' 
 #' # take 1000 samples of size n = 50, without replacement
-#' resamples <- gss %>%
+#' slices <- gss %>%
 #'   rep_sample_n(size = 50, reps = 1000)
 #'   
-#' resamples
+#' slices
 #'
 #' # compute the proportion of respondents with a college
 #' # degree in each replicate
-#' p_hats <- resamples %>%
+#' p_hats <- slices %>%
 #'   group_by(replicate) %>%
 #'   summarize(prop_college = mean(college == "degree")) 
 #'
diff --git a/man/rep_sample_n.Rd b/man/rep_sample_n.Rd
index 7c7cad32..752f5d66 100644
--- a/man/rep_sample_n.Rd
+++ b/man/rep_sample_n.Rd
@@ -10,23 +10,16 @@ rep_sample_n(tbl, size, replace = FALSE, reps = 1, prob = NULL)
 rep_slice_sample(.data, n = 1, replace = FALSE, weight_by = NULL, reps = 1)
 }
 \arguments{
-\item{tbl}{Data frame of population from which to sample.}
+\item{tbl, .data}{Data frame of population from which to sample.}
 
-\item{size}{Sample size of each sample.}
+\item{size, n}{Sample size of each sample.}
 
 \item{replace}{Should sampling be with replacement?}
 
 \item{reps}{Number of samples of size n = \code{size} to take.}
 
-\item{prob}{A vector of sampling weights for each of the rows in \code{tbl}—must
+\item{prob, weight_by}{A vector of sampling weights for each of the rows in \code{tbl}—must
 have length equal to \code{nrow(tbl)}.}
-
-\item{.data}{Data frame of population from which to sample.}
-
-\item{n}{Sample size of each sample.}
-
-\item{weight_by}{A vector of sampling weights for each of the rows
-in \code{tbl}—must have length equal to \code{nrow(tbl)}.}
 }
 \value{
 A tibble of size \code{rep * size} rows corresponding to \code{reps}
@@ -49,14 +42,14 @@ library(dplyr)
 library(ggplot2)
 
 # take 1000 samples of size n = 50, without replacement
-resamples <- gss \%>\%
+slices <- gss \%>\%
   rep_sample_n(size = 50, reps = 1000)
   
-resamples
+slices
 
 # compute the proportion of respondents with a college
 # degree in each replicate
-p_hats <- resamples \%>\%
+p_hats <- slices \%>\%
   group_by(replicate) \%>\%
   summarize(prop_college = mean(college == "degree")) 
 

From 6856cc47bc65e08cf2d1a8253238894fa98ef9d7 Mon Sep 17 00:00:00 2001
From: echasnovski <evgeni.chasnovski@gmail.com>
Date: Sat, 25 Jul 2020 12:40:15 +0300
Subject: [PATCH 07/13] Lint 'rep_sample_n.R'.

---
 R/rep_sample_n.R    | 36 ++++++++++++++++++++----------------
 man/rep_sample_n.Rd | 23 +++++++++++++----------
 2 files changed, 33 insertions(+), 26 deletions(-)

diff --git a/R/rep_sample_n.R b/R/rep_sample_n.R
index e5c18f64..d136150f 100644
--- a/R/rep_sample_n.R
+++ b/R/rep_sample_n.R
@@ -2,47 +2,50 @@
 #'
 #' @description
 #'
-#' These functions extend the functionality of [dplyr::sample_n()] and 
+#' These functions extend the functionality of [dplyr::sample_n()] and
 #' [dplyr::slice_sample()] by allowing for repeated sampling of data.
-#' This operation is especially helpful while creating sampling 
+#' This operation is especially helpful while creating sampling
 #' distributions—see the examples below!
 #'
 #' @param tbl,.data Data frame of population from which to sample.
 #' @param size,n Sample size of each sample.
 #' @param replace Should sampling be with replacement?
 #' @param reps Number of samples of size n = `size` to take.
-#' @param prob,weight_by A vector of sampling weights for each of the rows in `tbl`—must
-#'   have length equal to `nrow(tbl)`.
+#' @param prob,weight_by A vector of sampling weights for each of the rows in
+#' `tbl`—must have length equal to `nrow(tbl)`.
 #'
 #' @return A tibble of size `rep * size` rows corresponding to `reps`
 #'   samples of size `size` from `tbl`, grouped by `replicate`.
-#'   
-#' @details The [dplyr::sample_n()] function from that `rep_sample_n()` function 
-#' was originally written to supplement has been superseded 
-#' by [dplyr::slice_sample()]. `rep_slice_sample()` provides a light wrapper
-#' around `rep_sample_n()` that has a more similar interface to `slice_sample()`.
+#'
+#' @details The [dplyr::sample_n()] function from that `rep_sample_n()`
+#' function was originally written to supplement has been superseded by
+#' [dplyr::slice_sample()]. `rep_slice_sample()` provides a light wrapper
+#' around `rep_sample_n()` that has a more similar interface to
+#' `slice_sample()`.
 #'
 #' @examples
 #' library(dplyr)
 #' library(ggplot2)
-#' 
+#'
 #' # take 1000 samples of size n = 50, without replacement
 #' slices <- gss %>%
 #'   rep_sample_n(size = 50, reps = 1000)
-#'   
+#'
 #' slices
 #'
 #' # compute the proportion of respondents with a college
 #' # degree in each replicate
 #' p_hats <- slices %>%
 #'   group_by(replicate) %>%
-#'   summarize(prop_college = mean(college == "degree")) 
+#'   summarize(prop_college = mean(college == "degree"))
 #'
 #' # plot sampling distribution
 #' ggplot(p_hats, aes(x = prop_college)) +
 #'   geom_density() +
-#'   labs(x = "p_hat", y = "Number of samples",
-#'   title = "Sampling distribution of p_hat")
+#'   labs(
+#'     x = "p_hat", y = "Number of samples",
+#'     title = "Sampling distribution of p_hat"
+#'   )
 #' @export
 rep_sample_n <- function(tbl, size, replace = FALSE, reps = 1, prob = NULL) {
   check_type(tbl, is.data.frame)
@@ -59,7 +62,7 @@ rep_sample_n <- function(tbl, size, replace = FALSE, reps = 1, prob = NULL) {
         dplyr::slice_sample(n = size, weight_by = prob, replace = replace)
     ) %>%
     dplyr::mutate(
-      replicate = rep(1:reps, each = size), 
+      replicate = rep(1:reps, each = size),
       .before = dplyr::everything()
     ) %>%
     dplyr::group_by(replicate)
@@ -67,6 +70,7 @@ rep_sample_n <- function(tbl, size, replace = FALSE, reps = 1, prob = NULL) {
 
 #' @rdname rep_sample_n
 #' @export
-rep_slice_sample <- function(.data, n = 1, replace = FALSE, weight_by = NULL, reps = 1) {
+rep_slice_sample <- function(.data, n = 1, replace = FALSE, weight_by = NULL,
+                             reps = 1) {
   rep_sample_n(.data, n, replace, reps, weight_by)
 }
diff --git a/man/rep_sample_n.Rd b/man/rep_sample_n.Rd
index 752f5d66..7b2d9a5a 100644
--- a/man/rep_sample_n.Rd
+++ b/man/rep_sample_n.Rd
@@ -18,8 +18,8 @@ rep_slice_sample(.data, n = 1, replace = FALSE, weight_by = NULL, reps = 1)
 
 \item{reps}{Number of samples of size n = \code{size} to take.}
 
-\item{prob, weight_by}{A vector of sampling weights for each of the rows in \code{tbl}—must
-have length equal to \code{nrow(tbl)}.}
+\item{prob, weight_by}{A vector of sampling weights for each of the rows in
+\code{tbl}—must have length equal to \code{nrow(tbl)}.}
 }
 \value{
 A tibble of size \code{rep * size} rows corresponding to \code{reps}
@@ -32,10 +32,11 @@ This operation is especially helpful while creating sampling
 distributions—see the examples below!
 }
 \details{
-The \code{\link[dplyr:sample_n]{dplyr::sample_n()}} function from that \code{rep_sample_n()} function
-was originally written to supplement has been superseded
-by \code{\link[dplyr:slice]{dplyr::slice_sample()}}. \code{rep_slice_sample()} provides a light wrapper
-around \code{rep_sample_n()} that has a more similar interface to \code{slice_sample()}.
+The \code{\link[dplyr:sample_n]{dplyr::sample_n()}} function from that \code{rep_sample_n()}
+function was originally written to supplement has been superseded by
+\code{\link[dplyr:slice]{dplyr::slice_sample()}}. \code{rep_slice_sample()} provides a light wrapper
+around \code{rep_sample_n()} that has a more similar interface to
+\code{slice_sample()}.
 }
 \examples{
 library(dplyr)
@@ -44,18 +45,20 @@ library(ggplot2)
 # take 1000 samples of size n = 50, without replacement
 slices <- gss \%>\%
   rep_sample_n(size = 50, reps = 1000)
-  
+
 slices
 
 # compute the proportion of respondents with a college
 # degree in each replicate
 p_hats <- slices \%>\%
   group_by(replicate) \%>\%
-  summarize(prop_college = mean(college == "degree")) 
+  summarize(prop_college = mean(college == "degree"))
 
 # plot sampling distribution
 ggplot(p_hats, aes(x = prop_college)) +
   geom_density() +
-  labs(x = "p_hat", y = "Number of samples",
-  title = "Sampling distribution of p_hat")
+  labs(
+    x = "p_hat", y = "Number of samples",
+    title = "Sampling distribution of p_hat"
+  )
 }

From 7a13f51fd3b62ce7743472f320ea81d0b6d5ec45 Mon Sep 17 00:00:00 2001
From: echasnovski <evgeni.chasnovski@gmail.com>
Date: Sat, 25 Jul 2020 13:48:14 +0300
Subject: [PATCH 08/13] Update/revert `rep_sample_n()` implementation.

---
 R/rep_sample_n.R | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/R/rep_sample_n.R b/R/rep_sample_n.R
index d136150f..e11a83f3 100644
--- a/R/rep_sample_n.R
+++ b/R/rep_sample_n.R
@@ -54,17 +54,27 @@ rep_sample_n <- function(tbl, size, replace = FALSE, reps = 1, prob = NULL) {
   check_type(reps, is.numeric)
   if (!is.null(prob)) {
     check_type(prob, is.numeric)
+    if (length(prob) != nrow(tbl)) {
+      stop_glue(
+        "The argument `prob` must have length `nrow(tbl)` = {nrow(tbl)}"
+      )
+    }
   }
 
-  1:reps %>%
-    purrr::map_dfr(
-      ~ tbl %>%
-        dplyr::slice_sample(n = size, weight_by = prob, replace = replace)
-    ) %>%
-    dplyr::mutate(
-      replicate = rep(1:reps, each = size),
-      .before = dplyr::everything()
-    ) %>%
+  # Generate row indexes for every future replicate (this way it respects
+  # possibility of  `replace = FALSE`)
+  n <- nrow(tbl)
+  i <- unlist(replicate(
+    reps,
+    sample.int(n, size, replace = replace, prob = prob),
+    simplify = FALSE
+  ))
+
+  tbl %>%
+    dplyr::slice(i) %>%
+    dplyr::mutate(replicate = rep(seq_len(reps), each = size)) %>%
+    dplyr::select(replicate, dplyr::everything()) %>%
+    tibble::as_tibble() %>%
     dplyr::group_by(replicate)
 }
 

From 7b376b0de6857424be4457019ca4fb52d5e95d91 Mon Sep 17 00:00:00 2001
From: echasnovski <evgeni.chasnovski@gmail.com>
Date: Sat, 25 Jul 2020 14:01:24 +0300
Subject: [PATCH 09/13] Update `rep_sample_n()` tests.

---
 tests/testthat/test-rep_sample_n.R | 46 ++++++++++++++++--------------
 1 file changed, 25 insertions(+), 21 deletions(-)

diff --git a/tests/testthat/test-rep_sample_n.R b/tests/testthat/test-rep_sample_n.R
index cab65d51..a79bce0f 100644
--- a/tests/testthat/test-rep_sample_n.R
+++ b/tests/testthat/test-rep_sample_n.R
@@ -11,13 +11,13 @@ test_that("rep_sample_n is sensitive to the size argument", {
   reps <- 10
   s1 <- 2
   s2 <- 3
-  
+
   res1 <- population %>% rep_sample_n(size = s1, reps = reps)
   res2 <- population %>% rep_sample_n(size = s2, reps = reps)
 
   expect_equal(ncol(res1), ncol(res2))
   expect_equal(ncol(res1), 3)
-  
+
   expect_equal(nrow(res1) / s1, nrow(res2) / s2)
   expect_equal(nrow(res1), reps * s1)
 })
@@ -27,13 +27,13 @@ test_that("rep_sample_n is sensitive to the reps argument", {
   r1 <- 10
   r2 <- 5
   size <- 2
-  
+
   res1 <- population %>% rep_sample_n(size = size, reps = r1)
   res2 <- population %>% rep_sample_n(size = size, reps = r2)
-  
+
   expect_equal(ncol(res1), ncol(res2))
   expect_equal(ncol(res1), 3)
-  
+
   expect_equal(nrow(res1) / r1, nrow(res2) / r2)
   expect_equal(nrow(res1), r1 * size)
 })
@@ -41,28 +41,32 @@ test_that("rep_sample_n is sensitive to the reps argument", {
 test_that("rep_sample_n is sensitive to the replace argument", {
   set.seed(1)
   res1 <- population %>% rep_sample_n(size = 5, reps = 100, replace = TRUE)
-  
+
   set.seed(1)
   res2 <- population %>% rep_sample_n(size = 5, reps = 100, replace = FALSE)
-  
+
   expect_true(all(res1$replicate == res2$replicate))
   expect_false(all(res1$ball_id == res2$ball_id))
   expect_false(all(res1$color == res2$color))
-  
+
   expect_equal(ncol(res1), ncol(res2))
   expect_equal(ncol(res1), 3)
+
+  # Check if there are actually no duplicates in case `replace = FALSE`
+  no_duplicates <- all(tapply(res2$ball_id, res2$replicate, anyDuplicated) == 0)
+  expect_true(no_duplicates)
 })
 
 test_that("rep_sample_n is sensitive to the prob argument", {
   set.seed(1)
-  res1 <- population %>% 
+  res1 <- population %>%
     rep_sample_n(
-      size = 5, 
-      reps = 100, 
+      size = 5,
+      reps = 100,
       replace = TRUE,
       prob = c(1, rep(0, 4))
     )
-  
+
   expect_true(all(res1$ball_id == 1))
   expect_true(all(res1$color == "red"))
 })
@@ -70,19 +74,19 @@ test_that("rep_sample_n is sensitive to the prob argument", {
 test_that("rep_sample_n errors with bad arguments", {
   expect_error(
     population %>%
-      rep_sample_n(size = 2, reps = 10, prob = rep(x = 1/5, times = 100))
+      rep_sample_n(size = 2, reps = 10, prob = rep(x = 1 / 5, times = 100))
   )
-  
+
   expect_error(
     population %>%
-      rep_sample_n(size = 2, reps = 10, prob = c(1/2, 1/2))
+      rep_sample_n(size = 2, reps = 10, prob = c(1 / 2, 1 / 2))
   )
-  
+
   expect_error(
     population %>%
       rep_sample_n(size = "a lot", reps = 10)
   )
-  
+
   expect_error(
     population %>%
       rep_sample_n(size = 2, reps = "a lot")
@@ -91,10 +95,10 @@ test_that("rep_sample_n errors with bad arguments", {
 
 test_that("rep_slice_sample works", {
   set.seed(1)
-  res1 <- rep_sample_n(population, size = 2, reps = 5, prob = rep(1/N, N))
-  
+  res1 <- rep_sample_n(population, size = 2, reps = 5, prob = rep(1 / N, N))
+
   set.seed(1)
-  res2 <- rep_slice_sample(population, n = 2, reps = 5, weight_by = rep(1/N, N))
-  
+  res2 <- rep_slice_sample(population, n = 2, reps = 5, weight_by = rep(1 / N, N))
+
   expect_equal(res1, res2)
 })

From 8519b79fe786344339e13fbe0e05aec47c27c235 Mon Sep 17 00:00:00 2001
From: echasnovski <evgeni.chasnovski@gmail.com>
Date: Sat, 25 Jul 2020 14:01:43 +0300
Subject: [PATCH 10/13] Tweak `rep_sample_n()` documentation.

---
 R/rep_sample_n.R    | 9 ++++-----
 man/rep_sample_n.Rd | 9 ++++-----
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/R/rep_sample_n.R b/R/rep_sample_n.R
index e11a83f3..ede89cb1 100644
--- a/R/rep_sample_n.R
+++ b/R/rep_sample_n.R
@@ -17,11 +17,10 @@
 #' @return A tibble of size `rep * size` rows corresponding to `reps`
 #'   samples of size `size` from `tbl`, grouped by `replicate`.
 #'
-#' @details The [dplyr::sample_n()] function from that `rep_sample_n()`
-#' function was originally written to supplement has been superseded by
-#' [dplyr::slice_sample()]. `rep_slice_sample()` provides a light wrapper
-#' around `rep_sample_n()` that has a more similar interface to
-#' `slice_sample()`.
+#' @details The [dplyr::sample_n()] function (to which `rep_sample_n()` was
+#' originally a supplement) has been superseded by [dplyr::slice_sample()].
+#' `rep_slice_sample()` provides a light wrapper around `rep_sample_n()` that
+#' has a more similar interface to `slice_sample()`.
 #'
 #' @examples
 #' library(dplyr)
diff --git a/man/rep_sample_n.Rd b/man/rep_sample_n.Rd
index 7b2d9a5a..ee7f396d 100644
--- a/man/rep_sample_n.Rd
+++ b/man/rep_sample_n.Rd
@@ -32,11 +32,10 @@ This operation is especially helpful while creating sampling
 distributions—see the examples below!
 }
 \details{
-The \code{\link[dplyr:sample_n]{dplyr::sample_n()}} function from that \code{rep_sample_n()}
-function was originally written to supplement has been superseded by
-\code{\link[dplyr:slice]{dplyr::slice_sample()}}. \code{rep_slice_sample()} provides a light wrapper
-around \code{rep_sample_n()} that has a more similar interface to
-\code{slice_sample()}.
+The \code{\link[dplyr:sample_n]{dplyr::sample_n()}} function (to which \code{rep_sample_n()} was
+originally a supplement) has been superseded by \code{\link[dplyr:slice]{dplyr::slice_sample()}}.
+\code{rep_slice_sample()} provides a light wrapper around \code{rep_sample_n()} that
+has a more similar interface to \code{slice_sample()}.
 }
 \examples{
 library(dplyr)

From 58457a228d60146b50ee9bddd73386ba393fb088 Mon Sep 17 00:00:00 2001
From: rudeboybert <albert.ys.kim@gmail.com>
Date: Sat, 25 Jul 2020 12:09:01 -0400
Subject: [PATCH 11/13] Added weighted sampling example to rep_sample_n()

---
 R/rep_sample_n.R    | 8 ++++++++
 man/rep_sample_n.Rd | 8 ++++++++
 2 files changed, 16 insertions(+)

diff --git a/R/rep_sample_n.R b/R/rep_sample_n.R
index ede89cb1..0ac05d84 100644
--- a/R/rep_sample_n.R
+++ b/R/rep_sample_n.R
@@ -45,6 +45,14 @@
 #'     x = "p_hat", y = "Number of samples",
 #'     title = "Sampling distribution of p_hat"
 #'   )
+#'   
+#' # sampling with probability weights. Note probabilities are automatically 
+#' # renormalized to sum to 1
+#' df <- tibble(
+#'   id = 1:5,
+#'   letter = factor(c("a", "b", "c", "d", "e"))
+#' )
+#' rep_sample_n(df, size = 4, reps = 2, prob = c(.5, .4, .3, .2, .1))
 #' @export
 rep_sample_n <- function(tbl, size, replace = FALSE, reps = 1, prob = NULL) {
   check_type(tbl, is.data.frame)
diff --git a/man/rep_sample_n.Rd b/man/rep_sample_n.Rd
index ee7f396d..f5dd7d52 100644
--- a/man/rep_sample_n.Rd
+++ b/man/rep_sample_n.Rd
@@ -60,4 +60,12 @@ ggplot(p_hats, aes(x = prop_college)) +
     x = "p_hat", y = "Number of samples",
     title = "Sampling distribution of p_hat"
   )
+  
+# sampling with probability weights. Note probabilities are automatically 
+# renormalized to sum to 1
+df <- tibble(
+  id = 1:5,
+  letter = factor(c("a", "b", "c", "d", "e"))
+)
+rep_sample_n(df, size = 4, reps = 2, prob = c(.5, .4, .3, .2, .1))
 }

From 1863946b2569afe787b45a6ad0a323f83193c222 Mon Sep 17 00:00:00 2001
From: rudeboybert <albert.ys.kim@gmail.com>
Date: Sat, 25 Jul 2020 12:12:06 -0400
Subject: [PATCH 12/13] Added tibble pkg to ex

---
 R/rep_sample_n.R    | 1 +
 man/rep_sample_n.Rd | 1 +
 2 files changed, 2 insertions(+)

diff --git a/R/rep_sample_n.R b/R/rep_sample_n.R
index 0ac05d84..504debd1 100644
--- a/R/rep_sample_n.R
+++ b/R/rep_sample_n.R
@@ -48,6 +48,7 @@
 #'   
 #' # sampling with probability weights. Note probabilities are automatically 
 #' # renormalized to sum to 1
+#' library(tibble)
 #' df <- tibble(
 #'   id = 1:5,
 #'   letter = factor(c("a", "b", "c", "d", "e"))
diff --git a/man/rep_sample_n.Rd b/man/rep_sample_n.Rd
index f5dd7d52..13499145 100644
--- a/man/rep_sample_n.Rd
+++ b/man/rep_sample_n.Rd
@@ -63,6 +63,7 @@ ggplot(p_hats, aes(x = prop_college)) +
   
 # sampling with probability weights. Note probabilities are automatically 
 # renormalized to sum to 1
+library(tibble)
 df <- tibble(
   id = 1:5,
   letter = factor(c("a", "b", "c", "d", "e"))

From 6e6c5121fa733f59535e2d6ee4282d2b40e122f4 Mon Sep 17 00:00:00 2001
From: rudeboybert <albert.ys.kim@gmail.com>
Date: Sat, 25 Jul 2020 12:29:36 -0400
Subject: [PATCH 13/13] Lowered (w/o replacement) sample size & increased
 replicate number to make weighting more prominent

---
 R/rep_sample_n.R    | 2 +-
 man/rep_sample_n.Rd | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/rep_sample_n.R b/R/rep_sample_n.R
index 504debd1..ee7323de 100644
--- a/R/rep_sample_n.R
+++ b/R/rep_sample_n.R
@@ -53,7 +53,7 @@
 #'   id = 1:5,
 #'   letter = factor(c("a", "b", "c", "d", "e"))
 #' )
-#' rep_sample_n(df, size = 4, reps = 2, prob = c(.5, .4, .3, .2, .1))
+#' rep_sample_n(df, size = 2, reps = 5, prob = c(.5, .4, .3, .2, .1))
 #' @export
 rep_sample_n <- function(tbl, size, replace = FALSE, reps = 1, prob = NULL) {
   check_type(tbl, is.data.frame)
diff --git a/man/rep_sample_n.Rd b/man/rep_sample_n.Rd
index 13499145..99482370 100644
--- a/man/rep_sample_n.Rd
+++ b/man/rep_sample_n.Rd
@@ -68,5 +68,5 @@ df <- tibble(
   id = 1:5,
   letter = factor(c("a", "b", "c", "d", "e"))
 )
-rep_sample_n(df, size = 4, reps = 2, prob = c(.5, .4, .3, .2, .1))
+rep_sample_n(df, size = 2, reps = 5, prob = c(.5, .4, .3, .2, .1))
 }