epiverse-trace · joshwlambert · Oct 8, 2024 · Aug 20, 2024 · Aug 20, 2024 · Aug 20, 2024
diff --git a/R/coercion.R b/R/coercion.R
@@ -99,9 +99,9 @@ as.data.frame.multi_epiparameter <- function(x, ...) {
 #' as it will be matched by name by `$`.
 #'
 #' To specify a probability distribution pass a `character` string to the
-#' function via the `...` argument. The argument should be called `prob_dist`.
-#' For example, to specify a gamma distribution:
-#' `as_epiparameter(x, prob_dist = "gamma")`.
+#' function via the `...` argument. The argument should be called
+#' `prob_distribution`. For example, to specify a gamma distribution:
+#' `as_epiparameter(x, prob_distribution = "gamma")`.
 #'
 #' ***Warning***: distributions specified via `prob_distribution` will
 #' overwrite the probability distribution specified in the `x` argument. For
@@ -247,7 +247,7 @@ is_epiparameter_df <- function(x) {
   # capture dots and extract article info if supplied
   dots <- list(...)
   article <- dots$article
-  prob_dist_in <- dots$prob_dist
+  prob_dist_in <- dots$prob_distribution
   # validate multi-row entries
   if (nrow(x) > 1) {
     stopifnot(

diff --git a/README.md b/README.md
@@ -55,15 +55,15 @@ To load the library of epidemiological parameters into `R`:
 
 ``` r
 epiparameters <- epiparameter_db()
-#> Returning 122 results that match the criteria (99 are parameterised). 
+#> Returning 125 results that match the criteria (100 are parameterised). 
 #> Use subset to filter by entry variables or single_epiparameter to return a single entry. 
 #> To retrieve the citation for each use the 'get_citation' function
 epiparameters
-#> # List of 122 <epiparameter> objects
+#> # List of 125 <epiparameter> objects
 #> Number of diseases: 23
 #> ❯ Adenovirus ❯ Chikungunya ❯ COVID-19 ❯ Dengue ❯ Ebola Virus Disease ❯ Hantavirus Pulmonary Syndrome ❯ Human Coronavirus ❯ Influenza ❯ Japanese Encephalitis ❯ Marburg Virus Disease ❯ Measles ❯ MERS ❯ Mpox ❯ Parainfluenza ❯ Pneumonic Plague ❯ Rhinovirus ❯ Rift Valley Fever ❯ RSV ❯ SARS ❯ Smallpox ❯ West Nile Fever ❯ Yellow Fever ❯ Zika Virus Disease
-#> Number of epi distributions: 12
-#> ❯ generation time ❯ hospitalisation to death ❯ hospitalisation to discharge ❯ incubation period ❯ notification to death ❯ notification to discharge ❯ offspring distribution ❯ onset to death ❯ onset to discharge ❯ onset to hospitalisation ❯ onset to ventilation ❯ serial interval
+#> Number of epi distributions: 13
+#> ❯ case fatality risk ❯ generation time ❯ hospitalisation to death ❯ hospitalisation to discharge ❯ incubation period ❯ notification to death ❯ notification to discharge ❯ offspring distribution ❯ onset to death ❯ onset to discharge ❯ onset to hospitalisation ❯ onset to ventilation ❯ serial interval
 #> [[1]]
 #> Disease: Adenovirus
 #> Pathogen: Adenovirus
@@ -75,8 +75,8 @@ epiparameters
 #> <https://doi.org/10.1016/S1473-3099%2809%2970069-6>.
 #> Distribution: lnorm
 #> Parameters:
-#>   meanlog: 1.247
-#>   sdlog: 0.975
+#>   meanlog: 1.723
+#>   sdlog: 0.231
 #> 
 #> [[2]]
 #> Disease: Human Coronavirus
@@ -89,8 +89,8 @@ epiparameters
 #> <https://doi.org/10.1016/S1473-3099%2809%2970069-7>.
 #> Distribution: lnorm
 #> Parameters:
-#>   meanlog: 0.742
-#>   sdlog: 0.918
+#>   meanlog: 1.163
+#>   sdlog: 0.140
 #> 
 #> [[3]]
 #> Disease: SARS
@@ -103,10 +103,10 @@ epiparameters
 #> <https://doi.org/10.1016/S1473-3099%2809%2970069-8>.
 #> Distribution: lnorm
 #> Parameters:
-#>   meanlog: 0.660
-#>   sdlog: 1.205
+#>   meanlog: 1.386
+#>   sdlog: 0.593
 #> 
-#> # ℹ 119 more elements
+#> # ℹ 122 more elements
 #> # ℹ Use `print(n = ...)` to see more elements.
 #> # ℹ Use `parameter_tbl()` to see a summary table of the parameters.
 #> # ℹ Explore database online at: https://epiverse-trace.github.io/epiparameter/articles/database.html
@@ -161,7 +161,7 @@ the data, and offers the ability to subset your data by `disease`,
 ``` r
 parameter_tbl(epiparameters)
 #> # Parameter table:
-#> # A data frame:    122 × 7
+#> # A data frame:    125 × 7
 #>    disease  pathogen epi_distribution prob_distribution author  year sample_size
 #>    <chr>    <chr>    <chr>            <chr>             <chr>  <dbl>       <dbl>
 #>  1 Adenovi… Adenovi… incubation peri… lnorm             Lessl…  2009          14
@@ -174,7 +174,7 @@ parameter_tbl(epiparameters)
 #>  8 Parainf… Parainf… incubation peri… lnorm             Lessl…  2009          11
 #>  9 RSV      RSV      incubation peri… lnorm             Lessl…  2009          24
 #> 10 Rhinovi… Rhinovi… incubation peri… lnorm             Lessl…  2009          28
-#> # ℹ 112 more rows
+#> # ℹ 115 more rows
 parameter_tbl(
   epiparameters,
   epi_dist = "onset to hospitalisation"

diff --git a/inst/WORDLIST b/inst/WORDLIST
@@ -22,6 +22,7 @@ etc
 EVD
 facetted
 jsonlite
+Lassa
 Lifecycle
 Marburg
 md

diff --git a/man/as_epiparameter.Rd b/man/as_epiparameter.Rd
diff --git a/tests/testthat/test-coercion.R b/tests/testthat/test-coercion.R
@@ -97,7 +97,7 @@ test_that("as_epiparameter works for ebola SI assumed prob_dist (issue #310)", {
   # suppress warning and message about citation
   ebola_serial_epiparameter <- suppressWarnings(
     suppressMessages(
-      as_epiparameter(ebola_serial, prob_dist = "gamma")
+      as_epiparameter(ebola_serial, prob_distribution = "gamma")
     )
   )
   expect_s3_class(ebola_serial_epiparameter, class = "epiparameter")
@@ -122,7 +122,7 @@ test_that("as_epiparameter works for lassa incubation overwritten prob_dist", {
   # suppress warning and message about citation
   lassa_incub_epiparameter <- suppressWarnings(
     suppressMessages(
-      as_epiparameter(lassa_incub, prob_dist = "lnorm")
+      as_epiparameter(lassa_incub, prob_distribution = "lnorm")
     )
   )
   expect_s3_class(lassa_incub_epiparameter, class = "epiparameter")
@@ -155,7 +155,7 @@ test_that("as_epiparameter works for overwritten prob_dist with same parameters"
   )
   ebola_si_weibull <- suppressWarnings(
     suppressMessages(
-      as_epiparameter(ebola_si, prob_dist = "weibull")
+      as_epiparameter(ebola_si, prob_distribution = "weibull")
     )
   )
   expect_s3_class(ebola_si_gamma, class = "epiparameter")
@@ -179,7 +179,7 @@ test_that("as_epiparameter fails as expected with overwritten prob_dist", {
   expect_error(
     suppressWarnings(
       suppressMessages(
-         as_epiparameter(ebola_si, prob_dist = "lnorm")
+         as_epiparameter(ebola_si, prob_distribution = "lnorm")
       )
     ),
     regexp = "Incorrect parameters provided for probability distribution."

diff --git a/vignettes/articles/data_from_epireview.Rmd b/vignettes/articles/data_from_epireview.Rmd
@@ -43,22 +43,19 @@ We will start by just using the epidemiological parameter table to convert infor
 marburg_params <- marburg_data$params
 ```
 
-Given that currently only delay distributions are supported for the conversion (this feature is still under active development) we will filter to only include these.
+Out of these parameters, subset the data to only keep those rows that contain incubation periods for Marburg.
 
 ```{r, subset-marburg-params}
-delay_dist_rows <- grepl(
-  pattern = "Human delay",
-  x = marburg_params$parameter_type,
-  fixed = TRUE
-)
-marburg_params <- marburg_params[delay_dist_rows, ]
-marburg_params
+marburg_incubation_period <- marburg_params[
+  marburg_params$parameter_type_short == "incubation_period",
+]
+marburg_incubation_period
 ```
 
-We will select the second entry, which is an incubation period, to use as the first example:
+We will select the first entry to use as the first example:
 
 ```{r, select-marburg-entry}
-marburg_incub <- marburg_params[2, ]
+marburg_incub <- marburg_incubation_period[1, ]
 marburg_incub
 ```
 
@@ -108,7 +105,7 @@ marburg_incub_article <- marburg_articles[article_row, ]
 marburg_incub_article
 ```
 
-Now we can repeat the example of converting to `<epiparameter>` as shown above, but this time pass the bibliographic information as well as the epidemiological parameter information to create a full citation. The bibliographic information needs to be passed with the `articles` argument. 
+Now we can repeat the example of converting to `<epiparameter>` as shown above, but this time pass the bibliographic information as well as the epidemiological parameter information to create a full citation. The bibliographic information needs to be passed with the `article` argument. 
 
 ```{r, convert-to-epiparameter-full-citation}
 marburg_incub_epiparameter <- as_epiparameter(
@@ -123,14 +120,14 @@ marburg_incub_epiparameter$citation
 ```
 
 ::: {.alert .alert-info}
-The `as_epiparameter()` function is an S3 generic. If you are not familiar with S3 object-oriented programming in R, then this detail is not important, however, it does mean that the `articles` argument is not explicitly in the function definition of `as_epiparameter()` (i.e. it will not show up on autocomplete when typing out the function and will not be shown if you read the function help page `?as_epiparameter()`). Instead, the argument is specified as part of the `...` argument. This is because the `articles` argument is only required when converting data from {epireview} into `<epiparameter>`, and other data that can be converted to `<epiparameter>` objects do not require this argument.
+The `as_epiparameter()` function is an S3 generic. If you are not familiar with S3 object-oriented programming in R, then this detail is not important; however, it does mean that the `article` argument is not explicitly in the function definition of `as_epiparameter()` (i.e. it will not show up on autocomplete when typing out the function and will not be shown if you read the function help page `?as_epiparameter()`). Instead, the argument is specified as part of the `...` argument. This is because the `article` argument is only required when converting data from {epireview} into `<epiparameter>`, and other data that can be converted to `<epiparameter>` objects do not require this argument.
 :::
 
 ## Multi-row {epireview} entries
 
 The way the {epireview} data is stored means that some epidemiological parameter entries require multiple rows. This can be, for example, because they contain two summary statistics (e.g. mean and standard deviation) that are kept on separate rows. In order to create `<epiparameter>` objects that contain the full information for each entry, multiple rows of the epidemiological parameters table from {epireview} can be given to `as_epiparameter()` to create a single `<epiparameter>` object.
 
-We can search which entries in the data have multiple rows by checking if there are duplicated parameter types and IDs. Remember that it is only possible to convert delay distributions into epiparameter objects (i.e. known as _Human delay_ parameter types in {epireview}), which we previously subset in this vignette. 
+We can search which entries in the data have multiple rows by checking if there are duplicated parameter types and IDs. Remember that it is only possible to convert delay distributions into `<epiparameter>` objects (known as _Human delay_ parameter types in {epireview}).
 
 ```{r, check-multi-row-entries}
 multi_row_entries <- duplicated(marburg_params$parameter_type) &
@@ -141,18 +138,27 @@ multi_row_ids <- marburg_params$id[multi_row_entries]
 ```{r, subset-multi-row-entries}
 multi_row_marburg_params <-
   marburg_params[marburg_params$id %in% multi_row_ids, ]
+multi_row_marburg_params
 ```
 
-In this case there are two studies for Marburg with more than one entry (row) in the {epireview} database. Out of these studies, we select the first two rows, which contain the mean and standard deviation.
+In this case there are two studies for Marburg with more than one entry (row) in the {epireview} database. Out of these studies we select the mean and standard deviation.
 
 ```{r, check-multi-row-param-value-type}
 multi_row_marburg_params$parameter_value_type
 ```
 
-We use the first two rows of this subset table, which are the mean and standard deviation for the generation time of Marburg disease. This step should be verified manually to ensure that the entries that have been selected are indeed multiple rows for the same reported epidemiological parameter.
+In this case, we know that the mean and standard deviation from the chosen rows correspond to the same estimation process by having read the corresponding article. However, there are currently no identifiers in the {epireview} `params` database for Marburg, Ebola or Lassa to directly identify which of the two rows with mean values corresponds to the standard deviation. The {epireview} team are currently working on rectifying this issue.
+
+***Therefore, we encourage readers to manually verify their data subsets, to ensure that the entries that have been selected are indeed multiple rows for the same reported epidemiological parameter.***
+
+* For future {epireview} pathogens (excluding SARS), matching mean and standard deviation estimates will form one row in the `$params` database. Current software development at {epireview} is working on ensuring compatibility between these formats.
 
 ```{r, subset-multi-row-marburg-entry}
-marburg_gt <- multi_row_marburg_params[1:2, ]
+marburg_gt <- multi_row_marburg_params[
+  multi_row_marburg_params$parameter_data_id %in%
+    c("056a8d6b5f9aee3622d3bd8b715d4296", "ce3976e2e15df3f6fb92f6deb2db2a29"),
+]
+marburg_gt
 ```
 
 We can now convert this to an `<epiparameter>`.
@@ -171,26 +177,25 @@ For this example we will load the Ebola epidemiological parameters from the {epi
 ebola_data <- load_epidata("ebola")
 ```
 
-We will again subset the data to just use the epidemiological parameter table, and subset that table to just the delay distributions.
+We will again subset the data to just use the epidemiological parameter table, and select those rows containing a serial interval.
 
 ```{r, ebola-params}
 ebola_params <- ebola_data$params
 ```
 
 ```{r, subset-ebola-params}
-delay_dist_rows <- grepl(
-  pattern = "Human delay",
-  x = ebola_params$parameter_type,
-  fixed = TRUE
-)
-ebola_delays <- ebola_params[delay_dist_rows, ]
-ebola_delays
+ebola_si_rows <- ebola_params[
+  ebola_params$parameter_type_short == "serial_interval",
+]
+ebola_si_rows
 ```
 
-We will select the 358th entry, which is a serial interval, as this entry has estimated and reported a Weibull distribution:
+We will select an entry that has estimated and reported a Weibull distribution:
 
 ```{r, select-ebola-entry}
-ebola_si <- ebola_delays[358, ]
+ebola_si <-  ebola_si_rows[
+  ebola_si_rows$parameter_data_id == "0c3e02f80addfccc1017fa619fba76c5",
+]
 ebola_si
 ```
 
@@ -254,7 +259,7 @@ is_parameterised(ebola_si_epiparameter)
 Given that we can convert the mean and standard deviation into parameters of a probability distribution if we assume a distribution form, we can supply this data to `as_epiparameter()`. This uses the parameter conversion functions in {epiparameter} (see `vignette("extract_convert", package = "epiparameter")`).
 
 ```{r, convert-to-epiparameter-assumed-prob-dist}
-ebola_si_epiparameter <- as_epiparameter(ebola_si, prob_dist = "gamma")
+ebola_si_epiparameter <- as_epiparameter(ebola_si, prob_distribution = "gamma")
 ebola_si_epiparameter
 is_parameterised(ebola_si_epiparameter)
 ```
-Original file line number
+Diff line change
@@ Expand Up / @@ -22,6 +22,7 @@ etc @@
     EVD
     facetted
     jsonlite
+    Lassa
     Lifecycle
     Marburg
     md
@@ Expand Down @@