Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use <bibentry> for handling citations #168

Merged
merged 7 commits into from
Aug 29, 2023
10 changes: 8 additions & 2 deletions R/accessors.R
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ get_citation <- function(x, ...) {

#' @export
get_citation.epidist <- function(x, ...) {
if (!inherits(x$citation, "bibentry")) {
stop("Citation should be a <bibentry>", call. = FALSE)
}

# return citation
x$citation
}
Expand All @@ -86,8 +90,10 @@ get_citation.epiparam <- function(x, ...) {
create_epidist_citation(
author = y$author,
year = y$year,
PMID = y$PMID,
DOI = y$DOI
title = y$title,
journal = y$journal,
DOI = y$DOI,
PMID = y$PMID
)
)
},
Expand Down
9 changes: 6 additions & 3 deletions R/epidist.R
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ epidist <- function(disease,
types = c("list", "double", "logical", "null"),
names = "unique"
)
checkmate::assert_string(citation)
checkmate::assert_class(citation, classes = "bibentry")
checkmate::assert_list(metadata)
checkmate::assert_list(method_assess)
checkmate::assert_number(truncation, na.ok = TRUE)
Expand Down Expand Up @@ -338,14 +338,17 @@ validate_epidist <- function(epidist) {
length(epidist$disease$disease) == 1,
"epidist must contain an epidemiological distribution" =
is.character(epidist$epi_dist) && length(epidist$epi_dist) == 1,
"epidist must contain a <distribution> or <distcrete> distribution or NA" =
inherits(epidist$prob_dist, c("distribution", "distcrete")) ||
is.na(epidist$prob_dist) || is.character(epidist$prob_dist),
"epidisit must contain uncertainty, summary stats and metadata" =
all(
is.list(epidist$uncertainty),
is.list(epidist$summary_stats),
is.list(epidist$metadata)
),
"epidist must contain a citation" =
is.character(epidist$citation)
inherits(epidist$citation, "bibentry")
)

invisible(epidist)
Expand Down Expand Up @@ -406,7 +409,7 @@ format.epidist <- function(x, header = TRUE, vb = NULL, ...) {
sprintf("Disease: %s", x$disease$disease),
sprintf("Pathogen: %s", x$disease$pathogen),
sprintf("Epi Distribution: %s", clean_epidist_name(x$epi_dist)),
sprintf("Study: %s", x$citation)
sprintf("Study: %s", format(x$citation))
)
)
}
Expand Down
85 changes: 43 additions & 42 deletions R/epidist_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -349,24 +349,30 @@ create_epidist_summary_stats <- function(mean = NA_real_,
)
}

#' A helper function when creating an epidist object to create a summary
#' statistics list with sensible defaults, type checking and arguments to help
#' remember which summary statistics can be accepted in the list

#' A helper function when creating an epidist object to create a citation list
#' with sensible defaults, type checking and arguments to help remember which
#' citation information is accepted in the list.
#' Create a citation for an epidist object
#'
#' @description A helper function when creating an epidist object to create a
#' citation list with sensible defaults, type checking and arguments to help
#' remember which citation information is accepted in the list.
#'
#' @param author A character string of the surname of the first author. This
#' @details This function acts as a wrapper around [`bibentry()`] to create
#' citations for sources reporting epidemiological parameters.
#'
#' @param author A `character` string of the surname of the first author. This
#' can be underscore separated from a second author, or underscore separated
#' from "etal" if there are more than two authors.
#' @param year A numeric of the year of publication
#' @param PMID A character string with the PubMed unique identifier number
#' @param year A `numeric` of the year of publication
#' @param title A `character` string with the title of the article that
#' published the epidemiological parameters.
#' @param journal A `character` string with the name of the journal that
#' published the article that published the epidemiological parameters.
#' This can also be a pre-print server, e.g., medRxiv.
#' @param PMID A `character` string with the PubMed unique identifier number
#' assigned to papers to give them a unique identifier within PubMed.
#' @param DOI A character string of the Digital Object Identifier (DOI)
#' @param DOI A `character` string of the Digital Object Identifier (DOI)
#' assigned to papers which are unique to each paper.
#'
#' @return A character string of the formatted short citation
#' @return A `bibentry` object of the citation
#' @export
#'
#' @examples
Expand All @@ -377,50 +383,45 @@ create_epidist_summary_stats <- function(mean = NA_real_,
#' )
create_epidist_citation <- function(author = NA_character_,
year = NA_integer_,
PMID = NA_character_,
DOI = NA_character_) {
title = NA_character_,
journal = NA_character_,
DOI = NA_character_,
PMID = NA_character_) {
# check input
checkmate::assert_character(author)
checkmate::assert_number(year, na.ok = TRUE)
checkmate::assert_character(title)
checkmate::assert_character(journal)
checkmate::assert_number(PMID, na.ok = TRUE)
checkmate::assert_character(DOI)

if (is.na(author) || is.na(year) || is.na(DOI)) {
if (is.na(author) || is.na(year) || is.na(journal) || is.na(title)) {
message(
"Citation cannot be created as either author, year or DOI is missing"
"Citation cannot be created as author, year, journal or title is missing"
)
return("No citation available")
return(utils::bibentry(bibtype = "Misc", title = "No citation"))
}

# change author formatting if multiple authors or et al
author <- gsub(
pattern = "_",
replacement = " ",
x = author,
fixed = TRUE
)
author <- gsub(
pattern = "etal",
replacement = "et al.",
x = author,
fixed = TRUE
)

# check if study has two authors and if so insert ampersand
num_authors <- length(unlist(strsplit(x = author, split = " ", fixed = TRUE)))
if (identical(num_authors, 2L)) {
author <- gsub(pattern = " ", replacement = " & ", x = author, fixed = TRUE)
if (!inherits(author, "person")) {
# imperfect solution as library currently only has first author
author_names <- unlist(strsplit(x = author, split = "_", fixed = TRUE))
authors <- lapply(author_names, utils::as.person)
authors <- Reduce(c, authors)
}

citation <- paste0(author, " (", year, ") ", "<", DOI, ">")

if (!is.na(PMID)) {
citation <- paste0(citation, " PMID: ", PMID)
}
citation <- utils::bibentry(
bibtype = "article",
author = authors,
year = year,
title = title,
journal = journal,
doi = DOI
)
citation$PMID <- PMID

message(
"Using ", citation, ". \n",
"To retrieve the short citation use the 'get_citation' function"
"Using ", format(citation), " \n",
"To retrieve the citation use the 'get_citation' function"
)

citation
Expand Down
33 changes: 17 additions & 16 deletions R/epiparam.R
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ validate_epiparam <- function(epiparam, reconstruct = FALSE) {
all(epiparam$year > 0 | is.na(epiparam$year))
)

check_limits <- apply(epiparam, MARGIN = 2, FUN = function(x) {
check_limits <- apply(epiparam, MARGIN = 2, FUN = function(x) { # nolint
vapply(x, function(y) {
length(y) == 2 && is.numeric(y)
}, FUN.VALUE = logical(1))
Expand Down Expand Up @@ -313,18 +313,19 @@ tail.epiparam <- function(x, ...) {
#' @keywords internal
#' @noRd
epiparam_fields <- function() {
c("disease", "pathogen", "epi_distribution", "author", "year", "sample_size",
"region", "transmission_mode", "vector", "extrinsic", "prob_distribution",
"inference_method", "mean", "mean_ci_limits", "mean_ci", "sd",
"sd_ci_limits", "sd_ci", "quantile_2.5", "quantile_5", "quantile_25",
"median", "median_ci_limits", "median_ci", "quantile_75", "quantile_87.5",
"quantile_95", "quantile_97.5", "lower_range", "upper_range", "shape",
"shape_ci_limits", "shape_ci", "scale", "scale_ci_limits", "scale_ci",
"meanlog", "meanlog_ci_limits", "meanlog_ci", "sdlog", "sdlog_ci_limits",
"sdlog_ci", "dispersion", "dispersion_ci_limits", "dispersion_ci",
"precision", "precision_ci_limits", "precision_ci", "truncation",
"discretised", "censored", "right_truncated", "phase_bias_adjusted",
"notes", "PMID", "DOI")
c("disease", "pathogen", "epi_distribution", "author", "title", "journal",
"year", "sample_size", "region", "transmission_mode", "vector",
"extrinsic", "prob_distribution", "inference_method", "mean",
"mean_ci_limits", "mean_ci", "sd", "sd_ci_limits", "sd_ci", "quantile_2.5",
"quantile_5", "quantile_25", "median", "median_ci_limits", "median_ci",
"quantile_75", "quantile_87.5", "quantile_95", "quantile_97.5",
"lower_range", "upper_range", "shape", "shape_ci_limits", "shape_ci",
"scale", "scale_ci_limits", "scale_ci", "meanlog", "meanlog_ci_limits",
"meanlog_ci", "sdlog", "sdlog_ci_limits", "sdlog_ci", "dispersion",
"dispersion_ci_limits", "dispersion_ci", "precision",
"precision_ci_limits", "precision_ci", "truncation", "discretised",
"censored", "right_truncated", "phase_bias_adjusted", "notes", "PMID",
"DOI")
}

#' Character fields (columns) of an `<epiparam>` object
Expand All @@ -335,9 +336,9 @@ epiparam_fields <- function() {
epiparam_char_fields <- function(epiparam) {
which(
colnames(epiparam) %in% c(
"disease", "pathogen", "epi_distribution", "author", "region",
"transmission_mode", "vector", "prob_distribution", "inference_method",
"notes", "DOI"
"disease", "pathogen", "epi_distribution", "author", "title", "journal",
"region", "transmission_mode", "vector", "prob_distribution",
"inference_method", "notes", "DOI"
)
)
}
Expand Down
70 changes: 41 additions & 29 deletions R/epiparam_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,10 @@ make_epidist <- function(x) {
citation = create_epidist_citation(
author = x$author,
year = x$year,
PMID = x$PMID,
DOI = x$DOI
title = x$title,
journal = x$journal,
DOI = x$DOI,
PMID = x$PMID
),
metadata = create_epidist_metadata(
sample_size = x$sample_size,
Expand Down Expand Up @@ -209,7 +211,7 @@ as_epiparam <- function(x) {

# for vb_epidist or list of epidists call as_epiparam recursively
if (!is_epidist(x)) {
eparam <- as.data.frame(matrix(nrow = length(x), ncol = 56))
eparam <- as.data.frame(matrix(nrow = length(x), ncol = 58))
for (i in seq_along(x)) {
if (i == 1) colnames(eparam) <- colnames(as_epiparam(x[[i]]))
eparam[i, ] <- as_epiparam(x[[i]])
Expand All @@ -221,29 +223,6 @@ as_epiparam <- function(x) {
# check input
validate_epidist(x)

# set default citation
author <- NA_character_
year <- NA_integer_
doi <- NA_character_
pmid <- NA_integer_

# if citation is available extract info
if (x$citation != "No citation available") {
# extract author from citation
author <- sub(" \\(.*", "", x$citation)
# extract year from citation
year <- gsub(pattern = "<(.*)", replacement = "", x = x$citation)
year <- sub("\\).*", "", sub(".*\\(", "", year))
# extract DOI from citation
doi <- sub(">.*", "", sub(".*<", "", x$citation))
# extract PMID if available
if (grepl(pattern = "PMID", x = x$citation, fixed = TRUE)) {
pmid <- as.numeric(sub(".*PMID: ", "", x$citation))
} else {
pmid <- NA_integer_
}
}

params <- get_parameters(x)
prob_dist <- family(x)

Expand Down Expand Up @@ -271,6 +250,17 @@ as_epiparam <- function(x) {
})
}

author <- ifelse(
test = is.null(x$citation$author),
yes = NA_character_,
no = Reduce(
f = function(x, y) {
paste(x, y, sep = "_")
},
x = x$citation$author
)
)

## TODO: look into redudancy of median and quantile 50 in epidist and
## epiparam class

Expand All @@ -279,7 +269,21 @@ as_epiparam <- function(x) {
pathogen = x$disease$pathogen,
epi_distribution = x$epi_dist,
author = author,
year = as.numeric(year),
title = ifelse(
test = is.null(x$citation$title),
yes = NA_character_,
no = x$citation$title
),
journal = ifelse(
test = is.null(x$citation$journal),
yes = NA_character_,
no = x$citation$journal
),
year = ifelse(
test = is.null(x$citation$year),
yes = NA_integer_,
no = as.numeric(x$citation$year)
),
sample_size = x$metadata$sample_size,
region = x$metadata$region,
transmission_mode = x$metadata$transmission_mode,
Expand Down Expand Up @@ -377,8 +381,16 @@ as_epiparam <- function(x) {
right_truncated = x$method_assess$right_truncated,
phase_bias_adjusted = x$method_assess$phase_bias_adjusted,
notes = x$notes,
PMID = pmid,
DOI = doi
PMID = ifelse(
test = is.null(x$citation$PMID),
yes = NA_integer_,
no = as.numeric(x$citation$PMID)
),
DOI = ifelse(
test = is.null(x$citation$doi),
yes = NA_character_,
no = x$citation$doi
)
)

# create lists for epiparam vector columns
Expand Down
16 changes: 16 additions & 0 deletions inst/extdata/data_dictionary.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,22 @@ properties:
type: array
items:
type: string
title:
description: >
The title of the article that published the epidemiological parameters.
examples: ["Incubation period of COVID-19", "Serial internval of Ebola"]
type: array
items:
type: string
journal:
description: >
The name of the journal that published the article that published the
epidemiological parameters. This can also be a pre-print server, e.g.,
medRxiv.
examples: ["The Lancet", "PLoS One", "medRxiv"]
type: array
items:
type: string
year:
description: >
The year the paper or report was published.
Expand Down
Loading