Skip to content

Commit

Permalink
Update trial.R to have more col types, create trial.csv and remove rds
Browse files Browse the repository at this point in the history
  • Loading branch information
shaunporwal committed Jan 7, 2025
1 parent 8434dff commit 365bc26
Show file tree
Hide file tree
Showing 3 changed files with 233 additions and 213 deletions.
44 changes: 32 additions & 12 deletions data-raw/trial.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Taken from:
# https://github.com/ddsjoberg/gtsummary/blob/main/data-raw/trial.R

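# Modifications: added columns covering more types (Date, logical, numeric,
# character, factor) and a CSV export of the resulting data.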

# Results from a cohort study of Drug A vs B

set.seed(8976)
Expand All @@ -11,7 +13,7 @@ trial <-
age = rnorm(n, mean = 50, sd = 15) |> as.integer(),
marker = rgamma(n, 1, 1) |> round(digits = 3),
stage = sample(c("T1", "T2", "T3", "T4"), size = n, replace = TRUE) |> factor(),
grade = sample(c("I", "II", "III"), size = n, replace = TRUE) |>factor(),
grade = sample(c("I", "II", "III"), size = n, replace = TRUE) |> factor(),
response_prob =
1 / (1 + exp(-((trt == "Drug") - 0.2 * as.numeric(stage) - 0.1 * as.numeric(grade) + 0.1 * marker))),
response = runif(n) < response_prob,
Expand All @@ -21,25 +23,43 @@ trial <-
-0.1 * as.numeric(grade) +
rnorm(n, sd = 0.5)) * 12,
death = ifelse(ttdeath_true <= 24, 1L, 0L),
ttdeath = pmin(ttdeath_true, 24) |> round(digits = 2)
ttdeath = pmin(ttdeath_true, 24) |> round(digits = 2),

# New columns added below:
visit_date = sample(seq.Date(as.Date("2020-01-01"), as.Date("2022-01-01"), by = "days"), n, replace = TRUE), # Date
follow_up_date = sample(seq.Date(as.Date("2022-01-02"), as.Date("2024-01-01"), by = "days"), n, replace = TRUE), # Date

has_side_effects = sample(c(TRUE, FALSE), n, replace = TRUE), # Logical
enrolled_in_study = sample(c(TRUE, FALSE), n, replace = TRUE), # Logical

bmi = round(rnorm(n, mean = 25, sd = 4), 1), # Numeric
systolic_bp = round(rnorm(n, mean = 120, sd = 15)), # Numeric

patient_id = paste0("ID-", sprintf("%03d", seq(1, n))), # Character
hospital = sample(c("Hospital A", "Hospital B", "Hospital C"), n, replace = TRUE), # Character

insurance = sample(c("Private", "Medicaid", "Medicare"), n, replace = TRUE) |> factor(), # Factor
smoking_status = sample(c("Non-smoker", "Former smoker", "Current smoker"), n, replace = TRUE) |> factor() # Factor
) |>
dplyr::mutate(
age = ifelse(runif(n) < 0.95, age, NA_real_),
marker = ifelse(runif(n) < 0.95, marker, NA_real_),
response = ifelse(runif(n) < 0.95, response, NA_integer_)
) |>
dplyr::select(-dplyr::one_of("response_prob", "ttdeath_true"))
summary(trial)

# Store a descriptive "label" attribute on each of the treatment, demographic,
# tumor, and outcome columns of `trial`.
attr(trial$trt, "label") <- "Chemotherapy Treatment"
attr(trial$age, "label") <- "Age"
attr(trial$marker, "label") <- "Marker Level (ng/mL)"
attr(trial$stage, "label") <- "T Stage"
attr(trial$grade, "label") <- "Grade"
attr(trial$response, "label") <- "Tumor Response"
attr(trial$death, "label") <- "Patient Died"
attr(trial$ttdeath, "label") <- "Months to Death/Censor"
summary(trial)

usethis::use_data(trial, overwrite = TRUE)
# Labels for the newly added columns, stored in each column's "label"
# attribute: the two dates, the two logical flags, the two numeric measures,
# the two character fields, and the two factors.
attr(trial$visit_date, "label") <- "Date of Visit"
attr(trial$follow_up_date, "label") <- "Date of Follow-Up"
attr(trial$has_side_effects, "label") <- "Has Side Effects"
attr(trial$enrolled_in_study, "label") <- "Enrolled in Study"
attr(trial$bmi, "label") <- "Body Mass Index (BMI)"
attr(trial$systolic_bp, "label") <- "Systolic Blood Pressure"
attr(trial$patient_id, "label") <- "Patient ID"
attr(trial$hospital, "label") <- "Hospital"
attr(trial$insurance, "label") <- "Insurance Type"
attr(trial$smoking_status, "label") <- "Smoking Status"

# Write the dataset as CSV to data/trial.csv; the path is built by here::here().
# NOTE(review): a CSV file holds only cell values, so the "label" attributes
# (and factor typing) are presumably not carried into the output -- confirm
# that this is intended.
trial |> readr::write_csv(file = here::here('data/trial.csv'))
Loading

0 comments on commit 365bc26

Please sign in to comment.