diff --git a/.gitignore b/.gitignore index fc61979..e58875e 100644 --- a/.gitignore +++ b/.gitignore @@ -31,6 +31,7 @@ vignettes/*.pdf # knitr and R markdown default cache directories *_cache/ /cache/ +inst/doc/ # Temporary files created by R markdown *.utf8.md diff --git a/R/hmde_assign_data.R b/R/hmde_assign_data.R index 1b563ce..c976c9e 100644 --- a/R/hmde_assign_data.R +++ b/R/hmde_assign_data.R @@ -15,7 +15,15 @@ hmde_assign_data <- function(model_template, data = NULL,...){ if(!is.null(data)){ # Use provided tibble user_fields <- names(data) + user_code <- rlang::enquos(..., .check_assign = TRUE) + additional_user_fields <- names(user_code) + # Evaluate the RHS of expressions (the values) + additional_data <- purrr::map(user_code, + ~rlang::eval_tidy(.x, env = rlang::caller_env()) + ) + } else { # Grab user expressions from individual list items and extract data + additional_user_fields <- NULL user_code <- rlang::enquos(..., .check_assign = TRUE) user_fields <- names(user_code) # Evaluate the RHS of expressions (the values) @@ -44,15 +52,21 @@ hmde_assign_data <- function(model_template, data = NULL,...){ } for(i in model_fields){ # Iterate through required fields and fill them - if(i %in% user_fields){ + if(i %in% user_fields){ #Check if the user has supplied it in a tibble model_template <- purrr::list_modify(model_template, !!!data[i]) - } else if(is.null(model_template[[i]])){ # allows for default values + + } else if(!is.null(additional_user_fields)){ + if(i %in% additional_user_fields){ #Check if the user supplied it directly + model_template <- purrr::list_modify(model_template, !!!additional_data[i]) + } + } + + if(is.null(model_template[[i]])){ #Catches default tibble transformations model_template[[i]] <- switch( i, n_obs = length(data$y_obs), n_ind = length(unique(data$ind_id)), - y_bar = mean(data$y_obs), - model = model_template$model + y_bar = mean(data$y_obs) ) } diff --git a/R/hmde_models.R b/R/hmde_models.R index dac41e9..afc9b66 100644 --- a/R/hmde_models.R +++ b/R/hmde_models.R @@ -34,6 +34,8 @@ hmde_const_single_ind <- function(){ y_obs = NULL, obs_index = NULL, time = NULL, + prior_pars_ind_beta = c(0, 2), + prior_pars_global_error_sigma = c(0, 2), model = "constant_single_ind") } @@ -48,6 +50,9 @@ hmde_const_multi_ind <- function(){ obs_index = NULL, time = NULL, ind_id = NULL, + prior_pars_pop_beta_mu = c(0,2), + prior_pars_pop_beta_sigma = c(0,2), + prior_pars_global_error_sigma = c(0,2), model = "constant_multi_ind") } @@ -60,6 +65,10 @@ hmde_canham_single_ind <- function(){ y_obs = NULL, obs_index = NULL, time = NULL, + prior_pars_ind_max_growth = c(0,2), + prior_pars_ind_size_at_max_growth = c(0,2), + prior_pars_ind_k = c(0,2), + prior_pars_global_error_sigma = c(0,2), model = "canham_single_ind") } @@ -74,6 +83,13 @@ hmde_canham_multi_ind <- function(){ obs_index = NULL, time = NULL, ind_id = NULL, + prior_pars_pop_max_growth_mean = c(0,2), + prior_pars_pop_max_growth_sd = c(0,2), + prior_pars_pop_size_at_max_growth_mean = c(0,2), + prior_pars_pop_size_at_max_growth_sd = c(0,2), + prior_pars_pop_k_mean = c(0,2), + prior_pars_pop_k_sd = c(0,2), + prior_pars_global_error_sigma = c(0,2), model = "canham_multi_ind") } @@ -88,6 +104,9 @@ hmde_vb_single_ind <- function(){ obs_index = NULL, time = NULL, y_bar = NULL, + prior_pars_ind_max_size_sd_only = 2, + prior_pars_ind_growth_rate = c(0,2), + prior_pars_global_error_sigma = c(0,2), model = "vb_single_ind") } @@ -103,6 +122,11 @@ hmde_vb_multi_ind <- function(){ time = NULL, ind_id = NULL, y_bar = NULL, + prior_pars_pop_max_size_mean_sd_only = 2, + prior_pars_pop_max_size_sd = c(0,2), + prior_pars_pop_growth_rate_mean = c(0,2), + prior_pars_pop_growth_rate_sd = c(0,2), + prior_pars_global_error_sigma = c(0,2), model = "vb_multi_ind") } @@ -118,7 +142,7 @@ hmde_affine_single_ind <- function(){ time = NULL, int_method = NULL, y_bar = NULL, - prior_means = c(1,1), - prior_sds = c(2,2), + prior_pars_ind_const = c(1,2), + prior_pars_ind_beta_1 = c(0,2), model = "affine_single_ind") } diff --git a/inst/doc/hmde_paper.pdf b/inst/article/hmde_paper.pdf similarity index 100% rename from inst/doc/hmde_paper.pdf rename to inst/article/hmde_paper.pdf diff --git a/inst/stan/affine_single_ind.stan b/inst/stan/affine_single_ind.stan index 22a5e81..47376d7 100644 --- a/inst/stan/affine_single_ind.stan +++ b/inst/stan/affine_single_ind.stan @@ -66,8 +66,8 @@ data { real time[n_obs]; real y_bar; int int_method; - real prior_means[2]; //vector of means for beta parameter priors - real prior_sds[2]; //Vector of SDs for beta parameter priors + real prior_pars_ind_const[2]; //vector of pars for beta_c parameter priors + real prior_pars_ind_beta_1[2]; //Vector of pars for beta_1 parameter priors } // The parameters accepted by the model. @@ -122,8 +122,8 @@ model { //Priors //Individual level - ind_const ~normal(prior_means[1], prior_sds[1]); - ind_beta_1 ~lognormal(log(prior_means[2]), prior_sds[2]); + ind_const ~normal(prior_pars_ind_const[1], prior_pars_ind_const[2]); + ind_beta_1 ~lognormal(prior_pars_ind_beta_1[1], prior_pars_ind_beta_1[2]); } generated quantities{ @@ -131,7 +131,10 @@ generated quantities{ array[3] real pars; real ind_beta_0; vector[1] y_temp; - int vers = 1; + + //Return the used prior parameters + real check_prior_pars_ind_const[2] = prior_pars_ind_const; + real check_prior_pars_ind_beta[2] = prior_pars_ind_beta_1; ind_beta_0 = ind_const + ind_beta_1*y_bar; diff --git a/inst/stan/canham_multi_ind.stan b/inst/stan/canham_multi_ind.stan index 0a8f423..8823442 100644 --- a/inst/stan/canham_multi_ind.stan +++ b/inst/stan/canham_multi_ind.stan @@ -16,6 +16,13 @@ data { int obs_index[n_obs]; real time[n_obs]; int ind_id[n_obs]; + real prior_pars_pop_max_growth_mean[2]; + real prior_pars_pop_max_growth_sd[2]; + real prior_pars_pop_size_at_max_growth_mean[2]; + real prior_pars_pop_size_at_max_growth_sd[2]; + real prior_pars_pop_k_mean[2]; + real prior_pars_pop_k_sd[2]; + real prior_pars_global_error_sigma[2]; } // The parameters accepted by the model. @@ -74,21 +81,35 @@ model { pop_k_sd); //Species level - pop_max_growth_mean ~normal(0, 1); - pop_max_growth_sd ~cauchy(0, 1); - pop_size_at_max_growth_mean ~normal(0, 1); - pop_size_at_max_growth_sd ~cauchy(0, 1); - pop_k_mean ~normal(0, 1); - pop_k_sd ~cauchy(0, 1); + pop_max_growth_mean ~normal(prior_pars_pop_max_growth_mean[1], + prior_pars_pop_max_growth_mean[2]); + pop_max_growth_sd ~cauchy(prior_pars_pop_max_growth_sd[1], + prior_pars_pop_max_growth_sd[2]); + pop_size_at_max_growth_mean ~normal(prior_pars_pop_size_at_max_growth_mean[1], + prior_pars_pop_size_at_max_growth_mean[2]); + pop_size_at_max_growth_sd ~cauchy(prior_pars_pop_size_at_max_growth_sd[1], + prior_pars_pop_size_at_max_growth_sd[2]); + pop_k_mean ~normal(prior_pars_pop_k_mean[1], prior_pars_pop_k_mean[2]); + pop_k_sd ~cauchy(prior_pars_pop_k_sd[1], prior_pars_pop_k_sd[2]); //Global level - global_error_sigma ~cauchy(0, 2); + global_error_sigma ~cauchy(prior_pars_global_error_sigma[1], + prior_pars_global_error_sigma[2]); } generated quantities{ real y_hat[n_obs]; vector[1] y_temp; + //Return the used prior parameters + real check_prior_pars_pop_max_growth_mean[2] = prior_pars_pop_max_growth_mean; + real check_prior_pars_pop_max_growth_sd[2] = prior_pars_pop_max_growth_sd; + real check_prior_pars_pop_size_at_max_growth_mean[2] = prior_pars_pop_size_at_max_growth_mean; + real check_prior_pars_pop_size_at_max_growth_sd[2] = prior_pars_pop_size_at_max_growth_sd; + real check_prior_pars_pop_k_mean[2] = prior_pars_pop_k_mean; + real check_prior_pars_pop_k_sd[2] = prior_pars_pop_k_sd; + real check_prior_pars_global_error_sigma[2] = prior_pars_global_error_sigma; + for(i in 1:n_obs){ if(obs_index[i]==1){//Fits the first size diff --git a/inst/stan/canham_single_ind.stan b/inst/stan/canham_single_ind.stan index 193f017..05a8e86 100644 --- a/inst/stan/canham_single_ind.stan +++ b/inst/stan/canham_single_ind.stan @@ -14,6 +14,10 @@ data { real y_obs[n_obs]; int obs_index[n_obs]; real time[n_obs]; + real prior_pars_ind_max_growth[2]; + real prior_pars_ind_size_at_max_growth[2]; + real prior_pars_ind_k[2]; + real prior_pars_global_error_sigma[2]; } // The parameters accepted by the model. @@ -53,18 +57,27 @@ model { //Priors //Individual level - ind_max_growth ~lognormal(0, 1); - ind_size_at_max_growth ~lognormal(0, 1); - ind_k ~lognormal(0, 1); + ind_max_growth ~lognormal(prior_pars_ind_max_growth[1], + prior_pars_ind_max_growth[2]); + ind_size_at_max_growth ~lognormal(prior_pars_ind_size_at_max_growth[1], + prior_pars_ind_size_at_max_growth[2]); + ind_k ~lognormal(prior_pars_ind_k[1], prior_pars_ind_k[2]); //Global level - global_error_sigma ~cauchy(0, 2); + global_error_sigma ~cauchy(prior_pars_global_error_sigma[1], + prior_pars_global_error_sigma[2]); } generated quantities{ real y_hat[n_obs]; vector[1] y_temp; + //Return the used prior parameters + real check_prior_pars_ind_max_growth[2] = prior_pars_ind_max_growth; + real check_prior_pars_ind_size_at_max_growth[2] = prior_pars_ind_size_at_max_growth; + real check_prior_pars_ind_k[2] = prior_pars_ind_k; + real check_prior_pars_global_error_sigma[2] = prior_pars_global_error_sigma; + for(i in 1:n_obs){ if(obs_index[i]==1){//Fits the first size diff --git a/inst/stan/constant_multi_ind.stan b/inst/stan/constant_multi_ind.stan index 1b21144..a1bb379 100644 --- a/inst/stan/constant_multi_ind.stan +++ b/inst/stan/constant_multi_ind.stan @@ -18,6 +18,9 @@ data { int obs_index[n_obs]; real time[n_obs]; int ind_id[n_obs]; + real prior_pars_pop_beta_mu[2]; + real prior_pars_pop_beta_sigma[2]; + real prior_pars_global_error_sigma[2]; } // The parameters accepted by the model. @@ -60,17 +63,29 @@ model { pop_beta_sigma); //Population level - pop_beta_mu ~ normal(0, 1); - pop_beta_sigma ~cauchy(0, 1); + pop_beta_mu ~ normal(prior_pars_pop_beta_mu[1], + prior_pars_pop_beta_mu[2]); + pop_beta_sigma ~cauchy(prior_pars_pop_beta_sigma[1], + prior_pars_pop_beta_sigma[2]); //Global level - global_error_sigma ~cauchy(0, 2); + global_error_sigma ~cauchy(prior_pars_global_error_sigma[1], + prior_pars_global_error_sigma[2]); } // The output generated quantities { real y_hat[n_obs]; + //Return the used prior parameters + real check_prior_pars_pop_beta_mu[2]; + real check_prior_pars_pop_beta_sigma[2]; + real check_prior_pars_global_error_sigma[2]; + + check_prior_pars_pop_beta_mu = prior_pars_pop_beta_mu; + check_prior_pars_pop_beta_sigma = prior_pars_pop_beta_sigma; + check_prior_pars_global_error_sigma = prior_pars_global_error_sigma; + for(i in 1:n_obs){ //Fits the first size diff --git a/inst/stan/constant_single_ind.stan b/inst/stan/constant_single_ind.stan index 41127aa..55aff46 100644 --- a/inst/stan/constant_single_ind.stan +++ b/inst/stan/constant_single_ind.stan @@ -16,6 +16,8 @@ data { real y_obs[n_obs]; int obs_index[n_obs]; real time[n_obs]; + real prior_pars_ind_beta[2]; + real prior_pars_global_error_sigma[2]; } // The parameters accepted by the model. @@ -49,16 +51,22 @@ model { //Priors //Individual level - ind_beta ~ lognormal(0.1, 1); + ind_beta ~ lognormal(prior_pars_ind_beta[1], + prior_pars_ind_beta[2]); //Global level - global_error_sigma ~cauchy(0, 2); + global_error_sigma ~cauchy(prior_pars_global_error_sigma[1], + prior_pars_global_error_sigma[2]); } // The output generated quantities { real y_hat[n_obs]; + //Return the used prior parameters + real check_prior_pars_ind_beta[2] = prior_pars_ind_beta; + real check_prior_pars_global_error_sigma[2] = prior_pars_global_error_sigma; + for(i in 1:n_obs){ //Fits the first size if(obs_index[i]==1){ diff --git a/inst/stan/vb_multi_ind.stan b/inst/stan/vb_multi_ind.stan index 998da2f..27e3ab9 100644 --- a/inst/stan/vb_multi_ind.stan +++ b/inst/stan/vb_multi_ind.stan @@ -16,6 +16,11 @@ data { real time[n_obs]; int ind_id[n_obs]; real y_bar; + real prior_pars_pop_max_size_mean_sd_only; + real prior_pars_pop_max_size_sd[2]; + real prior_pars_pop_growth_rate_mean[2]; + real prior_pars_pop_growth_rate_sd[2]; + real prior_pars_global_error_sigma[2]; } // The parameters accepted by the model. @@ -67,19 +72,32 @@ model { ind_max_size ~lognormal(pop_max_size_mean, pop_max_size_sd); //Population level - pop_growth_rate_mean ~normal(0, 2); - pop_growth_rate_sd ~cauchy(0, 2); - pop_max_size_mean ~normal(max(log(y_obs)), 2); - pop_max_size_sd ~cauchy(0, 2); + pop_max_size_mean ~normal(log(max(y_obs)), + prior_pars_pop_max_size_mean_sd_only); + pop_max_size_sd ~cauchy(prior_pars_pop_max_size_sd[1], + prior_pars_pop_max_size_sd[2]); + pop_growth_rate_mean ~normal(prior_pars_pop_growth_rate_mean[1], + prior_pars_pop_growth_rate_mean[2]); + pop_growth_rate_sd ~cauchy(prior_pars_pop_growth_rate_sd[1], + prior_pars_pop_growth_rate_sd[2]); //Global level - global_error_sigma ~cauchy(0, 2); + global_error_sigma ~cauchy(prior_pars_global_error_sigma[1], + prior_pars_global_error_sigma[2]); } generated quantities{ real y_hat[n_obs]; array[3] real pars; + //Return the used prior parameters + real check_prior_pars_pop_max_size_mean_sd_only = prior_pars_pop_max_size_mean_sd_only; + real check_prior_pars_pop_max_size_mean_mean_max_obs = log(max(y_obs)); + real check_prior_pars_pop_max_size_sd[2] = prior_pars_pop_max_size_sd; + real check_prior_pars_pop_growth_rate_mean[2] = prior_pars_pop_growth_rate_mean; + real check_prior_pars_pop_growth_rate_sd[2] = prior_pars_pop_growth_rate_sd; + real check_prior_pars_global_error_sigma[2] = prior_pars_global_error_sigma; + for(i in 1:n_obs){ // Initialise the parameters for the observation pars[1] = ind_max_size[ind_id[i]] - y_bar; diff --git a/inst/stan/vb_single_ind.stan b/inst/stan/vb_single_ind.stan index 89b3dd3..7f7a1b5 100644 --- a/inst/stan/vb_single_ind.stan +++ b/inst/stan/vb_single_ind.stan @@ -14,6 +14,9 @@ data { int obs_index[n_obs]; real time[n_obs]; real y_bar; + real prior_pars_ind_max_size_sd_only; + real prior_pars_ind_growth_rate[2]; + real prior_pars_global_error_sigma[2]; } // The parameters accepted by the model. @@ -53,17 +56,26 @@ model { //Priors //Individual level - ind_max_size ~lognormal(0, 1); - ind_growth_rate ~lognormal(0, 1); //Take max obs. size as average value + ind_max_size ~lognormal(log(max(y_obs)), + prior_pars_ind_max_size_sd_only); + ind_growth_rate ~lognormal(prior_pars_ind_growth_rate[1], + prior_pars_ind_growth_rate[2]); //Global level - global_error_sigma ~cauchy(0, 2); + global_error_sigma ~cauchy(prior_pars_global_error_sigma[1], + prior_pars_global_error_sigma[2]); } generated quantities{ real y_hat[n_obs]; array[3] real pars; + //Return the used prior parameters + real check_prior_pars_ind_max_size_sd_only = prior_pars_ind_max_size_sd_only; + real check_prior_pars_ind_max_size_mean_max_obs = log(max(y_obs)); + real check_prior_pars_ind_growth_rate[2] = prior_pars_ind_growth_rate; + real check_prior_pars_global_error_sigma[2] = prior_pars_global_error_sigma; + pars[1] = ind_max_size - y_bar; pars[2] = ind_growth_rate; pars[3] = ind_y_0 - y_bar; diff --git a/tests/testthat/helper-testing.R b/tests/testthat/helper-testing.R index 464243d..2aa9078 100644 --- a/tests/testthat/helper-testing.R +++ b/tests/testthat/helper-testing.R @@ -61,6 +61,8 @@ hmde_test_single_individual <- function(model_name, } hmde_test_multi_individual <- function(model_name, data, est_dim){ + iter <- 20 + if(! is.null(data$step_size)){ if(is.null(data$y_bar)){ #Models that do not require centering # Test multi-individual @@ -74,7 +76,7 @@ hmde_test_multi_individual <- function(model_name, data, est_dim){ time = data$time, #Vector length N_obs ind_id = data$ind_id #Vector length N_obs ) |> - hmde_run(chains = 2, iter = 100, verbose = FALSE, show_messages = FALSE) + hmde_run(chains = 2, iter = iter, verbose = FALSE, show_messages = FALSE) ) } else { #Models that do require centering with y_bar # Test multi-individual @@ -89,7 +91,7 @@ hmde_test_multi_individual <- function(model_name, data, est_dim){ y_bar = data$y_bar, #Real ind_id = data$ind_id #Vector length N_obs ) |> - hmde_run(chains = 2, iter = 100, verbose = FALSE, show_messages = FALSE) + hmde_run(chains = 2, iter = iter, verbose = FALSE, show_messages = FALSE) ) } @@ -104,13 +106,13 @@ hmde_test_multi_individual <- function(model_name, data, est_dim){ time = data$time, #Vector length N_obs ind_id = data$ind_id #Vector length N_obs ) |> - hmde_run(chains = 2, iter = 100, verbose = FALSE, show_messages = FALSE) + hmde_run(chains = 2, iter = iter, verbose = FALSE, show_messages = FALSE) ) } # Extract samples multi_samples <- rstan::extract(multi_ind_test, permuted = FALSE, inc_warmup = TRUE) - expect_equal(dim(multi_samples), c(100, 2, est_dim)) + expect_equal(dim(multi_samples), c(iter, 2, est_dim)) expect_visible(multi_ind_test) expect_s4_class(multi_ind_test, "stanfit") diff --git a/tests/testthat/test-hmde_assign_data.R b/tests/testthat/test-hmde_assign_data.R index d0e69c6..3c3fbb4 100644 --- a/tests/testthat/test-hmde_assign_data.R +++ b/tests/testthat/test-hmde_assign_data.R @@ -121,7 +121,7 @@ test_that("Execution and output: bad input", { }) -test_that("Execution and output: default values", { +test_that("Execution and output: prior values", { time <- 1:5 y_obs <- 1:5 obs_index <- 1:5 @@ -135,7 +135,7 @@ test_that("Execution and output: default values", { step_size = 1, int_method = 1) - expect_equal(default_used$prior_means, c(1,1)) + expect_equal(default_used$prior_pars_ind_const, c(1,2)) value_supplied <- hmde_model("affine_single_ind") |> hmde_assign_data(n_obs = length(y_obs), @@ -145,7 +145,13 @@ test_that("Execution and output: default values", { y_bar = mean(y_obs), step_size = 1, int_method = 1, - prior_means = c(5,5)) + prior_pars_ind_const = c(5,5)) - expect_equal(value_supplied$prior_means, c(5,5)) + expect_equal(value_supplied$prior_pars_ind_const, c(5,5)) + + value_supplied_tibble_used <- hmde_model("constant_multi_ind") |> + hmde_assign_data(data = Trout_Size_Data, + prior_pars_pop_beta_mu = c(1, 3)) + + expect_equal(value_supplied_tibble_used$prior_pars_pop_beta_mu, c(1, 3)) }) diff --git a/tests/testthat/test-hmde_models_affine.R b/tests/testthat/test-hmde_models_affine.R index b6000b5..f1110d3 100644 --- a/tests/testthat/test-hmde_models_affine.R +++ b/tests/testthat/test-hmde_models_affine.R @@ -3,8 +3,8 @@ test_that("Model structures: affine", { # Single individual single_model <- hmde_model("affine_single_ind") expect_named(single_model, c("step_size", "n_obs", "y_obs", "obs_index", - "time", "int_method", "y_bar", "prior_means", - "prior_sds", "model")) + "time", "int_method", "y_bar", "prior_pars_ind_const", + "prior_pars_ind_beta_1", "model")) expect_type(single_model, "list") expect_visible(single_model) }) diff --git a/tests/testthat/test-hmde_models_canham.R b/tests/testthat/test-hmde_models_canham.R index 49f8e02..5295572 100644 --- a/tests/testthat/test-hmde_models_canham.R +++ b/tests/testthat/test-hmde_models_canham.R @@ -4,6 +4,10 @@ test_that("Model structures: canham", { single_model <- hmde_model("canham_single_ind") expect_named(single_model, c("n_obs", "y_obs", "obs_index", "time", + "prior_pars_ind_max_growth", + "prior_pars_ind_size_at_max_growth", + "prior_pars_ind_k", + "prior_pars_global_error_sigma", "model")) expect_type(single_model, "list") expect_visible(single_model) @@ -12,6 +16,13 @@ test_that("Model structures: canham", { multi_model <- hmde_model("canham_multi_ind") expect_named(multi_model, c("n_obs", "n_ind", "y_obs", "obs_index", "time", "ind_id", + "prior_pars_pop_max_growth_mean", + "prior_pars_pop_max_growth_sd", + "prior_pars_pop_size_at_max_growth_mean", + "prior_pars_pop_size_at_max_growth_sd", + "prior_pars_pop_k_mean", + "prior_pars_pop_k_sd", + "prior_pars_global_error_sigma", "model")) expect_type(multi_model, "list") expect_visible(multi_model) @@ -37,6 +48,7 @@ test_that("Execution: canham multiple individuals", { 1 + #Global error data$n_obs + #y_ij 1 + #y_temp + 14 + #checks for priors 1 #lp__ hmde_test_multi_individual(model_name, data, est_dim) diff --git a/tests/testthat/test-hmde_models_constant.R b/tests/testthat/test-hmde_models_constant.R index 522143b..b595554 100644 --- a/tests/testthat/test-hmde_models_constant.R +++ b/tests/testthat/test-hmde_models_constant.R @@ -4,6 +4,8 @@ test_that("Model structures: Constant", { single_model <- hmde_model("constant_single_ind") expect_named(single_model, c("n_obs", "y_obs", "obs_index", "time", + "prior_pars_ind_beta", + "prior_pars_global_error_sigma", "model")) expect_type(single_model, "list") expect_visible(single_model) @@ -12,6 +14,9 @@ test_that("Model structures: Constant", { multi_model <- hmde_model("constant_multi_ind") expect_named(multi_model, c("n_obs", "n_ind", "y_obs", "obs_index", "time", "ind_id", + "prior_pars_pop_beta_mu", + "prior_pars_pop_beta_sigma", + "prior_pars_global_error_sigma", "model")) expect_type(multi_model, "list") expect_visible(multi_model) @@ -36,6 +41,7 @@ test_that("Execution: Constant multiple individuals", { data$n_pars * 2 + #Population parameters 1 + #Global error data$n_obs + #y_ij + 6 + #checks for priors 1 #lp__ hmde_test_multi_individual(model_name, data, est_dim) diff --git a/tests/testthat/test-hmde_models_vb.R b/tests/testthat/test-hmde_models_vb.R index dc2a65b..dba96bb 100644 --- a/tests/testthat/test-hmde_models_vb.R +++ b/tests/testthat/test-hmde_models_vb.R @@ -4,7 +4,11 @@ test_that("Model structures: vb", { single_model <- hmde_model("vb_single_ind") expect_named(single_model, c("n_obs", "y_obs", "obs_index", "time", - "y_bar", "model")) + "y_bar", + "prior_pars_ind_max_size_sd_only", + "prior_pars_ind_growth_rate", + "prior_pars_global_error_sigma", + "model")) expect_type(single_model, "list") expect_visible(single_model) @@ -12,7 +16,13 @@ test_that("Model structures: vb", { multi_model <- hmde_model("vb_multi_ind") expect_named(multi_model, c("n_obs", "n_ind", "y_obs", "obs_index", "time", "ind_id", - "y_bar", "model")) + "y_bar", + "prior_pars_pop_max_size_mean_sd_only", + "prior_pars_pop_max_size_sd", + "prior_pars_pop_growth_rate_mean", + "prior_pars_pop_growth_rate_sd", + "prior_pars_global_error_sigma", + "model")) expect_type(multi_model, "list") expect_visible(multi_model) }) @@ -37,6 +47,7 @@ test_that("Execution: vb multiple individuals", { 1 + #Global error data$n_obs + #y_ij data$n_pars + 1 + #Pars temp vector + 10 + #checks for priors 1 #lp__ hmde_test_multi_individual(model_name, data, est_dim) diff --git a/vignettes/canham.Rmd b/vignettes/canham.Rmd index 16b0301..59abd61 100644 --- a/vignettes/canham.Rmd +++ b/vignettes/canham.Rmd @@ -53,6 +53,25 @@ ggplot() + xlim=c(y_0, y_final)) ``` +The default priors for the Canham top-level parameters for the single individual model are +$$g_{max} \sim \log\mathcal{N}(0, 2),$$ +$$S_{max} \sim \log\mathcal{N}(0, 2),$$ +$$k \sim \log\mathcal{N}(0, 2)$$ +$$0 <\sigma_e \sim Cauchy(0, 2).$$ +For the multi-individual model the prior structure and default parameters are +$$\mu_{g_{max}} \sim \mathcal{N}(0, 2),$$ +$$0 < \sigma_{g_{max}} \sim Cauchy(0, 2),$$ +$$\mu_{S_{max}} \sim \mathcal{N}(0, 2),$$ +$$0 < \sigma_{S_{max}} \sim Cauchy(0, 2),$$ +$$\mu_{k} \sim \mathcal{N}(0, 2),$$ +$$0 < \sigma_{k} \sim \sim Cauchy(0, 2),$$ +$$0 <\sigma_e \sim Cauchy(0, 2).$$ +To see the name for the prior parameter run `hmde_model`. For example in the following we want to change the prior for $g_{max}$ (`ind_max_growth`) in the individual model: +```{r} +hmde_model("canham_single_ind") +#prior_pars_ind_max_growth is the argument name for the prior parameters +``` + The data we use for this demonstration comes from the Barro Colorado Island long term forest plot @condit2019dataset. We have a simple random sample without replacement of 50 individuals from \textit{Garcinia recondita}, subject to the following inclusion criteria for individuals: \begin{itemize} \item 6 observations since the 1990 change in measurement precision, diff --git a/vignettes/constant-growth.Rmd b/vignettes/constant-growth.Rmd index 61ec46a..ab52286 100644 --- a/vignettes/constant-growth.Rmd +++ b/vignettes/constant-growth.Rmd @@ -30,8 +30,6 @@ f(Y(t), \beta) = \beta \end{equation} where $\beta$ is the average growth rate. The constant growth model corresponds to linear sizes over time, and is equivalent to a linear mixed model for size, where there is an individual effect when fit to multiple individuals. -Let's simulate some data to visualise the constant growth function. - ### Load dependencies ```{r setup} @@ -43,6 +41,24 @@ library(dplyr) library(ggplot2) ``` +### Priors +The default priors for the constant top-level parameters in the single individual model are +$$\beta\sim \log\mathcal{N}(0, 2),$$ +$$0 <\sigma_e \sim Cauchy(0, 2).$$ + +For the multi-individual model the prior structure and default parameters are +$$\mu_{\beta} \sim \mathcal{N}(0, 2),$$ +$$0 < \sigma_{\beta} \sim Cauchy(0, 2),$$ +$$0 < \sigma_{e} \sim Cauchy(0, 2).$$ +To change the prior parameter values (the distributions are fixed) optional arguments can be passed to `hmde_assign_data` with names corresponding to the `prior_pars` argument for the associated parameter as output by `hmde_models()`. For example in the following we want to change the prior for $\beta$ (`ind_beta`) in the individual model: +```{r} +hmde_model("constant_single_ind") +#prior_pars_ind_beta is the argument name for the prior parameters +``` +The mean passed to a log-normal distribution is the mean of the **underlying normal distribution**, so if you want to pass a value based on raw observations you need to log-transform it first. + +Let's simulate some data to visualise the constant growth function. + ### Simulate data ```{r} diff --git a/vignettes/here_be_dragons.Rmd b/vignettes/here_be_dragons.Rmd index 8fff756..2d03897 100644 --- a/vignettes/here_be_dragons.Rmd +++ b/vignettes/here_be_dragons.Rmd @@ -50,11 +50,16 @@ For this demonstration we will simulate data as though it is measured in centime ## The model We are implementing a longitudinal model of the form in Equation (1) within a hierarchical Bayesian longitudinal model where $$f(Y(t), \beta_0, \beta_1) = \beta_0 - \beta_1 Y(t)\qquad (2)$$ -Equation (2) is known to produce pathological behaviour from numerical methods [@butcher2016numerical], so serves as an ideal simple example of the interaction between those pathologies and the MCMC sampling process. We are attempting to estimate the parameters $\beta_0$ and $\beta_1$ from observations $y_j$, which is based on estimating $\hat{Y}_j$ given the prior distribution +Equation (2) is known to produce pathological behaviour from numerical methods [@butcher2016numerical], so serves as an ideal simple example of the interaction between those pathologies and the MCMC sampling process. For the purpose of estimation we shift Equation (2) by the mean observed size which gives +$$f_{fit}(Y(t), \beta_c, \beta_1) = \beta_c - \beta_1(Y(t) - \bar{y}),$$ +then the back-transformation +$$\beta_0 = \beta_c + \beta_1 \bar{y}.$$ + +We are attempting to estimate the parameters $\beta_0$ and $\beta_1$ from observations $y_j$, which is based on estimating $\hat{Y}_j$ given the prior distribution $$y_j \sim \mathcal{N}(\hat{Y}_j, 0.1).$$ -As we are looking at a single individual, we have prior distributions for the parameters which are -$$\beta_k \sim \log\mathcal{N}(0,2),$$ +As we are looking at a single individual, we have prior distributions for the default parameters which are +$$\beta_1, \beta_c \sim \log\mathcal{N}(0,2),$$ and enforce $\beta_k > 0$. ## Simulating data diff --git a/vignettes/hmde_for_mathematicians.Rmd b/vignettes/hmde_for_mathematicians.Rmd index 1b691c1..069f4e3 100644 --- a/vignettes/hmde_for_mathematicians.Rmd +++ b/vignettes/hmde_for_mathematicians.Rmd @@ -19,6 +19,20 @@ knitr::opts_chunk$set( This vignette is intended for a statistical/mathematical audience who are interested in Bayesian inverse problems. For biologists looking for an applications-based walkthrough the other vignettes in this package -- `hmde`, `constant-growth`, `von-bertalanffy`, and `canham` -- are less theoretical. +# Getting started with {hmde} +'hmde' is under active development, you can install the development version of 'hmde' from [GitHub](https://github.com/) with: + +```{r, eval=FALSE} +# install.packages("remotes") +remotes::install_github("traitecoevo/hmde") +``` + +```{r} +library(dplyr) +library(ggplot2) +library(hmde) +``` + # The Theory The underlying method that the hmde package implements leverages the longitudinal structure of repeat measurement data to estimate parameters for an underlying differential equation, as first demonstrated in \cite{obrien2024allindividuals}. This method is an example of a Bayesian inverse method for differential equation estimation: we are attempting to estimate the parameters of a chosen DE based on observations of the resulting process. We assume that the data consists of a finite number of discrete (likely sparse) observations with measurement error at the individual level, and have a set number of possible differential equations ready to be fit to data. @@ -43,7 +57,7 @@ f = f_{max} \exp\Bigg(-\frac{1}{2}\bigg(\frac{\log(Y(t)/Y_{max})}{k} \bigg)^2 \B $$ which is considered a reasonable approximation of long term growth behaviour for some tree species as shown in \cite{obrien2024allindividuals} and [Chapter 2]. Equation (3) is extremely non-linear and does not have an analytic solution, forcing the use of numerical methods in order to estimate the growth increments in Equation (1). -- Linear: +- Affine: $$f = \beta_0 - \beta_1 Y(t)$$ which is only included for demonstration purposes of where numerical methods can go wrong as it is a re-parameterisation of the von Bertalanffy model. @@ -71,10 +85,14 @@ $$ $$ and typically use the following priors: $$ -\mu_k \sim\mathcal{N}(0,1), \quad 0 < \sigma_k\sim Cauchy(0, 1). +\mu_k \sim\mathcal{N}(0,2), \quad 0 < \sigma_k\sim Cauchy(0, 2). $$ +For details on the prior distributions see the vignette for a specific model or check the Stan file. To see the default values run `hmde_model` on the model name as a string and look at the `prior_pars` argument for the parameter name: +```{r} +hmde_model("canham_multi_ind") +``` -The error parameter $\sigma_e >0$ is assumed to operate at a global level independent of individual and typically has a Cauchy prior with location 0, spread parameter 1. +The error parameter $\sigma_e >0$ is assumed to operate at a global level independent of individual and typically has a Cauchy prior with location 0, spread parameter 2. Estimation is done using MCMC through Stan. @@ -83,19 +101,7 @@ Numerical methods are required for the Canham model as it has no analytic soluti For the von Bertalanffy model an analytic solution is used in order to avoid numerical problems. For the constant model all numerical methods are the same and give the same result as the analytic solution so Euler is used. -# Getting started with {hmde} -'hmde' is under active development, you can install the development version of 'hmde' from [GitHub](https://github.com/) with: - -```{r, eval=FALSE} -# install.packages("remotes") -remotes::install_github("traitecoevo/hmde") -``` -```{r} -library(dplyr) -library(ggplot2) -library(hmde) -``` # Demonstration: Canham Growth - Multiple Individuals The provided tree data for 50 individuals takes a few hours to run. As such, the following block does not run by default, and instead we leverage the provided estimates data file: `Tree_Size_Ests`. The estimates are posterior mean, median, and 95\% central credible intervals for parameters, and mean posterior estimates of sizes over time based on the longitudinal model. ```{r, eval=FALSE} diff --git a/vignettes/von-bertalanffy.Rmd b/vignettes/von-bertalanffy.Rmd index 9997f2e..daebe2a 100644 --- a/vignettes/von-bertalanffy.Rmd +++ b/vignettes/von-bertalanffy.Rmd @@ -3,9 +3,11 @@ title: "Case study 2: von Bertalanffy growth with lizard size data" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{von-bertalanffy} - %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} + %\VignetteEngine{knitr::rmarkdown} bibliography: vignette.bib +editor_options: + chunk_output_type: console --- ```{r, include = FALSE} @@ -16,17 +18,6 @@ knitr::opts_chunk$set( options(rmarkdown.html_vignette.check_title = FALSE) ``` -## Overview -Our second demo introduces size-dependent growth based on the von Bertalanffy function -\begin{equation}\label{eqn_vb_DE} -f(Y(t); S_{max}, \beta) = \beta(S_{max} - Y(t)), -\end{equation} -where $S_{max}$ is the asymptotic maximum size and $\beta$ controls the growth rate. We have implemented the analytic solution -\begin{equation}\label{eqn_vb_soln} -Y(t) = S_{max} + (Y(0) - S_{max}) \exp(-t\beta) -\end{equation} -which is independent of age at the starting size $Y(0)$ and instead uses the first size as the initial condition. The key behaviour of the von Bertalanffy model is a high growth rate at small sizes that declines linearly as the size approaches $S_{max}$. This manifests as growth slowing as a creature matures with a hard finite limit on the eventual size. We restrict $\beta$ and $S_{max}$ to be positive, and $S_{max}$ to be larger than the observed sizes. As a result the growth rate is non-negative. - ### Load dependencies ```{r setup} @@ -38,6 +29,35 @@ library(dplyr) library(ggplot2) ``` +## Overview +Our second demo introduces size-dependent growth based on the von Bertalanffy function +\begin{equation}\label{eqn_vb_DE} +f(Y(t); S_{max}, \beta) = \beta(S_{max} - Y(t)), +\end{equation} +where $S_{max}$ is the asymptotic maximum size and $\beta$ controls the growth rate. We have implemented the analytic solution +\begin{equation}\label{eqn_vb_soln} +Y(t) = S_{max} + (Y(0) - S_{max}) \exp(-t\beta) +\end{equation} +which is independent of age at the starting size $Y(0)$ and instead uses the first size as the initial condition. The key behaviour of the von Bertalanffy model is a high growth rate at small sizes that declines linearly as the size approaches $S_{max}$. This manifests as growth slowing as a creature matures with a hard finite limit on the eventual size. We restrict $\beta$ and $S_{max}$ to be positive. As a result the growth rate is non-negative. + +### Priors +The default priors for the constant top-level parameters in the single individual model are +$$S_{max} \sim \log\mathcal{N}(\log(\max(y_{obs})), 2),$$ +$$\beta \sim \log\mathcal{N}(0, 2),$$ +$$0 <\sigma_e \sim Cauchy(0, 2).$$ + +For the multi-individual model the prior structure and default parameters are +$$\mu_{S_{max}} \sim \mathcal{N}(\log(\max(y_{obs})), 2),$$ +$$0 < \sigma_{S_{max}} \sim Cauchy(0, 2),$$ +$$\mu_{\beta} \sim \mathcal{N}(0, 2),$$ +$$0 < \sigma_{\beta} \sim Cauchy(0, 2),$$ +$$0 < \sigma_{e} \sim Cauchy(0, 2).$$ +The max size parameter priors are always centred at the (transformed) maximum observed size. This is not changeable, but the standard deviation is. To see the name for the prior parameter run `hmde_model`. For example in the following we want to change the prior for $S_{max}$ standard deviation (`ind_max_size`) in the individual model: +```{r} +hmde_model("vb_single_ind") +#prior_pars_ind_max_size_sd_only is the argument name for the prior parameter +``` + ### Visualise model In the following code we plot an example of the growth function and the solution to get a feel for the behaviour. ```{r} @@ -137,7 +157,7 @@ r_sq <- paste0("R^2 = ", signif(r_sq_est, digits = 3)) -ggplot(data = lizard_estimates$measurement_data, +obs_scatter <- ggplot(data = lizard_estimates$measurement_data, aes(x = y_obs, y = y_hat)) + geom_point(shape = 16, size = 1, colour = "green4") + xlab("Y obs.") + @@ -148,16 +168,16 @@ ggplot(data = lizard_estimates$measurement_data, theme_classic() #Plots of size over time for a sample of 5 individuals -hmde_plot_obs_est_inds(n_ind_to_plot = 5, +obs_est_ind <- hmde_plot_obs_est_inds(n_ind_to_plot = 5, measurement_data = lizard_estimates$measurement_data) + theme(legend.position = "inside", - legend.position.inside = c(0.05, 0.9)) + legend.position.inside = c(0.8, 0.2)) ``` We have two parameters at the individual level and are interested in both their separate distributions, and if we see evidence of a relationship between them. We can also use the individual parameter estimates and estimated sizes to plot the growth function pieces. ```{r} #1-dimensional parameter distributions -ggplot(lizard_estimates$individual_data, +s_max_hist <- ggplot(lizard_estimates$individual_data, aes(ind_max_size_mean)) + geom_histogram(bins = 10, colour = "black", @@ -165,7 +185,7 @@ ggplot(lizard_estimates$individual_data, labs(x="S_max estimate") + theme_classic() -ggplot(lizard_estimates$individual_data, +beta_hist <- ggplot(lizard_estimates$individual_data, aes(ind_growth_rate_mean)) + geom_histogram(bins = 10, colour = "black", @@ -174,7 +194,7 @@ ggplot(lizard_estimates$individual_data, theme_classic() #2-dimensional parameter distribution -ggplot(data = lizard_estimates$individual_data, +par_scatter <- ggplot(data = lizard_estimates$individual_data, aes(x = ind_max_size_mean, y = ind_growth_rate_mean)) + geom_point(shape = 16, size = 1, colour = "green4") + xlab("Individual max sizes (mm)") + @@ -187,7 +207,7 @@ cor(lizard_estimates$individual_data$ind_max_size_mean, method = "spearman") #Plot function pieces over estimated sizes. -hmde_plot_de_pieces(model = "vb_multi_ind", +de_pieces <- hmde_plot_de_pieces(model = "vb_multi_ind", individual_data = lizard_estimates$individual_data, measurement_data = lizard_estimates$measurement_data) ```