Poc pt #24

Closed
wants to merge 9 commits into from
1 change: 1 addition & 0 deletions NAMESPACE
@@ -9,6 +9,7 @@ export(mcstate_rng_pointer)
export(mcstate_runner_parallel)
export(mcstate_runner_serial)
export(mcstate_sample)
export(mcstate_sampler_hmc)
export(mcstate_sampler_random_walk)
importFrom(R6,R6Class)
useDynLib(mcstate2, .registration = TRUE)
138 changes: 138 additions & 0 deletions POC_PT.R
@@ -0,0 +1,138 @@
# Create a bimodal model: a mixture of two Gaussians centred at
# +/- symmetric_means
mixture_gaussians <- function(symmetric_means = 5) {
  mcstate_model(list(
    parameters = c("x"),
    direct_sample = function(rng) {
      rng$random_normal(1)
    },
    density = function(x) {
      log(0.5 * dnorm(x + symmetric_means) + 0.5 * dnorm(x - symmetric_means))
    },
    gradient = function(x) {
      -((x + symmetric_means) * dnorm(x + symmetric_means) +
          (x - symmetric_means) * dnorm(x - symmetric_means)) /
        (dnorm(x + symmetric_means) + dnorm(x - symmetric_means))
    },
    domain = cbind(rep(-Inf, 1), rep(Inf, 1))))
}
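
# A quick illustrative sanity check (not part of the PoC itself): the
# analytic gradient above should agree with a central finite difference
# of the log density.
m_check <- mixture_gaussians(5)
x0 <- 1.2
h <- 1e-6
stopifnot(abs(m_check$gradient(x0) -
                (m_check$density(x0 + h) - m_check$density(x0 - h)) / (2 * h)) <
            1e-6)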

# Tempered mixture model:
#   beta = 0: density is dnorm(0, 1) - our prior density
#   beta = 1: density is dnorm(0, 1) * pdf(mixture) - can be seen as our
#   posterior
tempered_mixture <- function(symmetric_means = 5, beta) {
  mcstate_model(list(
    parameters = c("x"),
    direct_sample = function(rng) { # still a direct sample from Normal(0, 1)
      rng$random_normal(1)
    },
    density = function(x) {
      # We assume a Bayesian setting in which beta = 0 gives the prior
      # and beta = 1 gives prior * mixture_gaussians.
      # Note also that the density is unnormalised!
      beta * log(0.5 * dnorm(x + symmetric_means) +
                   0.5 * dnorm(x - symmetric_means)) +
        dnorm(x, log = TRUE)
    },
    gradient = function(x) {
      beta * (-((x + symmetric_means) * dnorm(x + symmetric_means) +
                  (x - symmetric_means) * dnorm(x - symmetric_means)) /
                (dnorm(x + symmetric_means) + dnorm(x - symmetric_means))) - x
    },
    domain = cbind(rep(-Inf, 1), rep(Inf, 1))))
}
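
# Illustrative check: at beta = 0 the tempered density should reduce to
# the standard normal prior.
m0 <- tempered_mixture(10, 0)
stopifnot(isTRUE(all.equal(m0$density(0.7), dnorm(0.7, log = TRUE))))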

# Plot the tempered (beta = 1, i.e. posterior) density
m_T <- tempered_mixture(10, 1)
x <- seq(-15, 15, length.out = 1000)
plot(x, exp(m_T$density(x)), type = "l")

# Create an HMC sampler
sampler <- mcstate_sampler_hmc(epsilon = 0.1, n_integration_steps = 10)
# sampler <- mcstate_sampler_random_walk(vcv = matrix(0.1))

# Sample from m_T using the HMC sampler
set.seed(1)
res <- mcstate_sample(m_T, sampler, 20) # increase (e.g. to 200000) for a real run
hist(res$pars, breaks = 100)
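
# With modes this far apart (symmetric_means = 10), a single HMC chain
# will typically remain in whichever mode it starts in; this is the
# motivation for the parallel tempering scheme below.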

N_temp <- 15
beta <- seq(0, N_temp) / N_temp
beta_index <- seq(N_temp + 1)
# Create N_temp + 1 "machines" to ease the leap to a parallel
# implementation. In a serial version we could just use the same sampler.
PT_machine <- lapply(beta, FUN = function(x) {
  list(
    sampler = mcstate_sampler_hmc(epsilon = 0.1, n_integration_steps = 10),
    beta = x,
    model = tempered_mixture(10, x),
    rng = mcstate_rng$new())
})

# Initialisation of the "machines"
for (i in seq(N_temp + 1)) {
  PT_machine[[i]]$state$pars <- PT_machine[[i]]$model$direct_sample(PT_machine[[i]]$rng)
  PT_machine[[i]]$state$density <- PT_machine[[i]]$model$density(PT_machine[[i]]$state$pars)
  PT_machine[[i]]$sampler$initialise(PT_machine[[i]]$state,
                                     PT_machine[[i]]$model,
                                     PT_machine[[i]]$rng)
}

m <- mixture_gaussians(10) # the "likelihood" model (target over prior)
even_step <- FALSE
n_iterations <- 10000
x_res <- NULL
beta_res <- PT_machine[[N_temp + 1]]$beta
index_res <- beta_index
for (k in seq(n_iterations)) {

  # Every machine does one HMC step => exploration step
  for (i in seq(N_temp + 1)) {
    if (PT_machine[[i]]$beta == 0) {
      # At beta = 0 the tempered density is just the prior, which we can
      # sample from directly
      PT_machine[[i]]$state$pars <- PT_machine[[i]]$model$direct_sample(PT_machine[[i]]$rng)
      PT_machine[[i]]$state$density <- PT_machine[[i]]$model$density(PT_machine[[i]]$state$pars)
    } else {
      PT_machine[[i]]$state <- PT_machine[[i]]$sampler$step(PT_machine[[i]]$state,
                                                            PT_machine[[i]]$model,
                                                            PT_machine[[i]]$rng)
    }
  }

  # Communication step: alternate between swapping the even and odd
  # pairs of adjacent temperatures
  if (even_step) {
    index <- which(seq(N_temp) %% 2 == 0)
  } else {
    index <- which(seq(N_temp) %% 2 == 1)
  }

  for (i in index) {
    # First machine, currently at beta_{i}
    i1 <- beta_index[i]
    # Second machine, currently at beta_{i+1}
    i2 <- beta_index[i + 1]
    # Log acceptance probability for the exchange; m$density is the log
    # density of the "likelihood" (target over prior), whose prior terms
    # cancel in the swap ratio
    alpha <- min(0, (beta[i + 1] - beta[i]) *
                   (m$density(PT_machine[[i1]]$state$pars) -
                      m$density(PT_machine[[i2]]$state$pars)))

    if (log(runif(1)) < alpha) { # swap
      beta_index[i] <- i2
      beta_index[i + 1] <- i1
      # This is a bit of a hack, but avoids exchanging the states across
      # "machines". Note that we still need to recalculate the density
      # value corresponding to the new "temperature"/beta.
      PT_machine[[i2]]$beta <- beta[i]
      environment(PT_machine[[i2]]$model$density)$beta <- beta[i]
      PT_machine[[i2]]$state$density <- PT_machine[[i2]]$model$density(PT_machine[[i2]]$state$pars)
      PT_machine[[i1]]$beta <- beta[i + 1]
      environment(PT_machine[[i1]]$model$density)$beta <- beta[i + 1]
      PT_machine[[i1]]$state$density <- PT_machine[[i1]]$model$density(PT_machine[[i1]]$state$pars)
    }
  }
  even_step <- !even_step
  i_target <- beta_index[N_temp + 1] # the machine currently at beta = 1
  all_state <- rep(0, N_temp + 1)
  for (i in seq(N_temp + 1)) {
    all_state[i] <- PT_machine[[beta_index[i]]]$state$pars
  }
  x_res <- rbind(x_res, all_state) # states ordered by temperature
  # x_res <- rbind(x_res, unlist(PT_machine[[i_target]]$state))
  index_res <- rbind(index_res, beta_index)
  beta_res <- c(beta_res, environment(PT_machine[[i_target]]$model$density)$beta)
}
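
# An illustrative diagnostic (an assumption of this sketch: each row of
# index_res records which machine occupies each temperature): the rate
# at which the coldest (beta = 1) slot changes hands is a crude measure
# of how often swaps reach the target chain.
cold_swap_rate <- mean(diff(index_res[, N_temp + 1]) != 0)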

par(mfrow = c(1, 2))
target_d <- N_temp + 1 # column holding the beta = 1 (target) chain
plot(x_res[, target_d], type = "l", col = grey(.8))
points(x_res[, target_d])
hist(x_res[, target_d], breaks = 150)


12 changes: 9 additions & 3 deletions R/distributions.R
@@ -4,12 +4,18 @@ rmvnorm <- function(x, vcv, rng) {
}


make_rmvnorm <- function(vcv) {
make_rmvnorm <- function(vcv, centred = FALSE) {
n <- ncol(vcv)
r <- chol(vcv, pivot = TRUE)
r <- r[, order(attr(r, "pivot", exact = TRUE))]
function(x, rng) {
x + drop(rng$random_normal(n) %*% r)
if (centred) {
function(rng) {
drop(rng$random_normal(n) %*% r)
}
} else {
function(x, rng) {
x + drop(rng$random_normal(n) %*% r)
}
}
}
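
## An illustrative use of the new centred form (a sketch; assumes an
## mcstate_rng object as used elsewhere in this package):
##   draw <- make_rmvnorm(diag(2) * 0.1, centred = TRUE)
##   v <- draw(mcstate_rng$new()) # a draw from N(0, vcv), no mean shift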

220 changes: 220 additions & 0 deletions R/sampler-hmc.R
@@ -0,0 +1,220 @@
##' Create a Hamiltonian Monte Carlo sampler, implemented using the
##' leapfrog algorithm.
##'
##' @title Create HMC
##'
##' @param epsilon The step size of the HMC steps
##'
##' @param n_integration_steps The number of leapfrog integration steps
##' to take within each HMC step
##'
##' @param vcv A variance-covariance matrix for the momentum vector.
##' The default uses an identity matrix.
##'
##' @param debug Logical, indicating if we should save all
##' intermediate points of each trajectory. This will add an element
##' "debug" (containing the trajectory and acceptance flag of every
##' step) to the details after sampling. This *will* slow things down
##' though as we accumulate the history inefficiently.
##'
##' @return A `mcstate_sampler` object, which can be used with
##' [mcstate_sample]
##'
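##' @examples
##' # A minimal sketch: HMC on a standard normal target, written against
##' # the model interface used elsewhere in this package
##' m <- mcstate_model(list(
##'   parameters = "x",
##'   direct_sample = function(rng) rng$random_normal(1),
##'   density = function(x) dnorm(x, log = TRUE),
##'   gradient = function(x) -x,
##'   domain = cbind(-Inf, Inf)))
##' sampler <- mcstate_sampler_hmc(epsilon = 0.1, n_integration_steps = 10)
##' res <- mcstate_sample(m, sampler, 100)
##'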
##' @export
mcstate_sampler_hmc <- function(epsilon = 0.015, n_integration_steps = 10,
                                vcv = NULL, debug = FALSE) {
  internal <- new.env()

  if (!is.null(vcv)) {
    check_vcv(vcv, call = environment())
  }

  initialise <- function(state, model, rng) {
    internal$transform <- hmc_transform(model$domain)
    n_pars <- length(model$parameters)
    if (is.null(vcv)) {
      internal$sample_momentum <- function(rng) rng$random_normal(n_pars)
    } else {
      if (n_pars != nrow(vcv)) {
        cli::cli_abort(
          "Incompatible length parameters ({n_pars}) and vcv ({nrow(vcv)})")
      }
      internal$sample_momentum <- make_rmvnorm(vcv, centred = TRUE)
    }
    if (debug) {
      internal$history <- list()
    }
  }

  step <- function(state, model, rng) {
    ## Just a helper for now
    compute_gradient <- function(x) {
      internal$transform$deriv(x) * model$gradient(x)
    }

    theta <- internal$transform$model2rn(state$pars)
    pars_next <- state$pars
    theta_next <- theta

    if (debug) {
      history <- matrix(NA_real_, n_integration_steps + 1, length(theta))
      history[1, ] <- pars_next
    }

    v <- internal$sample_momentum(rng)

    ## Make a half step for momentum at the beginning
    v_next <- v + epsilon * compute_gradient(pars_next) / 2

    for (i in seq_len(n_integration_steps)) {
      ## Make a full step for the position
      theta_next <- theta_next + epsilon * v_next
      pars_next <- internal$transform$rn2model(theta_next)
      ## Make a full step for the momentum, except at end of trajectory
      if (i != n_integration_steps) {
        v_next <- v_next + epsilon * compute_gradient(pars_next)
      }
      if (debug) {
        history[i + 1, ] <- pars_next
      }
    }

    ## Make a half step for momentum at the end.
    v_next <- v_next + epsilon * compute_gradient(pars_next) / 2

    ## Some treatments would negate the momentum at the end of the
    ## trajectory to make the proposal symmetric by setting
    ## v_next <- -v_next, but this is not needed in practice because the
    ## kinetic energy is symmetric in v.

    ## Potential energy at the end of the trajectory
    density_next <- model$density(pars_next)

    ## Kinetic energies at the start (energy) and end (energy_next) of
    ## the trajectory
    energy <- sum(v^2) / 2
    energy_next <- sum(v_next^2) / 2

    ## Accept or reject the state at the end of the trajectory, returning
    ## either the position at the end of the trajectory or the initial
    ## position
    u <- rng$random_real(1)
    accept <- u < exp(density_next - state$density + energy - energy_next)
    if (accept) {
      state$density <- density_next
      state$pars <- pars_next
    }

    if (debug) {
      internal$history <-
        c(internal$history, list(list(pars = history, accept = accept)))
    }

    state
  }

  finalise <- function(state, model, rng) {
    if (debug) {
      pars <- lapply(internal$history, "[[", "pars")
      pars <- array(unlist(pars),
                    c(dim(pars[[1]]), length(pars)),
                    list(NULL, model$parameters, NULL))
      accept <- vlapply(internal$history, "[[", "accept")
      list(debug = list(pars = pars, accept = accept))
    } else {
      NULL
    }
  }

  mcstate_sampler("Hamiltonian Monte Carlo",
                  initialise,
                  step,
                  finalise)
}


hmc_transform <- function(domain) {
  lower <- domain[, 1]
  upper <- domain[, 2]

  is_bounded <- is.finite(lower) & is.finite(upper)
  is_semi_infinite <- is.finite(lower) & upper == Inf
  is_infinite <- lower == -Inf & upper == Inf
  unknown <- !(is_bounded | is_semi_infinite | is_infinite)
  if (any(unknown)) {
    ## This is not very likely to be thrown
    cli::cli_abort("Unhandled domain type for parameter {which(unknown)}")
  }

  ## Save finite bounds for later:
  a <- lower[is_bounded]
  b <- upper[is_bounded]

  list(
    ## Transform from R^n into the model space
    rn2model = function(theta) {
      theta[is_semi_infinite] <- exp(theta[is_semi_infinite])
      theta[is_bounded] <- ilogit_bounded(theta[is_bounded], a, b)
      theta
    },

    ## Transform from model space to R^n
    model2rn = function(x) {
      x[is_semi_infinite] <- log(x[is_semi_infinite])
      x[is_bounded] <- logit_bounded(x[is_bounded], a, b)
      x
    },

    ## Derivative of rn2model, as a multiplier to f'(x); we pass in
    ## model parameters here, not R^n space.
    deriv = function(x) {
      ## d/dtheta theta -> 1
      x[is_infinite] <- 1
      ## Here we want
      ## d/dtheta f(exp(theta)) -> exp(theta) f'(exp(theta))
      ## -> x f'(x)
      ## so the multiplier is x itself and the assignment below is a
      ## no-op, kept for symmetry with the other cases
      x[is_semi_infinite] <- x[is_semi_infinite]
      ## See below, this one is a bit harder
      x[is_bounded] <- dilogit_bounded(x[is_bounded], a, b)
      x
    })
}
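
## An illustrative round-trip check of the transforms (a sketch, not
## part of the sampler): model2rn and rn2model should be mutual inverses
## on each domain type.
##   tr <- hmc_transform(rbind(c(-Inf, Inf), c(0, Inf), c(2, 5)))
##   x <- c(0.3, 1.7, 3.1)
##   stopifnot(isTRUE(all.equal(tr$rn2model(tr$model2rn(x)), x)))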


## Convert (0, 1) or (a, b) to (-Inf, Inf)
##
## Typically this is model -> R^n
logit_bounded <- function(x, a, b) {
  p <- (x - a) / (b - a)
  log(p / (1 - p))
}


## Convert (-Inf, Inf) to (0, 1) or (a, b)
##
## Typically this is R^n -> model
ilogit_bounded <- function(theta, a, b) {
  p <- exp(theta) / (1 + exp(theta))
  p * (b - a) + a
}


## The chain rule is d/dtheta f(g(theta)) -> g'(theta) f'(g(theta))
##
## our g is the translation theta -> x which is
##
## > a + (b - a) exp(theta) / (1 + exp(theta))
##
## The derivative of this with respect to theta:
##
## > (b - a) exp(theta) / (exp(theta) + 1)^2
##
## Substituting the theta -> x transformation log((x - a) / (b - x)) into this
##
## > (b - a) exp(log((x - a) / (b - x))) / (exp(log((x - a) / (b - x))) + 1)^2
## > (x - a) * (b - x) / (b - a)
##
## In the case a = 0, b = 1 we get x (1 - x) as we expect
##
## https://en.wikipedia.org/wiki/Logistic_function#Derivative
dilogit_bounded <- function(x, a, b) {
  (x - a) * (b - x) / (b - a)
}
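
## An illustrative check of the derivative above (a sketch): it should
## match a central finite difference of ilogit_bounded.
##   a <- 2; b <- 5; theta <- 0.7; h <- 1e-6
##   x <- ilogit_bounded(theta, a, b)
##   fd <- (ilogit_bounded(theta + h, a, b) -
##            ilogit_bounded(theta - h, a, b)) / (2 * h)
##   stopifnot(abs(dilogit_bounded(x, a, b) - fd) < 1e-8)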