Poc pt #24

Closed
wants to merge 9 commits into from
1 change: 1 addition & 0 deletions NAMESPACE
@@ -9,6 +9,7 @@ export(mcstate_rng_pointer)
export(mcstate_runner_parallel)
export(mcstate_runner_serial)
export(mcstate_sample)
export(mcstate_sampler_hmc)
export(mcstate_sampler_random_walk)
importFrom(R6,R6Class)
useDynLib(mcstate2, .registration = TRUE)
138 changes: 138 additions & 0 deletions POC_PT.R
@@ -0,0 +1,138 @@
# Create a bimodal model: a mixture of two Gaussians centred at
# +/- symmetric_means
mixture_gaussians <- function(symmetric_means = 5) {
  mcstate_model(list(
    parameters = c("x"),
    direct_sample = function(rng) {
      rng$random_normal(1)
    },
    density = function(x) {
      log(0.5 * dnorm(x + symmetric_means) + 0.5 * dnorm(x - symmetric_means))
    },
    gradient = function(x) {
      -((x + symmetric_means) * dnorm(x + symmetric_means) +
          (x - symmetric_means) * dnorm(x - symmetric_means)) /
        (dnorm(x + symmetric_means) + dnorm(x - symmetric_means))
    },
    domain = cbind(rep(-Inf, 1), rep(Inf, 1))))
}
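
# A quick illustrative sanity check (not part of the PoC itself): the
# analytic gradient above should agree with a central finite difference
# of the log density.
m_check <- mixture_gaussians(5)
x0 <- 1.2
h <- 1e-6
stopifnot(abs(m_check$gradient(x0) -
                (m_check$density(x0 + h) - m_check$density(x0 - h)) / (2 * h)) <
            1e-6)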

# Tempered mixture model:
#   beta = 0: density is dnorm(0, 1) - our prior density
#   beta = 1: density is dnorm(0, 1) * pdf(mixture) - can be seen as our
#   posterior
tempered_mixture <- function(symmetric_means = 5, beta) {
  mcstate_model(list(
    parameters = c("x"),
    direct_sample = function(rng) { # still a direct sample from Normal(0, 1)
      rng$random_normal(1)
    },
    density = function(x) {
      # We assume a Bayesian setting in which beta = 0 gives the prior
      # and beta = 1 gives prior * mixture_gaussians.
      # Note also that the density is unnormalised!
      beta * log(0.5 * dnorm(x + symmetric_means) +
                   0.5 * dnorm(x - symmetric_means)) +
        dnorm(x, log = TRUE)
    },
    gradient = function(x) {
      beta * (-((x + symmetric_means) * dnorm(x + symmetric_means) +
                  (x - symmetric_means) * dnorm(x - symmetric_means)) /
                (dnorm(x + symmetric_means) + dnorm(x - symmetric_means))) - x
    },
    domain = cbind(rep(-Inf, 1), rep(Inf, 1))))
}
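
# Illustrative check: at beta = 0 the tempered density should reduce to
# the standard normal prior.
m0 <- tempered_mixture(10, 0)
stopifnot(isTRUE(all.equal(m0$density(0.7), dnorm(0.7, log = TRUE))))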

# Plot the tempered (beta = 1, i.e. posterior) density
m_T <- tempered_mixture(10, 1)
x <- seq(-15, 15, length.out = 1000)
plot(x, exp(m_T$density(x)), type = "l")

# Create an HMC sampler
sampler <- mcstate_sampler_hmc(epsilon = 0.1, n_integration_steps = 10)
# sampler <- mcstate_sampler_random_walk(vcv = matrix(0.1))

# Sample from m_T using the HMC sampler
set.seed(1)
res <- mcstate_sample(m_T, sampler, 20) # increase (e.g. to 200000) for a real run
hist(res$pars, breaks = 100)
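
# With modes this far apart (symmetric_means = 10), a single HMC chain
# will typically remain in whichever mode it starts in; this is the
# motivation for the parallel tempering scheme below.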

N_temp <- 15
beta <- seq(0, N_temp) / N_temp
beta_index <- seq(N_temp + 1)
# Create N_temp + 1 "machines" to ease the leap to a parallel
# implementation. In a serial version we could just use the same sampler.
PT_machine <- lapply(beta, FUN = function(x) {
  list(
    sampler = mcstate_sampler_hmc(epsilon = 0.1, n_integration_steps = 10),
    beta = x,
    model = tempered_mixture(10, x),
    rng = mcstate_rng$new())
})

# Initialisation of the "machines"
for (i in seq(N_temp + 1)) {
  PT_machine[[i]]$state$pars <- PT_machine[[i]]$model$direct_sample(PT_machine[[i]]$rng)
  PT_machine[[i]]$state$density <- PT_machine[[i]]$model$density(PT_machine[[i]]$state$pars)
  PT_machine[[i]]$sampler$initialise(PT_machine[[i]]$state,
                                     PT_machine[[i]]$model,
                                     PT_machine[[i]]$rng)
}

m <- mixture_gaussians(10) # the "likelihood" model (target over prior)
even_step <- FALSE
n_iterations <- 10000
x_res <- NULL
beta_res <- PT_machine[[N_temp + 1]]$beta
index_res <- beta_index
for (k in seq(n_iterations)) {

  # Every machine does one HMC step => exploration step
  for (i in seq(N_temp + 1)) {
    if (PT_machine[[i]]$beta == 0) {
      # At beta = 0 the tempered density is just the prior, which we can
      # sample from directly
      PT_machine[[i]]$state$pars <- PT_machine[[i]]$model$direct_sample(PT_machine[[i]]$rng)
      PT_machine[[i]]$state$density <- PT_machine[[i]]$model$density(PT_machine[[i]]$state$pars)
    } else {
      PT_machine[[i]]$state <- PT_machine[[i]]$sampler$step(PT_machine[[i]]$state,
                                                            PT_machine[[i]]$model,
                                                            PT_machine[[i]]$rng)
    }
  }

  # Communication step: alternate between swapping the even and odd
  # pairs of adjacent temperatures
  if (even_step) {
    index <- which(seq(N_temp) %% 2 == 0)
  } else {
    index <- which(seq(N_temp) %% 2 == 1)
  }

  for (i in index) {
    # First machine, currently at beta_{i}
    i1 <- beta_index[i]
    # Second machine, currently at beta_{i+1}
    i2 <- beta_index[i + 1]
    # Log acceptance probability for the exchange; m$density is the log
    # density of the "likelihood" (target over prior), whose prior terms
    # cancel in the swap ratio
    alpha <- min(0, (beta[i + 1] - beta[i]) *
                   (m$density(PT_machine[[i1]]$state$pars) -
                      m$density(PT_machine[[i2]]$state$pars)))

    if (log(runif(1)) < alpha) { # swap
      beta_index[i] <- i2
      beta_index[i + 1] <- i1
      # This is a bit of a hack, but avoids exchanging the states across
      # "machines". Note that we still need to recalculate the density
      # value corresponding to the new "temperature"/beta.
      PT_machine[[i2]]$beta <- beta[i]
      environment(PT_machine[[i2]]$model$density)$beta <- beta[i]
      PT_machine[[i2]]$state$density <- PT_machine[[i2]]$model$density(PT_machine[[i2]]$state$pars)
      PT_machine[[i1]]$beta <- beta[i + 1]
      environment(PT_machine[[i1]]$model$density)$beta <- beta[i + 1]
      PT_machine[[i1]]$state$density <- PT_machine[[i1]]$model$density(PT_machine[[i1]]$state$pars)
    }
  }
  even_step <- !even_step
  i_target <- beta_index[N_temp + 1] # the machine currently at beta = 1
  all_state <- rep(0, N_temp + 1)
  for (i in seq(N_temp + 1)) {
    all_state[i] <- PT_machine[[beta_index[i]]]$state$pars
  }
  x_res <- rbind(x_res, all_state) # states ordered by temperature
  # x_res <- rbind(x_res, unlist(PT_machine[[i_target]]$state))
  index_res <- rbind(index_res, beta_index)
  beta_res <- c(beta_res, environment(PT_machine[[i_target]]$model$density)$beta)
}
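
# An illustrative diagnostic (an assumption of this sketch: each row of
# index_res records which machine occupies each temperature): the rate
# at which the coldest (beta = 1) slot changes hands is a crude measure
# of how often swaps reach the target chain.
cold_swap_rate <- mean(diff(index_res[, N_temp + 1]) != 0)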

par(mfrow = c(1, 2))
target_d <- N_temp + 1 # column holding the beta = 1 (target) chain
plot(x_res[, target_d], type = "l", col = grey(.8))
points(x_res[, target_d])
hist(x_res[, target_d], breaks = 150)


12 changes: 9 additions & 3 deletions R/distributions.R
@@ -4,12 +4,18 @@ rmvnorm <- function(x, vcv, rng) {
}


make_rmvnorm <- function(vcv) {
make_rmvnorm <- function(vcv, centred = FALSE) {
n <- ncol(vcv)
r <- chol(vcv, pivot = TRUE)
r <- r[, order(attr(r, "pivot", exact = TRUE))]
function(x, rng) {
x + drop(rng$random_normal(n) %*% r)
if (centred) {
function(rng) {
drop(rng$random_normal(n) %*% r)
}
} else {
function(x, rng) {
x + drop(rng$random_normal(n) %*% r)
}
}
}
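
## An illustrative use of the new centred form (a sketch; assumes an
## mcstate_rng object as used elsewhere in this package):
##   draw <- make_rmvnorm(diag(2) * 0.1, centred = TRUE)
##   v <- draw(mcstate_rng$new()) # a draw from N(0, vcv), no mean shift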

220 changes: 220 additions & 0 deletions R/sampler-hmc.R
@@ -0,0 +1,220 @@
##' Create a Hamiltonian Monte Carlo sampler, implemented using the
##' leapfrog algorithm.
##'
##' @title Create HMC
##'
##' @param epsilon The step size of the HMC steps
##'
##' @param n_integration_steps The number of leapfrog integration steps
##' to take within each HMC step
##'
##' @param vcv A variance-covariance matrix for the momentum vector.
##' The default uses an identity matrix.
##'
##' @param debug Logical, indicating if we should save all
##' intermediate points of each trajectory. This will add an element
##' "debug" (containing the trajectory and acceptance flag of every
##' step) to the details after sampling. This *will* slow things down
##' though as we accumulate the history inefficiently.
##'
##' @return A `mcstate_sampler` object, which can be used with
##' [mcstate_sample]
##'
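##' @examples
##' # A minimal sketch: HMC on a standard normal target, written against
##' # the model interface used elsewhere in this package
##' m <- mcstate_model(list(
##'   parameters = "x",
##'   direct_sample = function(rng) rng$random_normal(1),
##'   density = function(x) dnorm(x, log = TRUE),
##'   gradient = function(x) -x,
##'   domain = cbind(-Inf, Inf)))
##' sampler <- mcstate_sampler_hmc(epsilon = 0.1, n_integration_steps = 10)
##' res <- mcstate_sample(m, sampler, 100)
##'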
##' @export
mcstate_sampler_hmc <- function(epsilon = 0.015, n_integration_steps = 10,
                                vcv = NULL, debug = FALSE) {
  internal <- new.env()

  if (!is.null(vcv)) {
    check_vcv(vcv, call = environment())
  }

  initialise <- function(state, model, rng) {
    internal$transform <- hmc_transform(model$domain)
    n_pars <- length(model$parameters)
    if (is.null(vcv)) {
      internal$sample_momentum <- function(rng) rng$random_normal(n_pars)
    } else {
      if (n_pars != nrow(vcv)) {
        cli::cli_abort(
          "Incompatible length parameters ({n_pars}) and vcv ({nrow(vcv)})")
      }
      internal$sample_momentum <- make_rmvnorm(vcv, centred = TRUE)
    }
    if (debug) {
      internal$history <- list()
    }
  }

  step <- function(state, model, rng) {
    ## Just a helper for now
    compute_gradient <- function(x) {
      internal$transform$deriv(x) * model$gradient(x)
    }

    theta <- internal$transform$model2rn(state$pars)
    pars_next <- state$pars
    theta_next <- theta

    if (debug) {
      history <- matrix(NA_real_, n_integration_steps + 1, length(theta))
      history[1, ] <- pars_next
    }

    v <- internal$sample_momentum(rng)

    ## Make a half step for momentum at the beginning
    v_next <- v + epsilon * compute_gradient(pars_next) / 2

    for (i in seq_len(n_integration_steps)) {
      ## Make a full step for the position
      theta_next <- theta_next + epsilon * v_next
      pars_next <- internal$transform$rn2model(theta_next)
      ## Make a full step for the momentum, except at end of trajectory
      if (i != n_integration_steps) {
        v_next <- v_next + epsilon * compute_gradient(pars_next)
      }
      if (debug) {
        history[i + 1, ] <- pars_next
      }
    }

    ## Make a half step for momentum at the end.
    v_next <- v_next + epsilon * compute_gradient(pars_next) / 2

    ## Some treatments would negate the momentum at the end of the
    ## trajectory to make the proposal symmetric by setting
    ## v_next <- -v_next, but this is not needed in practice because the
    ## kinetic energy is symmetric in v.

    ## Potential energy at the end of the trajectory
    density_next <- model$density(pars_next)

    ## Kinetic energies at the start (energy) and end (energy_next) of
    ## the trajectory
    energy <- sum(v^2) / 2
    energy_next <- sum(v_next^2) / 2

    ## Accept or reject the state at the end of the trajectory, returning
    ## either the position at the end of the trajectory or the initial
    ## position
    u <- rng$random_real(1)
    accept <- u < exp(density_next - state$density + energy - energy_next)
    if (accept) {
      state$density <- density_next
      state$pars <- pars_next
    }

    if (debug) {
      internal$history <-
        c(internal$history, list(list(pars = history, accept = accept)))
    }

    state
  }

  finalise <- function(state, model, rng) {
    if (debug) {
      pars <- lapply(internal$history, "[[", "pars")
      pars <- array(unlist(pars),
                    c(dim(pars[[1]]), length(pars)),
                    list(NULL, model$parameters, NULL))
      accept <- vlapply(internal$history, "[[", "accept")
      list(debug = list(pars = pars, accept = accept))
    } else {
      NULL
    }
  }

  mcstate_sampler("Hamiltonian Monte Carlo",
                  initialise,
                  step,
                  finalise)
}


hmc_transform <- function(domain) {
  lower <- domain[, 1]
  upper <- domain[, 2]

  is_bounded <- is.finite(lower) & is.finite(upper)
  is_semi_infinite <- is.finite(lower) & upper == Inf
  is_infinite <- lower == -Inf & upper == Inf
  unknown <- !(is_bounded | is_semi_infinite | is_infinite)
  if (any(unknown)) {
    ## This is not very likely to be thrown
    cli::cli_abort("Unhandled domain type for parameter {which(unknown)}")
  }

  ## Save finite bounds for later:
  a <- lower[is_bounded]
  b <- upper[is_bounded]

  list(
    ## Transform from R^n into the model space
    rn2model = function(theta) {
      theta[is_semi_infinite] <- exp(theta[is_semi_infinite])
      theta[is_bounded] <- ilogit_bounded(theta[is_bounded], a, b)
      theta
    },

    ## Transform from model space to R^n
    model2rn = function(x) {
      x[is_semi_infinite] <- log(x[is_semi_infinite])
      x[is_bounded] <- logit_bounded(x[is_bounded], a, b)
      x
    },

    ## Derivative of rn2model, as a multiplier to f'(x); we pass in
    ## model parameters here, not R^n space.
    deriv = function(x) {
      ## d/dtheta theta -> 1
      x[is_infinite] <- 1
      ## Here we want
      ## d/dtheta f(exp(theta)) -> exp(theta) f'(exp(theta))
      ## -> x f'(x)
      ## so the multiplier is x itself and the assignment below is a
      ## no-op, kept for symmetry with the other cases
      x[is_semi_infinite] <- x[is_semi_infinite]
      ## See below, this one is a bit harder
      x[is_bounded] <- dilogit_bounded(x[is_bounded], a, b)
      x
    })
}
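
## An illustrative round-trip check of the transforms (a sketch, not
## part of the sampler): model2rn and rn2model should be mutual inverses
## on each domain type.
##   tr <- hmc_transform(rbind(c(-Inf, Inf), c(0, Inf), c(2, 5)))
##   x <- c(0.3, 1.7, 3.1)
##   stopifnot(isTRUE(all.equal(tr$rn2model(tr$model2rn(x)), x)))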


## Convert (0, 1) or (a, b) to (-Inf, Inf)
##
## Typically this is model -> R^n
logit_bounded <- function(x, a, b) {
  p <- (x - a) / (b - a)
  log(p / (1 - p))
}


## Convert (-Inf, Inf) to (0, 1) or (a, b)
##
## Typically this is R^n -> model
ilogit_bounded <- function(theta, a, b) {
  p <- exp(theta) / (1 + exp(theta))
  p * (b - a) + a
}


## The chain rule is d/dtheta f(g(theta)) -> g'(theta) f'(g(theta))
##
## our g is the translation theta -> x which is
##
## > a + (b - a) exp(theta) / (1 + exp(theta))
##
## The derivative of this with respect to theta:
##
## > (b - a) exp(theta) / (exp(theta) + 1)^2
##
## Substituting the theta -> x transformation log((x - a) / (b - x)) into this
##
## > (b - a) exp(log((x - a) / (b - x))) / (exp(log((x - a) / (b - x))) + 1)^2
## > (x - a) * (b - x) / (b - a)
##
## In the case a = 0, b = 1 we get x (1 - x) as we expect
##
## https://en.wikipedia.org/wiki/Logistic_function#Derivative
dilogit_bounded <- function(x, a, b) {
  (x - a) * (b - x) / (b - a)
}
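
## An illustrative check of the derivative above (a sketch): it should
## match a central finite difference of ilogit_bounded.
##   a <- 2; b <- 5; theta <- 0.7; h <- 1e-6
##   x <- ilogit_bounded(theta, a, b)
##   fd <- (ilogit_bounded(theta + h, a, b) -
##            ilogit_bounded(theta - h, a, b)) / (2 * h)
##   stopifnot(abs(dilogit_bounded(x, a, b) - fd) < 1e-8)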