From 4db58086b5394e859c27af13624c395425cc5598 Mon Sep 17 00:00:00 2001 From: "michael.williams" Date: Wed, 6 Sep 2023 09:17:47 -0700 Subject: [PATCH 01/24] add basic support for nessai sampler --- pycbc/inference/io/__init__.py | 2 + pycbc/inference/io/nessai.py | 8 ++ pycbc/inference/sampler/__init__.py | 7 ++ pycbc/inference/sampler/nessai.py | 186 ++++++++++++++++++++++++++++ 4 files changed, 203 insertions(+) create mode 100644 pycbc/inference/io/nessai.py create mode 100644 pycbc/inference/sampler/nessai.py diff --git a/pycbc/inference/io/__init__.py b/pycbc/inference/io/__init__.py index 93261f677be..b17ddd7a701 100644 --- a/pycbc/inference/io/__init__.py +++ b/pycbc/inference/io/__init__.py @@ -37,6 +37,7 @@ from .multinest import MultinestFile from .dynesty import DynestyFile from .ultranest import UltranestFile +from .nessai import NessaiFile from .posterior import PosteriorFile from .txt import InferenceTXTFile @@ -49,6 +50,7 @@ DynestyFile.name: DynestyFile, PosteriorFile.name: PosteriorFile, UltranestFile.name: UltranestFile, + NessaiFile.name : NessaiFile, } try: diff --git a/pycbc/inference/io/nessai.py b/pycbc/inference/io/nessai.py new file mode 100644 index 00000000000..96734687698 --- /dev/null +++ b/pycbc/inference/io/nessai.py @@ -0,0 +1,8 @@ +"""Provides IO for the nessai sampler""" +from .base_nested_sampler import BaseNestedSamplerFile + + +class NessaiFile(BaseNestedSamplerFile): + """Class to handle file IO for the ``nessai`` sampler.""" + + name = 'nessai_file' diff --git a/pycbc/inference/sampler/__init__.py b/pycbc/inference/sampler/__init__.py index 1b83f52d6cf..41da16f39c6 100644 --- a/pycbc/inference/sampler/__init__.py +++ b/pycbc/inference/sampler/__init__.py @@ -66,6 +66,13 @@ except ImportError: pass +try: + from .nessai import NessaiSampler + samplers[NessaiSampler.name] = NessaiSampler +except ImportError: + pass + + def load_from_config(cp, model, **kwargs): """Loads a sampler from the given config file. diff --git a/pycbc/inference/sampler/nessai.py b/pycbc/inference/sampler/nessai.py new file mode 100644 index 00000000000..6c9f485f33f --- /dev/null +++ b/pycbc/inference/sampler/nessai.py @@ -0,0 +1,186 @@ +""" +This modules provides class for using the nessai sampler package for parameter +estimation. +""" +import os + +import nessai.flowsampler +import nessai.model +import nessai.livepoint +import numpy.lib.recfunctions as rfn + +from .base import BaseSampler, setup_output +from .base_mcmc import get_optional_arg_from_config +from ..io import NessaiFile + + +class NessaiSampler(BaseSampler): + """Class to construct a FlowSampler from the nessai package.""" + + name = "nessai" + _io = NessaiFile + + def __init__(self, model, nlive, loglikelihood_function, **kwargs): + super().__init__(model) + + # TODO: add other options + self.nlive = nlive + self.model_call = NessaiModel(self.model, loglikelihood_function) + + #TODO: handle multiprocessing + + self._sampler = None + self._nested_samples = None + self._posterior_samples = None + self._logz = None + self._dlogz = None + self.checkpoint_file = None + + @property + def io(self): + return self._io + + @property + def model_stats(self): + return { + "loglikelihood": self._sampler.posterior_samples["logL"], + "logprior": self._sampler.posterior_samples["logP"], + } + + @property + def samples(self): + return nessai.livepoint.live_points_to_dict( + self._sampler.posterior_samples, + self.model.sampling_params, + ) + + def run(self, resume=False): + out_dir = os.path.dirname(os.path.abspath(self.checkpoint_file)) + if self._sampler is None: + self._sampler = nessai.flowsampler.FlowSampler( + self.model_call, + output=out_dir, + nlive=self.nlive, + resume=resume, + ) + self._sampler.run() + + @classmethod + def from_config(cls, cp, model, output_file=None, nprocesses=1, + use_mpi=False): + """ + Loads the sampler from the given config file. + """ + section = "sampler" + # check name + assert cp.get(section, "name") == cls.name, ( + "name in section [sampler] must match mine") + # get the number of live points to use + # TODO: add other options + nlive = int(cp.get(section, "nlive")) + + loglikelihood_function = \ + get_optional_arg_from_config(cp, section, 'loglikelihood-function') + obj = cls( + model, + nlive=nlive, + loglikelihood_function=loglikelihood_function, + ) + + setup_output(obj, output_file, check_nsamples=False) + if not obj.new_checkpoint: + obj.resume_from_checkpoint() + return obj + + def set_initial_conditions(self, initial_distribution=None, + samples_file=None): + """Sets up the starting point for the sampler. + + Should also set the sampler's random state. + """ + pass + + def checkpoint(self): + self._sampler.ns.checkpoint() + + def resume_from_checkpoint(self): + # TODO: check this works + self.run(resume=True) + + def finalize(self): + logz = self._sampler.log_evidence + dlogz = self._sampler.log_evidence_error + + for fn in [self.checkpoint_file]: + with self.io(fn, "a") as fp: + fp.write_logevidence(logz, dlogz) + for fn in [self.checkpoint_file, self.backup_file]: + self.write_results(fn) + + def write_results(self, filename): + with self.io(filename, "a") as fp: + fp.write_samples(self.samples, self.model.sampling_params) + fp.write_samples(self.model_stats) + fp.write_logevidence( + self._sampler.log_evidence, + self._sampler.log_evidence_error, + ) + + +class NessaiModel(nessai.model.Model): + """Wrapper for PyCBC Inference model class for use with nessai. + + Parameters + ---------- + model : inference.BaseModel instance + A model instance from PyCBC. + loglikelihood_function : str + Name of the log-likelihood method to call. + """ + def __init__(self, model, loglikelihood_function=None): + self.model = model + self.names = list(model.sampling_params) + + # Configure the log-likelihood function + if loglikelihood_function is None: + loglikelihood_function = 'loglikelihood' + self.loglikelihood_function = loglikelihood_function + + # Configure the priors bounds + bounds = {} + for dist in model.prior_distribution.distributions: + bounds.update(**{k: [v.min, v.max] for k, v in dist.bounds.items()}) + self.bounds = bounds + + # Prior and likelihood are not vectorised + self.vectorised_likelihood = False + self.vectorised_prior = False + + def to_dict(self, x): + return {n: x[n].item() for n in self.names} + + def to_live_points(self, x): + """Convert to the structured arrays used by nessai""" + # TODO: could this be improved? + return nessai.livepoint.numpy_array_to_live_points( + rfn.structured_to_unstructured(x), + self.names, + ) + + def new_point(self, N=1): + """Draw a new point""" + return self.to_live_points(self.model.prior_rvs(size=N)) + + def new_point_log_prob(self, x): + """Log-probability for the ``new_point`` method""" + return self.batch_evaluate_log_prior(x) + + def log_prior(self, x): + """Compute the log-prior""" + self.model.update(**self.to_dict(x)) + return self.model.logprior + + def log_likelihood(self, x): + """Compute the log-likelihood""" + self.model.update(**self.to_dict(x)) + return getattr(self.model, self.loglikelihood_function) From 155de12deda64160f3bf8eea9aa95d952edbb977 Mon Sep 17 00:00:00 2001 From: "michael.williams" Date: Tue, 12 Sep 2023 05:59:05 -0700 Subject: [PATCH 02/24] enable all options and resuming in nessai --- pycbc/inference/sampler/nessai.py | 110 +++++++++++++++++++++++++----- 1 file changed, 94 insertions(+), 16 deletions(-) diff --git a/pycbc/inference/sampler/nessai.py b/pycbc/inference/sampler/nessai.py index 6c9f485f33f..fd93dba4acd 100644 --- a/pycbc/inference/sampler/nessai.py +++ b/pycbc/inference/sampler/nessai.py @@ -2,16 +2,21 @@ This modules provides class for using the nessai sampler package for parameter estimation. """ +import ast +import logging import os import nessai.flowsampler import nessai.model import nessai.livepoint +import nessai.utils.multiprocessing +import nessai.utils.settings import numpy.lib.recfunctions as rfn from .base import BaseSampler, setup_output from .base_mcmc import get_optional_arg_from_config from ..io import NessaiFile +from ...pool import choose_pool class NessaiSampler(BaseSampler): @@ -20,14 +25,27 @@ class NessaiSampler(BaseSampler): name = "nessai" _io = NessaiFile - def __init__(self, model, nlive, loglikelihood_function, **kwargs): + def __init__( + self, + model, + loglikelihood_function, + nlive=1000, + nprocesses=1, + use_mpi=False, + run_kwds=None, + extra_kwds=None, + ): super().__init__(model) - # TODO: add other options self.nlive = nlive self.model_call = NessaiModel(self.model, loglikelihood_function) - #TODO: handle multiprocessing + self.extra_kwds = extra_kwds if extra_kwds is not None else {} + self.run_kwds = run_kwds if run_kwds is not None else {} + + nessai.utils.multiprocessing.initialise_pool_variables(self.model_call) + self.pool = choose_pool(mpi=use_mpi, processes=nprocesses) + self.nprocesses = nprocesses self._sampler = None self._nested_samples = None @@ -46,7 +64,7 @@ def model_stats(self): "loglikelihood": self._sampler.posterior_samples["logL"], "logprior": self._sampler.posterior_samples["logP"], } - + @property def samples(self): return nessai.livepoint.live_points_to_dict( @@ -54,16 +72,21 @@ def samples(self): self.model.sampling_params, ) - def run(self, resume=False): + def run(self, resume=True): out_dir = os.path.dirname(os.path.abspath(self.checkpoint_file)) + print(self.checkpoint_file) if self._sampler is None: self._sampler = nessai.flowsampler.FlowSampler( self.model_call, output=out_dir, - nlive=self.nlive, resume=resume, + pool=self.pool, + n_pool=self.nprocesses, + close_pool=False, + signal_handling=False, + **self.extra_kwds, ) - self._sampler.run() + self._sampler.run(**self.run_kwds) @classmethod def from_config(cls, cp, model, output_file=None, nprocesses=1, @@ -76,15 +99,64 @@ def from_config(cls, cp, model, output_file=None, nprocesses=1, assert cp.get(section, "name") == cls.name, ( "name in section [sampler] must match mine") # get the number of live points to use - # TODO: add other options - nlive = int(cp.get(section, "nlive")) + if cp.has_option(section, "importance_nested_sampler"): + importance_nested_sampler = cp.get( + section, "importance_nested_sampler", + ) + else: + importance_nested_sampler = False + + # Determine all possible keyword arguments that are not hardcoded + default_kwds, default_run_kwds = nessai.utils.settings.get_all_kwargs( + importance_nested_sampler=importance_nested_sampler, + split_kwargs=True, + ) + + # Keyword arguments the user cannot configure via the config + remove_kwds = [ + "resume", "pool", "n_pool", "close_pool", "signal_handling" + ] + + for kwd in remove_kwds: + default_kwds.pop(kwd, None) + default_run_kwds.pop(kwd, None) + + kwds = {} + run_kwds = {} + + for d_out, d_defaults in zip( + [kwds, run_kwds], [default_kwds, default_run_kwds] + ): + for k in d_defaults.keys(): + if cp.has_option(section, k): + d_out[k] = ast.literal_eval(cp.get(section, k)) + + # Specified kwds + ignore_kwds = {"nlive", "name"} + invalid_kwds = ( + cp[section].keys() + - set().union(kwds.keys(), run_kwds.keys()) + - ignore_kwds + ) + + if invalid_kwds: + raise RuntimeError( + f"Config contains unknown options: {invalid_kwds}" + ) + + logging.warning(f"nessai keyword arguments: {kwds}") + logging.warning(f"nessai run keyword arguments: {run_kwds}") loglikelihood_function = \ get_optional_arg_from_config(cp, section, 'loglikelihood-function') + obj = cls( model, - nlive=nlive, loglikelihood_function=loglikelihood_function, + nprocesses=nprocesses, + use_mpi=use_mpi, + run_kwds=run_kwds, + extra_kwds=kwds, ) setup_output(obj, output_file, check_nsamples=False) @@ -92,8 +164,11 @@ def from_config(cls, cp, model, output_file=None, nprocesses=1, obj.resume_from_checkpoint() return obj - def set_initial_conditions(self, initial_distribution=None, - samples_file=None): + def set_initial_conditions( + self, + initial_distribution=None, + samples_file=None, + ): """Sets up the starting point for the sampler. Should also set the sampler's random state. @@ -104,7 +179,6 @@ def checkpoint(self): self._sampler.ns.checkpoint() def resume_from_checkpoint(self): - # TODO: check this works self.run(resume=True) def finalize(self): @@ -129,7 +203,7 @@ def write_results(self, filename): class NessaiModel(nessai.model.Model): """Wrapper for PyCBC Inference model class for use with nessai. - + Parameters ---------- model : inference.BaseModel instance @@ -149,16 +223,20 @@ def __init__(self, model, loglikelihood_function=None): # Configure the priors bounds bounds = {} for dist in model.prior_distribution.distributions: - bounds.update(**{k: [v.min, v.max] for k, v in dist.bounds.items()}) + bounds.update( + **{k: [v.min, v.max] for k, v in dist.bounds.items()} + ) self.bounds = bounds # Prior and likelihood are not vectorised self.vectorised_likelihood = False self.vectorised_prior = False + # Use the pool for computing the prior + self.parallelise_prior = True def to_dict(self, x): return {n: x[n].item() for n in self.names} - + def to_live_points(self, x): """Convert to the structured arrays used by nessai""" # TODO: could this be improved? From 6d612e6091469078c0ffb8f751f314f518fbbf53 Mon Sep 17 00:00:00 2001 From: mj-will Date: Tue, 26 Sep 2023 13:24:51 +0100 Subject: [PATCH 03/24] fix prior bounds in nessai model --- pycbc/inference/sampler/nessai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pycbc/inference/sampler/nessai.py b/pycbc/inference/sampler/nessai.py index fd93dba4acd..fc2d1c929b1 100644 --- a/pycbc/inference/sampler/nessai.py +++ b/pycbc/inference/sampler/nessai.py @@ -224,7 +224,7 @@ def __init__(self, model, loglikelihood_function=None): bounds = {} for dist in model.prior_distribution.distributions: bounds.update( - **{k: [v.min, v.max] for k, v in dist.bounds.items()} + **{k: [v.min, v.max] for k, v in dist.bounds.items() if k in self.names} ) self.bounds = bounds From 54f18b212e4d858267d774c8458b18f362b547b1 Mon Sep 17 00:00:00 2001 From: mj-will Date: Fri, 29 Sep 2023 15:33:33 +0100 Subject: [PATCH 04/24] tweak resuming and samples in nessai interface --- pycbc/inference/sampler/nessai.py | 42 +++++++++++++++++-------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/pycbc/inference/sampler/nessai.py b/pycbc/inference/sampler/nessai.py index fc2d1c929b1..ea13a1c2a1c 100644 --- a/pycbc/inference/sampler/nessai.py +++ b/pycbc/inference/sampler/nessai.py @@ -67,19 +67,24 @@ def model_stats(self): @property def samples(self): - return nessai.livepoint.live_points_to_dict( - self._sampler.posterior_samples, + """The raw nested samples including the corresponding weights""" + samples = nessai.livepoint.live_points_to_dict( + self._sampler.nested_samples, self.model.sampling_params, ) - - def run(self, resume=True): - out_dir = os.path.dirname(os.path.abspath(self.checkpoint_file)) - print(self.checkpoint_file) + samples["logwt"] = self._sampler.ns.state.log_posterior_weights + samples["loglikelihood"] = self._sampler.nested_samples["loglikelihood"] + return samples + + def run(self): + out_dir = os.path.join( + os.path.dirname(os.path.abspath(self.checkpoint_file)), + "nessai", + ) if self._sampler is None: self._sampler = nessai.flowsampler.FlowSampler( self.model_call, output=out_dir, - resume=resume, pool=self.pool, n_pool=self.nprocesses, close_pool=False, @@ -89,8 +94,9 @@ def run(self, resume=True): self._sampler.run(**self.run_kwds) @classmethod - def from_config(cls, cp, model, output_file=None, nprocesses=1, - use_mpi=False): + def from_config( + cls, cp, model, output_file=None, nprocesses=1, use_mpi=False + ): """ Loads the sampler from the given config file. """ @@ -98,7 +104,7 @@ def from_config(cls, cp, model, output_file=None, nprocesses=1, # check name assert cp.get(section, "name") == cls.name, ( "name in section [sampler] must match mine") - # get the number of live points to use + if cp.has_option(section, "importance_nested_sampler"): importance_nested_sampler = cp.get( section, "importance_nested_sampler", @@ -114,7 +120,7 @@ def from_config(cls, cp, model, output_file=None, nprocesses=1, # Keyword arguments the user cannot configure via the config remove_kwds = [ - "resume", "pool", "n_pool", "close_pool", "signal_handling" + "pool", "n_pool", "close_pool", "signal_handling" ] for kwd in remove_kwds: @@ -143,9 +149,8 @@ def from_config(cls, cp, model, output_file=None, nprocesses=1, raise RuntimeError( f"Config contains unknown options: {invalid_kwds}" ) - - logging.warning(f"nessai keyword arguments: {kwds}") - logging.warning(f"nessai run keyword arguments: {run_kwds}") + logging.info(f"nessai keyword arguments: {kwds}") + logging.info(f"nessai run keyword arguments: {run_kwds}") loglikelihood_function = \ get_optional_arg_from_config(cp, section, 'loglikelihood-function') @@ -159,9 +164,7 @@ def from_config(cls, cp, model, output_file=None, nprocesses=1, extra_kwds=kwds, ) - setup_output(obj, output_file, check_nsamples=False) - if not obj.new_checkpoint: - obj.resume_from_checkpoint() + setup_output(obj, output_file, check_nsamples=False, validate=False) return obj def set_initial_conditions( @@ -177,9 +180,11 @@ def set_initial_conditions( def checkpoint(self): self._sampler.ns.checkpoint() + for fn in [self.checkpoint_file, self.backup_file]: + self.write_results(fn) def resume_from_checkpoint(self): - self.run(resume=True) + pass def finalize(self): logz = self._sampler.log_evidence @@ -227,7 +232,6 @@ def __init__(self, model, loglikelihood_function=None): **{k: [v.min, v.max] for k, v in dist.bounds.items() if k in self.names} ) self.bounds = bounds - # Prior and likelihood are not vectorised self.vectorised_likelihood = False self.vectorised_prior = False From 94c27b2f4e2c7c5993d4fffd0df5df6bcdead119 Mon Sep 17 00:00:00 2001 From: mj-will Date: Wed, 4 Oct 2023 14:13:15 +0100 Subject: [PATCH 05/24] change outdir to avoid namespace conflicts --- pycbc/inference/sampler/nessai.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pycbc/inference/sampler/nessai.py b/pycbc/inference/sampler/nessai.py index ea13a1c2a1c..112eb5578ad 100644 --- a/pycbc/inference/sampler/nessai.py +++ b/pycbc/inference/sampler/nessai.py @@ -44,7 +44,7 @@ def __init__( self.run_kwds = run_kwds if run_kwds is not None else {} nessai.utils.multiprocessing.initialise_pool_variables(self.model_call) - self.pool = choose_pool(mpi=use_mpi, processes=nprocesses) + self.pool = choose_pool(mpi=use_mpi, processes=nprocesses) self.nprocesses = nprocesses self._sampler = None @@ -73,13 +73,13 @@ def samples(self): self.model.sampling_params, ) samples["logwt"] = self._sampler.ns.state.log_posterior_weights - samples["loglikelihood"] = self._sampler.nested_samples["loglikelihood"] + samples["loglikelihood"] = self._sampler.nested_samples["logL"] return samples def run(self): out_dir = os.path.join( os.path.dirname(os.path.abspath(self.checkpoint_file)), - "nessai", + "outdir_nessai", ) if self._sampler is None: self._sampler = nessai.flowsampler.FlowSampler( From 809aef1ebd161b875018771912c471d7edefbdcd Mon Sep 17 00:00:00 2001 From: mj-will Date: Wed, 4 Oct 2023 17:02:15 +0100 Subject: [PATCH 06/24] tweaks to nessai sampler class --- pycbc/inference/sampler/nessai.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/pycbc/inference/sampler/nessai.py b/pycbc/inference/sampler/nessai.py index 112eb5578ad..674cff43ccf 100644 --- a/pycbc/inference/sampler/nessai.py +++ b/pycbc/inference/sampler/nessai.py @@ -77,6 +77,7 @@ def samples(self): return samples def run(self): + """Run the sampler""" out_dir = os.path.join( os.path.dirname(os.path.abspath(self.checkpoint_file)), "outdir_nessai", @@ -112,6 +113,11 @@ def from_config( else: importance_nested_sampler = False + if importance_nested_sampler is True: + raise NotImplementedError( + "Importance nested sampler is not currently supported" + ) + # Determine all possible keyword arguments that are not hardcoded default_kwds, default_run_kwds = nessai.utils.settings.get_all_kwargs( importance_nested_sampler=importance_nested_sampler, @@ -174,29 +180,32 @@ def set_initial_conditions( ): """Sets up the starting point for the sampler. - Should also set the sampler's random state. + This is not used for nessai. """ pass def checkpoint(self): + """Checkpoint the sampler""" self._sampler.ns.checkpoint() for fn in [self.checkpoint_file, self.backup_file]: self.write_results(fn) def resume_from_checkpoint(self): + # nessai will from its own checkpoint file pass def finalize(self): + """Finalize sampling""" logz = self._sampler.log_evidence dlogz = self._sampler.log_evidence_error - for fn in [self.checkpoint_file]: - with self.io(fn, "a") as fp: - fp.write_logevidence(logz, dlogz) + logging.info(f"log Z, dlog Z: {logz}, {dlogz}") + for fn in [self.checkpoint_file, self.backup_file]: self.write_results(fn) def write_results(self, filename): + """Write the results to a given file""" with self.io(filename, "a") as fp: fp.write_samples(self.samples, self.model.sampling_params) fp.write_samples(self.model_stats) @@ -266,3 +275,13 @@ def log_likelihood(self, x): """Compute the log-likelihood""" self.model.update(**self.to_dict(x)) return getattr(self.model, self.loglikelihood_function) + + def from_unit_hypercube(self, x): + """Map from the unit-hypercube to the prior.""" + # Needs to be implemented for importance nested sampler + raise NotImplementedError + + def to_unit_hypercube(self, x): + """Map to the unit-hypercube to the prior.""" + # Needs to be implemented for importance nested sampler + raise NotImplementedError From 7eb9bad77c0f3881b170fc5fe335edf291b3544a Mon Sep 17 00:00:00 2001 From: mj-will Date: Fri, 6 Oct 2023 17:01:01 +0100 Subject: [PATCH 07/24] fix nessai checkpointing and other minor tweaks --- pycbc/inference/io/nessai.py | 24 ++++++- pycbc/inference/sampler/nessai.py | 103 +++++++++++++++++++++--------- 2 files changed, 97 insertions(+), 30 deletions(-) diff --git a/pycbc/inference/io/nessai.py b/pycbc/inference/io/nessai.py index 96734687698..abbd98f818c 100644 --- a/pycbc/inference/io/nessai.py +++ b/pycbc/inference/io/nessai.py @@ -1,8 +1,30 @@ """Provides IO for the nessai sampler""" + from .base_nested_sampler import BaseNestedSamplerFile +from ...io.hdf import dump_state, load_state +from .posterior import write_samples_to_file + class NessaiFile(BaseNestedSamplerFile): """Class to handle file IO for the ``nessai`` sampler.""" - name = 'nessai_file' + name = "nessai_file" + + def write_pickled_data_into_checkpoint_file(self, data): + """Write the pickled data into a checkpoint file""" + if "sampler_info/saved_state" not in self: + self.create_group("sampler_info/saved_state") + dump_state(data, self, path="sampler_info/saved_state") + + def read_pickled_data_from_checkpoint_file(self): + """Read the pickled data from a checkpoint file""" + return load_state(self, path="sampler_info/saved_state") + + def write_raw_samples(self, data, parameters=None): + """Write the nested samples to the file""" + if "samples" not in self: + self.create_group("samples") + write_samples_to_file( + self, data, parameters=parameters, group="samples" + ) diff --git a/pycbc/inference/sampler/nessai.py b/pycbc/inference/sampler/nessai.py index 674cff43ccf..1fd56a57534 100644 --- a/pycbc/inference/sampler/nessai.py +++ b/pycbc/inference/sampler/nessai.py @@ -1,6 +1,8 @@ """ This modules provides class for using the nessai sampler package for parameter estimation. + +Documentation for nessai: https://nessai.readthedocs.io/en/latest/ """ import ast import logging @@ -15,7 +17,7 @@ from .base import BaseSampler, setup_output from .base_mcmc import get_optional_arg_from_config -from ..io import NessaiFile +from ..io import NessaiFile, loadfile from ...pool import choose_pool @@ -53,6 +55,7 @@ def __init__( self._logz = None self._dlogz = None self.checkpoint_file = None + self.resume_data = None @property def io(self): @@ -60,10 +63,7 @@ def io(self): @property def model_stats(self): - return { - "loglikelihood": self._sampler.posterior_samples["logL"], - "logprior": self._sampler.posterior_samples["logP"], - } + pass @property def samples(self): @@ -74,15 +74,36 @@ def samples(self): ) samples["logwt"] = self._sampler.ns.state.log_posterior_weights samples["loglikelihood"] = self._sampler.nested_samples["logL"] + samples["logprior"] = self._sampler.nested_samples["logP"] + samples["it"] = self._sampler.nested_samples["it"] return samples - def run(self): + def run(self, **kwargs): """Run the sampler""" out_dir = os.path.join( os.path.dirname(os.path.abspath(self.checkpoint_file)), "outdir_nessai", ) + default_kwds, default_run_kwds = self.get_default_kwds( + importance_nested_sampler=self.extra_kwds.get( + "importance_nested_sampler", False + ) + ) + + extra_kwds = self.extra_kwds.copy() + run_kwds = self.run_kwds.copy() + + if kwargs is not None: + logging.info(f"Updating keyword arguments with {kwargs}") + extra_kwds.update( + {k: v for k, v in kwargs.items() if k in default_kwds} + ) + run_kwds.update( + {k: v for k, v in kwargs.items() if k in default_run_kwds} + ) + if self._sampler is None: + logging.info("Initialising nessai FlowSampler") self._sampler = nessai.flowsampler.FlowSampler( self.model_call, output=out_dir, @@ -90,9 +111,19 @@ def run(self): n_pool=self.nprocesses, close_pool=False, signal_handling=False, - **self.extra_kwds, + resume_data=self.resume_data, + **extra_kwds, ) - self._sampler.run(**self.run_kwds) + logging.info("Starting sampling with nessai") + self._sampler.run(**run_kwds) + + @staticmethod + def get_default_kwds(importance_nested_sampler=False): + # Determine all possible keyword arguments that are not hardcoded + return nessai.utils.settings.get_all_kwargs( + importance_nested_sampler=importance_nested_sampler, + split_kwargs=True, + ) @classmethod def from_config( @@ -103,31 +134,28 @@ def from_config( """ section = "sampler" # check name - assert cp.get(section, "name") == cls.name, ( - "name in section [sampler] must match mine") + assert ( + cp.get(section, "name") == cls.name + ), "name in section [sampler] must match mine" if cp.has_option(section, "importance_nested_sampler"): importance_nested_sampler = cp.get( - section, "importance_nested_sampler", + section, + "importance_nested_sampler", ) else: importance_nested_sampler = False - if importance_nested_sampler is True: raise NotImplementedError( "Importance nested sampler is not currently supported" ) - # Determine all possible keyword arguments that are not hardcoded - default_kwds, default_run_kwds = nessai.utils.settings.get_all_kwargs( - importance_nested_sampler=importance_nested_sampler, - split_kwargs=True, + default_kwds, default_run_kwds = cls.get_default_kwds( + importance_nested_sampler ) # Keyword arguments the user cannot configure via the config - remove_kwds = [ - "pool", "n_pool", "close_pool", "signal_handling" - ] + remove_kwds = ["pool", "n_pool", "close_pool", "signal_handling"] for kwd in remove_kwds: default_kwds.pop(kwd, None) @@ -158,8 +186,9 @@ def from_config( logging.info(f"nessai keyword arguments: {kwds}") logging.info(f"nessai run keyword arguments: {run_kwds}") - loglikelihood_function = \ - get_optional_arg_from_config(cp, section, 'loglikelihood-function') + loglikelihood_function = get_optional_arg_from_config( + cp, section, "loglikelihood-function" + ) obj = cls( model, @@ -170,7 +199,9 @@ def from_config( extra_kwds=kwds, ) - setup_output(obj, output_file, check_nsamples=False, validate=False) + setup_output(obj, output_file, check_nsamples=False) + if not obj.new_checkpoint: + obj.resume_from_checkpoint() return obj def set_initial_conditions( @@ -186,13 +217,21 @@ def set_initial_conditions( def checkpoint(self): """Checkpoint the sampler""" - self._sampler.ns.checkpoint() for fn in [self.checkpoint_file, self.backup_file]: + with self.io(fn, "a") as fp: + fp.write_pickled_data_into_checkpoint_file(self._sampler.ns) self.write_results(fn) def resume_from_checkpoint(self): - # nessai will from its own checkpoint file - pass + """Reads the resume data from the checkpoint file.""" + try: + with loadfile(self.checkpoint_file, "r") as fp: + self.resume_data = fp.read_pickled_data_from_checkpoint_file() + logging.info( + f"Found valid checkpoint file: {self.checkpoint_file}" + ) + except Exception as e: + logging.info("Failed to load checkpoint file with error: {e}") def finalize(self): """Finalize sampling""" @@ -201,14 +240,15 @@ def finalize(self): logging.info(f"log Z, dlog Z: {logz}, {dlogz}") + self.checkpoint() + for fn in [self.checkpoint_file, self.backup_file]: self.write_results(fn) def write_results(self, filename): """Write the results to a given file""" with self.io(filename, "a") as fp: - fp.write_samples(self.samples, self.model.sampling_params) - fp.write_samples(self.model_stats) + fp.write_raw_samples(self.samples) fp.write_logevidence( self._sampler.log_evidence, self._sampler.log_evidence_error, @@ -225,20 +265,25 @@ class NessaiModel(nessai.model.Model): loglikelihood_function : str Name of the log-likelihood method to call. """ + def __init__(self, model, loglikelihood_function=None): self.model = model self.names = list(model.sampling_params) # Configure the log-likelihood function if loglikelihood_function is None: - loglikelihood_function = 'loglikelihood' + loglikelihood_function = "loglikelihood" self.loglikelihood_function = loglikelihood_function # Configure the priors bounds bounds = {} for dist in model.prior_distribution.distributions: bounds.update( - **{k: [v.min, v.max] for k, v in dist.bounds.items() if k in self.names} + **{ + k: [v.min, v.max] + for k, v in dist.bounds.items() + if k in self.names + } ) self.bounds = bounds # Prior and likelihood are not vectorised From 4169605737bcc3dcaba9dfdb4182e6ed27a27833 Mon Sep 17 00:00:00 2001 From: mj-will Date: Mon, 16 Oct 2023 14:33:32 +0100 Subject: [PATCH 08/24] fix for reading in nessai result files --- pycbc/inference/io/nessai.py | 43 ++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/pycbc/inference/io/nessai.py b/pycbc/inference/io/nessai.py index abbd98f818c..b05b81c6461 100644 --- a/pycbc/inference/io/nessai.py +++ b/pycbc/inference/io/nessai.py @@ -1,16 +1,55 @@ """Provides IO for the nessai sampler""" +import numpy from .base_nested_sampler import BaseNestedSamplerFile from ...io.hdf import dump_state, load_state -from .posterior import write_samples_to_file +from .posterior import read_raw_samples_from_file, write_samples_to_file +from .dynesty import CommonNestedMetadataIO -class NessaiFile(BaseNestedSamplerFile): +class NessaiFile(CommonNestedMetadataIO, BaseNestedSamplerFile): """Class to handle file IO for the ``nessai`` sampler.""" name = "nessai_file" + def read_raw_samples(self, fields, raw_samples=False, seed=0): + """Reads samples from a nessai file and constructs a posterior. + + Using rejection sampling to resample the nested samples + + Parameters + ---------- + fields : list of str + The names of the parameters to load. Names must correspond to + dataset names in the file's ``samples`` group. + raw_samples : bool, optional + Return the raw (unweighted) samples instead of the estimated + posterior samples. Default is False. + + Returns + ------- + dict : + Dictionary of parameter fields -> samples. + """ + samples = read_raw_samples_from_file(self, fields) + logwt = read_raw_samples_from_file(self, ['logwt'])['logwt'] + loglikelihood = read_raw_samples_from_file( + self, ['loglikelihood'])['loglikelihood'] + if not raw_samples: + N = len(logwt) + # Rejection sample + rng = numpy.random.default_rng(seed) + logwt -= logwt.max() + logu = numpy.log(rng.rand(N)) + keep = logwt > logu + post = {'loglikelihood': loglikelihood[keep]} + for param in fields: + post[param] = samples[param][keep] + return post + else: + return samples + def write_pickled_data_into_checkpoint_file(self, data): """Write the pickled data into a checkpoint file""" if "sampler_info/saved_state" not in self: From 25b8edb05b9d3adfac2a600d3373aae998937461 Mon Sep 17 00:00:00 2001 From: mj-will Date: Wed, 29 Nov 2023 16:48:28 +0000 Subject: [PATCH 09/24] use callback for checkpointing in nessai --- pycbc/inference/sampler/nessai.py | 61 ++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 22 deletions(-) diff --git a/pycbc/inference/sampler/nessai.py b/pycbc/inference/sampler/nessai.py index 1fd56a57534..7443e956c6f 100644 --- a/pycbc/inference/sampler/nessai.py +++ b/pycbc/inference/sampler/nessai.py @@ -13,6 +13,7 @@ import nessai.livepoint import nessai.utils.multiprocessing import nessai.utils.settings +import numpy import numpy.lib.recfunctions as rfn from .base import BaseSampler, setup_output @@ -68,14 +69,18 @@ def model_stats(self): @property def samples(self): """The raw nested samples including the corresponding weights""" - samples = nessai.livepoint.live_points_to_dict( - self._sampler.nested_samples, - self.model.sampling_params, - ) - samples["logwt"] = self._sampler.ns.state.log_posterior_weights - samples["loglikelihood"] = self._sampler.nested_samples["logL"] - samples["logprior"] = self._sampler.nested_samples["logP"] - samples["it"] = self._sampler.nested_samples["it"] + if self._sampler.ns.nested_samples: + ns = numpy.array(self._sampler.ns.nested_samples) + samples = nessai.livepoint.live_points_to_dict( + ns, + self.model.sampling_params, + ) + samples["logwt"] = self._sampler.ns.state.log_posterior_weights + samples["loglikelihood"] = ns["logL"] + samples["logprior"] = ns["logP"] + samples["it"] = ns["it"] + else: + samples = {} return samples def run(self, **kwargs): @@ -112,6 +117,7 @@ def run(self, **kwargs): close_pool=False, signal_handling=False, resume_data=self.resume_data, + checkpoint_callback=self.checkpoint_callback, **extra_kwds, ) logging.info("Starting sampling with nessai") @@ -155,7 +161,13 @@ def from_config( ) # Keyword arguments the user cannot configure via the config - remove_kwds = ["pool", "n_pool", "close_pool", "signal_handling"] + remove_kwds = [ + "pool", + "n_pool", + "close_pool", + "signal_handling", + "checkpoint_callback", + ] for kwd in remove_kwds: default_kwds.pop(kwd, None) @@ -215,13 +227,20 @@ def set_initial_conditions( """ pass - def checkpoint(self): - """Checkpoint the sampler""" + def checkpoint_callback(self, state): + """Callback for checkpointing. + + This will be called periodically by nessai. + """ for fn in [self.checkpoint_file, self.backup_file]: with self.io(fn, "a") as fp: - fp.write_pickled_data_into_checkpoint_file(self._sampler.ns) + fp.write_pickled_data_into_checkpoint_file(state) self.write_results(fn) + def checkpoint(self): + """Checkpoint the sampler""" + self.checkpoint_callback(self._sampler.ns) + def resume_from_checkpoint(self): """Reads the resume data from the checkpoint file.""" try: @@ -235,23 +254,21 @@ def resume_from_checkpoint(self): def finalize(self): """Finalize sampling""" - logz = self._sampler.log_evidence - dlogz = self._sampler.log_evidence_error - + logz = self._sampler.ns.log_evidence + dlogz = self._sampler.ns.log_evidence_error logging.info(f"log Z, dlog Z: {logz}, {dlogz}") - self.checkpoint() - for fn in [self.checkpoint_file, self.backup_file]: - self.write_results(fn) - def write_results(self, filename): - """Write the results to a given file""" + """Write the results to a given file. + + Writes the nested samples, log-evidence and log-evidence error. + """ with self.io(filename, "a") as fp: fp.write_raw_samples(self.samples) fp.write_logevidence( - self._sampler.log_evidence, - self._sampler.log_evidence_error, + self._sampler.ns.log_evidence, + self._sampler.ns.log_evidence_error, ) From 2d757fd74a026d970e6b22d4c6c3243fb61c5eae Mon Sep 17 00:00:00 2001 From: mj-will Date: Wed, 29 Nov 2023 17:15:10 +0000 Subject: [PATCH 10/24] start addressing codeclimate issues --- pycbc/inference/io/__init__.py | 2 +- pycbc/inference/io/nessai.py | 4 ++-- pycbc/inference/sampler/nessai.py | 23 ++++++++++++++++------- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/pycbc/inference/io/__init__.py b/pycbc/inference/io/__init__.py index b17ddd7a701..4b3fd0ce909 100644 --- a/pycbc/inference/io/__init__.py +++ b/pycbc/inference/io/__init__.py @@ -50,7 +50,7 @@ DynestyFile.name: DynestyFile, PosteriorFile.name: PosteriorFile, UltranestFile.name: UltranestFile, - NessaiFile.name : NessaiFile, + NessaiFile.name: NessaiFile, } try: diff --git a/pycbc/inference/io/nessai.py b/pycbc/inference/io/nessai.py index b05b81c6461..d647f74256b 100644 --- a/pycbc/inference/io/nessai.py +++ b/pycbc/inference/io/nessai.py @@ -37,11 +37,11 @@ def read_raw_samples(self, fields, raw_samples=False, seed=0): loglikelihood = read_raw_samples_from_file( self, ['loglikelihood'])['loglikelihood'] if not raw_samples: - N = len(logwt) + n = len(logwt) # Rejection sample rng = numpy.random.default_rng(seed) logwt -= logwt.max() - logu = numpy.log(rng.rand(N)) + logu = numpy.log(rng.rand(n)) keep = logwt > logu post = {'loglikelihood': loglikelihood[keep]} for param in fields: diff --git a/pycbc/inference/sampler/nessai.py b/pycbc/inference/sampler/nessai.py index 7443e956c6f..2aba9dbd4f9 100644 --- a/pycbc/inference/sampler/nessai.py +++ b/pycbc/inference/sampler/nessai.py @@ -99,7 +99,7 @@ def run(self, **kwargs): run_kwds = self.run_kwds.copy() if kwargs is not None: - logging.info(f"Updating keyword arguments with {kwargs}") + logging.info("Updating keyword arguments with %s" % kwargs) extra_kwds.update( {k: v for k, v in kwargs.items() if k in default_kwds} ) @@ -125,7 +125,15 @@ def run(self, **kwargs): @staticmethod def get_default_kwds(importance_nested_sampler=False): - # Determine all possible keyword arguments that are not hardcoded + """Return lists of all allowed keyword arguments for nessai. + + Returns + ------- + default_kwds : list + List of keyword arguments that can be passed to FlowSampler + run_kwds: list + List of keyword arguments that can be passed to FlowSampler.run + """ return nessai.utils.settings.get_all_kwargs( importance_nested_sampler=importance_nested_sampler, split_kwargs=True, @@ -195,8 +203,8 @@ def from_config( raise RuntimeError( f"Config contains unknown options: {invalid_kwds}" ) - logging.info(f"nessai keyword arguments: {kwds}") - logging.info(f"nessai run keyword arguments: {run_kwds}") + logging.info("nessai keyword arguments: %s" % kwds) + logging.info("nessai run keyword arguments: %s" % run_kwds) loglikelihood_function = get_optional_arg_from_config( cp, section, "loglikelihood-function" @@ -247,16 +255,16 @@ def resume_from_checkpoint(self): with loadfile(self.checkpoint_file, "r") as fp: self.resume_data = fp.read_pickled_data_from_checkpoint_file() logging.info( - f"Found valid checkpoint file: {self.checkpoint_file}" + "Found valid checkpoint file: %s" % self.checkpoint_file ) except Exception as e: - logging.info("Failed to load checkpoint file with error: {e}") + logging.info("Failed to load checkpoint file with error: %s" % e) def finalize(self): """Finalize sampling""" logz = self._sampler.ns.log_evidence dlogz = self._sampler.ns.log_evidence_error - logging.info(f"log Z, dlog Z: {logz}, {dlogz}") + logging.info("log Z, dlog Z: %s, %s" % (logz, dlogz)) self.checkpoint() def write_results(self, filename): @@ -310,6 +318,7 @@ def __init__(self, model, loglikelihood_function=None): self.parallelise_prior = True def to_dict(self, x): + """Convert nessai a live point array to a dictionary.""" return {n: x[n].item() for n in self.names} def to_live_points(self, x): From 4cf44cdaaaac4b893805f18f32d9452eb1194ef5 Mon Sep 17 00:00:00 2001 From: mj-will Date: Wed, 29 Nov 2023 17:15:49 +0000 Subject: [PATCH 11/24] add nessai to auxiliary samplers --- companion.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/companion.txt b/companion.txt index a101c774632..0f7ac731dfc 100644 --- a/companion.txt +++ b/companion.txt @@ -14,6 +14,7 @@ cpnest pymultinest ultranest https://github.com/willvousden/ptemcee/archive/master.tar.gz +nessai # useful to look at PyCBC Live with htop setproctitle From a1fe1312b973a7c2976aaa6d1f56f86bd1cd2525 Mon Sep 17 00:00:00 2001 From: mj-will Date: Wed, 29 Nov 2023 17:23:39 +0000 Subject: [PATCH 12/24] add additional comments for nessai --- pycbc/inference/sampler/nessai.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pycbc/inference/sampler/nessai.py b/pycbc/inference/sampler/nessai.py index 2aba9dbd4f9..97bd6bf8b04 100644 --- a/pycbc/inference/sampler/nessai.py +++ b/pycbc/inference/sampler/nessai.py @@ -159,6 +159,8 @@ def from_config( ) else: importance_nested_sampler = False + + # Requires additional development work, see the model class below if importance_nested_sampler is True: raise NotImplementedError( "Importance nested sampler is not currently supported" @@ -184,6 +186,9 @@ def from_config( kwds = {} run_kwds = {} + # ast.literal_eval is used here since specifying a dictionary with all + # various types would be difficult. However, one may wish to revisit + # this in future, e.g. if evaluating code is a concern. for d_out, d_defaults in zip( [kwds, run_kwds], [default_kwds, default_run_kwds] ): @@ -219,6 +224,7 @@ def from_config( extra_kwds=kwds, ) + # Do not need to check number of samples for a nested sampler setup_output(obj, output_file, check_nsamples=False) if not obj.new_checkpoint: obj.resume_from_checkpoint() @@ -318,12 +324,12 @@ def __init__(self, model, loglikelihood_function=None): self.parallelise_prior = True def to_dict(self, x): - """Convert nessai a live point array to a dictionary.""" + """Convert a nessai live point array to a dictionary""" return {n: x[n].item() for n in self.names} def to_live_points(self, x): """Convert to the structured arrays used by nessai""" - # TODO: could this be improved? + # It is possible this could be made faster return nessai.livepoint.numpy_array_to_live_points( rfn.structured_to_unstructured(x), self.names, @@ -350,6 +356,7 @@ def log_likelihood(self, x): def from_unit_hypercube(self, x): """Map from the unit-hypercube to the prior.""" # Needs to be implemented for importance nested sampler + # This method is already available in pycbc but the inverse is not raise NotImplementedError def to_unit_hypercube(self, x): From 0735c553fb561823db8283e1be6c6826201ca27f Mon Sep 17 00:00:00 2001 From: mj-will Date: Wed, 29 Nov 2023 17:28:34 +0000 Subject: [PATCH 13/24] make simple sampler example 2d nessai does not support 1d likelihoods, so this change is neede to test nessai in the CI --- examples/inference/samplers/simp.ini | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/examples/inference/samplers/simp.ini b/examples/inference/samplers/simp.ini index e82f9854ebe..88d883a89ea 100644 --- a/examples/inference/samplers/simp.ini +++ b/examples/inference/samplers/simp.ini @@ -3,8 +3,14 @@ name = test_normal [variable_params] x = +y = [prior-x] name = uniform min-x = -10 max-x = 10 + +[prior-y] +name = uniform +min-y = -10 +max-y = 10 From f9f204ae5263c7bcae8e2f19d9ec568230f3add7 Mon Sep 17 00:00:00 2001 From: mj-will Date: Thu, 30 Nov 2023 09:31:23 +0000 Subject: [PATCH 14/24] fix call to rng.random --- pycbc/inference/io/nessai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pycbc/inference/io/nessai.py b/pycbc/inference/io/nessai.py index d647f74256b..9c9fc52afd9 100644 --- a/pycbc/inference/io/nessai.py +++ b/pycbc/inference/io/nessai.py @@ -41,7 +41,7 @@ def read_raw_samples(self, fields, raw_samples=False, seed=0): # Rejection sample rng = numpy.random.default_rng(seed) logwt -= logwt.max() - logu = numpy.log(rng.rand(n)) + logu = numpy.log(rng.random(n)) keep = logwt > logu post = {'loglikelihood': loglikelihood[keep]} for param in fields: From bc369b6ee87549218e9b65f2d2982c9145fe6579 Mon Sep 17 00:00:00 2001 From: mj-will Date: Thu, 30 Nov 2023 09:33:11 +0000 Subject: [PATCH 15/24] add nessai to samplers example and update plot --- examples/inference/samplers/nessai_stub.ini | 3 +++ examples/inference/samplers/run.sh | 8 ++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) create mode 100644 examples/inference/samplers/nessai_stub.ini diff --git a/examples/inference/samplers/nessai_stub.ini b/examples/inference/samplers/nessai_stub.ini new file mode 100644 index 00000000000..cdde947b339 --- /dev/null +++ b/examples/inference/samplers/nessai_stub.ini @@ -0,0 +1,3 @@ +[sampler] +name = nessai +nlive = 200 diff --git a/examples/inference/samplers/run.sh b/examples/inference/samplers/run.sh index 5012f0b251b..5e3d6e6fd30 100755 --- a/examples/inference/samplers/run.sh +++ b/examples/inference/samplers/run.sh @@ -1,5 +1,5 @@ #!/bin/sh -for f in cpnest_stub.ini emcee_stub.ini emcee_pt_stub.ini dynesty_stub.ini ultranest_stub.ini epsie_stub.ini; do +for f in cpnest_stub.ini emcee_stub.ini emcee_pt_stub.ini dynesty_stub.ini ultranest_stub.ini epsie_stub.ini nessai_stub.ini; do echo $f pycbc_inference \ --config-files `dirname $0`/simp.ini `dirname $0`/$f \ @@ -16,4 +16,8 @@ dynesty_stub.ini.hdf:dynesty \ ultranest_stub.ini.hdf:ultranest \ epsie_stub.ini.hdf:espie \ cpnest_stub.ini.hdf:cpnest \ ---output-file sample.png +nessai_stub.ini.hdf:nessai \ +--output-file sample.png \ +--plot-contours \ +--no-contour-labels \ +--no-marginal-titles From cae7be662b15fd60f8564d141d574f1bcbd7a76a Mon Sep 17 00:00:00 2001 From: mj-will Date: Thu, 30 Nov 2023 09:53:53 +0000 Subject: [PATCH 16/24] set minimum version for nessai --- companion.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/companion.txt b/companion.txt index 0f7ac731dfc..06b49762571 100644 --- a/companion.txt +++ b/companion.txt @@ -14,7 +14,7 @@ cpnest pymultinest ultranest https://github.com/willvousden/ptemcee/archive/master.tar.gz -nessai +nessai>=0.11.0 # useful to look at PyCBC Live with htop setproctitle From b84fda675e141b7d2e2b6743b12ee5fa0d4b9396 Mon Sep 17 00:00:00 2001 From: mj-will Date: Thu, 30 Nov 2023 14:21:46 +0000 Subject: [PATCH 17/24] force cpu-only version of torch --- companion.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/companion.txt b/companion.txt index 06b49762571..19ea6b0bb73 100644 --- a/companion.txt +++ b/companion.txt @@ -14,6 +14,9 @@ cpnest pymultinest ultranest https://github.com/willvousden/ptemcee/archive/master.tar.gz +# Force the cpu-only version of PyTorch +--extra-index-url https://download.pytorch.org/whl/cpu +torch nessai>=0.11.0 # useful to look at PyCBC Live with htop From 25c86acccf0920465e483317c496821853ed1ba8 Mon Sep 17 00:00:00 2001 From: mj-will Date: Thu, 30 Nov 2023 15:14:01 +0000 Subject: [PATCH 18/24] add missing epsie jump proposal --- examples/inference/samplers/epsie_stub.ini | 3 +++ 1 file changed, 3 insertions(+) diff --git a/examples/inference/samplers/epsie_stub.ini b/examples/inference/samplers/epsie_stub.ini index 64c210a6045..7953b4ef266 100644 --- a/examples/inference/samplers/epsie_stub.ini +++ b/examples/inference/samplers/epsie_stub.ini @@ -14,3 +14,6 @@ ntemps = 4 [jump_proposal-x] name = normal + +[jump_proposal-y] +name = normal From c871f50c3406b3cfa8ba7b4ecbc2fae88afe19bd Mon Sep 17 00:00:00 2001 From: mj-will Date: Mon, 4 Dec 2023 13:38:23 +0000 Subject: [PATCH 19/24] add plot-marginal to samplers plot --- examples/inference/samplers/run.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/inference/samplers/run.sh b/examples/inference/samplers/run.sh index 5e3d6e6fd30..91e41e7fbb7 100755 --- a/examples/inference/samplers/run.sh +++ b/examples/inference/samplers/run.sh @@ -19,5 +19,6 @@ cpnest_stub.ini.hdf:cpnest \ nessai_stub.ini.hdf:nessai \ --output-file sample.png \ --plot-contours \ +--plot-marginal \ --no-contour-labels \ --no-marginal-titles From 7343d12dd72c185f8ddf7b808590bfff04bc60e0 Mon Sep 17 00:00:00 2001 From: mj-will Date: Mon, 11 Dec 2023 11:51:54 +0000 Subject: [PATCH 20/24] fix whitespace --- pycbc/inference/sampler/nessai.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pycbc/inference/sampler/nessai.py b/pycbc/inference/sampler/nessai.py index 97bd6bf8b04..ed46e46de56 100644 --- a/pycbc/inference/sampler/nessai.py +++ b/pycbc/inference/sampler/nessai.py @@ -126,7 +126,7 @@ def run(self, **kwargs): @staticmethod def get_default_kwds(importance_nested_sampler=False): """Return lists of all allowed keyword arguments for nessai. - + Returns ------- default_kwds : list @@ -159,7 +159,7 @@ def from_config( ) else: importance_nested_sampler = False - + # Requires additional development work, see the model class below if importance_nested_sampler is True: raise NotImplementedError( @@ -243,7 +243,7 @@ def set_initial_conditions( def checkpoint_callback(self, state): """Callback for checkpointing. - + This will be called periodically by nessai. """ for fn in [self.checkpoint_file, self.backup_file]: @@ -275,7 +275,7 @@ def finalize(self): def write_results(self, filename): """Write the results to a given file. - + Writes the nested samples, log-evidence and log-evidence error. """ with self.io(filename, "a") as fp: From 2598c1a7c72d8c524adf74fcb4846a35a972e35e Mon Sep 17 00:00:00 2001 From: mj-will Date: Mon, 11 Dec 2023 13:47:08 +0000 Subject: [PATCH 21/24] use lazy formatting in logging functions --- pycbc/inference/sampler/nessai.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pycbc/inference/sampler/nessai.py b/pycbc/inference/sampler/nessai.py index ed46e46de56..42a11caf8c0 100644 --- a/pycbc/inference/sampler/nessai.py +++ b/pycbc/inference/sampler/nessai.py @@ -99,7 +99,7 @@ def run(self, **kwargs): run_kwds = self.run_kwds.copy() if kwargs is not None: - logging.info("Updating keyword arguments with %s" % kwargs) + logging.info("Updating keyword arguments with %s", kwargs) extra_kwds.update( {k: v for k, v in kwargs.items() if k in default_kwds} ) @@ -208,8 +208,8 @@ def from_config( raise RuntimeError( f"Config contains unknown options: {invalid_kwds}" ) - logging.info("nessai keyword arguments: %s" % kwds) - logging.info("nessai run keyword arguments: %s" % run_kwds) + logging.info("nessai keyword arguments: %s", kwds) + logging.info("nessai run keyword arguments: %s", run_kwds) loglikelihood_function = get_optional_arg_from_config( cp, section, "loglikelihood-function" @@ -261,16 +261,16 @@ def resume_from_checkpoint(self): with loadfile(self.checkpoint_file, "r") as fp: self.resume_data = fp.read_pickled_data_from_checkpoint_file() logging.info( - "Found valid checkpoint file: %s" % self.checkpoint_file + "Found valid checkpoint file: %s", self.checkpoint_file ) except Exception as e: - logging.info("Failed to load checkpoint file with error: %s" % e) + logging.info("Failed to load checkpoint file with error: %s", e) def finalize(self): """Finalize sampling""" logz = self._sampler.ns.log_evidence dlogz = self._sampler.ns.log_evidence_error - logging.info("log Z, dlog Z: %s, %s" % (logz, dlogz)) + logging.info("log Z, dlog Z: %s, %s", logz, dlogz) self.checkpoint() def write_results(self, filename): From d1b33ffbca94cc58c2c00cc7033de7344e56dbfa Mon Sep 17 00:00:00 2001 From: mj-will Date: Mon, 11 Dec 2023 14:13:48 +0000 Subject: [PATCH 22/24] move functions to common nested class --- pycbc/inference/io/dynesty.py | 76 +++++++++++++++++------------------ 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/pycbc/inference/io/dynesty.py b/pycbc/inference/io/dynesty.py index 1d77fa35608..5a79882f3d3 100644 --- a/pycbc/inference/io/dynesty.py +++ b/pycbc/inference/io/dynesty.py @@ -90,6 +90,44 @@ def extra_args_parser(parser=None, skip_args=None, **kwargs): "extracted instead.") return parser, actions + def write_pickled_data_into_checkpoint_file(self, state): + """Dump the sampler state into checkpoint file + """ + if 'sampler_info/saved_state' not in self: + self.create_group('sampler_info/saved_state') + dump_state(state, self, path='sampler_info/saved_state') + + def read_pickled_data_from_checkpoint_file(self): + """Load the sampler state (pickled) from checkpoint file + """ + return load_state(self, path='sampler_info/saved_state') + + def write_raw_samples(self, data, parameters=None): + """Write the nested samples to the file + """ + if 'samples' not in self: + self.create_group('samples') + write_samples_to_file(self, data, parameters=parameters, + group='samples') + def validate(self): + """Runs a validation test. + This checks that a samples group exist, and that pickeled data can + be loaded. + + Returns + ------- + bool : + Whether or not the file is valid as a checkpoint file. + """ + try: + if 'sampler_info/saved_state' in self: + load_state(self, path='sampler_info/saved_state') + checkpoint_valid = True + except KeyError: + checkpoint_valid = False + return checkpoint_valid + + class DynestyFile(CommonNestedMetadataIO, BaseNestedSamplerFile): """Class to handle file IO for the ``dynesty`` sampler.""" @@ -148,41 +186,3 @@ def read_raw_samples(self, fields, raw_samples=False, seed=0): return post else: return samples - - def write_pickled_data_into_checkpoint_file(self, state): - """Dump the sampler state into checkpoint file - """ - if 'sampler_info/saved_state' not in self: - self.create_group('sampler_info/saved_state') - dump_state(state, self, path='sampler_info/saved_state') - - def read_pickled_data_from_checkpoint_file(self): - """Load the sampler state (pickled) from checkpoint file - """ - return load_state(self, path='sampler_info/saved_state') - - def write_raw_samples(self, data, parameters=None): - """Write the nested samples to the file - """ - if 'samples' not in self: - self.create_group('samples') - write_samples_to_file(self, data, parameters=parameters, - group='samples') - - def validate(self): - """Runs a validation test. - This checks that a samples group exist, and that pickeled data can - be loaded. - - Returns - ------- - bool : - Whether or not the file is valid as a checkpoint file. - """ - try: - if 'sampler_info/saved_state' in self: - load_state(self, path='sampler_info/saved_state') - checkpoint_valid = True - except KeyError: - checkpoint_valid = False - return checkpoint_valid From fc7bcf04cb2bd2b888fe6fd861be420c54ecd896 Mon Sep 17 00:00:00 2001 From: mj-will Date: Mon, 11 Dec 2023 14:14:17 +0000 Subject: [PATCH 23/24] update for change common nested class --- pycbc/inference/io/nessai.py | 25 +++---------------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/pycbc/inference/io/nessai.py b/pycbc/inference/io/nessai.py index 9c9fc52afd9..2ba5a5a09c7 100644 --- a/pycbc/inference/io/nessai.py +++ b/pycbc/inference/io/nessai.py @@ -37,33 +37,14 @@ def read_raw_samples(self, fields, raw_samples=False, seed=0): loglikelihood = read_raw_samples_from_file( self, ['loglikelihood'])['loglikelihood'] if not raw_samples: - n = len(logwt) + n_samples = len(logwt) # Rejection sample rng = numpy.random.default_rng(seed) logwt -= logwt.max() - logu = numpy.log(rng.random(n)) + logu = numpy.log(rng.random(n_samples)) keep = logwt > logu post = {'loglikelihood': loglikelihood[keep]} for param in fields: post[param] = samples[param][keep] return post - else: - return samples - - def write_pickled_data_into_checkpoint_file(self, data): - """Write the pickled data into a checkpoint file""" - if "sampler_info/saved_state" not in self: - self.create_group("sampler_info/saved_state") - dump_state(data, self, path="sampler_info/saved_state") - - def read_pickled_data_from_checkpoint_file(self): - """Read the pickled data from a checkpoint file""" - return load_state(self, path="sampler_info/saved_state") - - def write_raw_samples(self, data, parameters=None): - """Write the nested samples to the file""" - if "samples" not in self: - self.create_group("samples") - write_samples_to_file( - self, data, parameters=parameters, group="samples" - ) + return samples From 894f29d33f9916ecb4996cecfbb9456036307b78 Mon Sep 17 00:00:00 2001 From: mj-will Date: Mon, 11 Dec 2023 14:26:59 +0000 Subject: [PATCH 24/24] address more code climate issues --- pycbc/inference/io/nessai.py | 3 +-- pycbc/inference/sampler/nessai.py | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/pycbc/inference/io/nessai.py b/pycbc/inference/io/nessai.py index 2ba5a5a09c7..86c1bcfba41 100644 --- a/pycbc/inference/io/nessai.py +++ b/pycbc/inference/io/nessai.py @@ -3,8 +3,7 @@ from .base_nested_sampler import BaseNestedSamplerFile -from ...io.hdf import dump_state, load_state -from .posterior import read_raw_samples_from_file, write_samples_to_file +from .posterior import read_raw_samples_from_file from .dynesty import CommonNestedMetadataIO diff --git a/pycbc/inference/sampler/nessai.py b/pycbc/inference/sampler/nessai.py index 42a11caf8c0..9da6c7461c7 100644 --- a/pycbc/inference/sampler/nessai.py +++ b/pycbc/inference/sampler/nessai.py @@ -239,7 +239,6 @@ def set_initial_conditions( This is not used for nessai. """ - pass def checkpoint_callback(self, state): """Callback for checkpointing.