diff --git a/.azure-pipelines/azure-pipelines-external.yml b/.azure-pipelines/azure-pipelines-external.yml
index 0a0fd953c1..fd142aa311 100644
--- a/.azure-pipelines/azure-pipelines-external.yml
+++ b/.azure-pipelines/azure-pipelines-external.yml
@@ -14,7 +14,6 @@ jobs:
           python.version: "3.9"
           pystan.version: "latest"
           cmdstanpy.version: "latest"
-          pymc3.version: "latest"
           emcee.version: "latest"
           name: "External latest"
 
@@ -22,7 +21,6 @@ jobs:
           python.version: "3.9"
           pystan.version: 2.19.1.1
           cmdstanpy.version: "github"
-          pymc3.version: "github"
           emcee.version: 2
           name: "External special"
 
@@ -74,14 +72,7 @@ jobs:
           python -m pip --no-cache-dir install "emcee<3"
       fi
 
-      if [ "$(pymc3.version)" = "github" ]; then
-          python -m pip --no-cache-dir --log log.txt install git+https://github.com/pymc-devs/pymc3@v3
-          cat log.txt
-      else
-          python -m pip --no-cache-dir install pymc3
-      fi
-
-      grep -Ev '^pystan|^cmdstanpy|^emcee|^pymc3' requirements-external.txt | xargs python -m pip install
+      grep -Ev '^pystan|^cmdstanpy|^emcee' requirements-external.txt | xargs python -m pip install
 
     displayName: 'Install packages'
diff --git a/arviz/data/__init__.py b/arviz/data/__init__.py
index f34b013c45..d81ecf9911 100644
--- a/arviz/data/__init__.py
+++ b/arviz/data/__init__.py
@@ -12,7 +12,6 @@
 from .io_netcdf import from_netcdf, to_netcdf
 from .io_numpyro import from_numpyro
 from .io_pyjags import from_pyjags
-from .io_pymc3 import from_pymc3, from_pymc3_predictions
 from .io_pyro import from_pyro
 from .io_pystan import from_pystan
 from .utils import extract, extract_dataset
@@ -31,8 +30,6 @@
     "convert_to_inference_data",
     "from_beanmachine",
     "from_pyjags",
-    "from_pymc3",
-    "from_pymc3_predictions",
     "from_pystan",
     "from_emcee",
     "from_cmdstan",
diff --git a/arviz/data/converters.py b/arviz/data/converters.py
index 01cb4ec010..2961f0aaf1 100644
--- a/arviz/data/converters.py
+++ b/arviz/data/converters.py
@@ -9,7 +9,6 @@
 from .io_cmdstanpy import from_cmdstanpy
 from .io_emcee import from_emcee
 from .io_numpyro import from_numpyro
-from .io_pymc3 import from_pymc3
 from .io_pyro import from_pyro
 from .io_pystan import from_pystan
 
@@ -23,14 +22,13 @@ def convert_to_inference_data(obj, *, group="posterior", coords=None, dims=None,
     Parameters
     ----------
-    obj : dict, str, np.ndarray, xr.Dataset, pystan fit, pymc3 trace
+    obj : dict, str, np.ndarray, xr.Dataset, pystan fit
         A supported object to convert to InferenceData:
 
             | InferenceData: returns unchanged
             | str: Attempts to load the cmdstan csv or netcdf dataset from disk
             | pystan fit: Automatically extracts data
             | cmdstanpy fit: Automatically extracts data
             | cmdstan csv-list: Automatically extracts data
-            | pymc3 trace: Automatically extracts data
             | emcee sampler: Automatically extracts data
             | pyro MCMC: Automatically extracts data
             | beanmachine MonteCarloSamples: Automatically extracts data
@@ -89,8 +87,6 @@ def convert_to_inference_data(obj, *, group="posterior", coords=None, dims=None,
             return from_cmdstanpy(**kwargs)
         else:  # pystan or pystan3
             return from_pystan(**kwargs)
-    elif obj.__class__.__name__ == "MultiTrace":  # ugly, but doesn't make PyMC3 a requirement
-        return from_pymc3(trace=kwargs.pop(group), **kwargs)
     elif obj.__class__.__name__ == "EnsembleSampler":  # ugly, but doesn't make emcee a requirement
         return from_emcee(sampler=kwargs.pop(group), **kwargs)
     elif obj.__class__.__name__ == "MonteCarloSamples":
@@ -125,7 +121,6 @@ def convert_to_inference_data(obj, *, group="posterior", coords=None, dims=None,
         "netcdf filename",
         "numpy array",
         "pystan fit",
-        "pymc3 trace",
"emcee fit", "pyro mcmc fit", "numpyro mcmc fit", @@ -152,13 +147,12 @@ def convert_to_dataset(obj, *, group="posterior", coords=None, dims=None): Parameters ---------- - obj : dict, str, np.ndarray, xr.Dataset, pystan fit, pymc3 trace + obj : dict, str, np.ndarray, xr.Dataset, pystan fit A supported object to convert to InferenceData: - InferenceData: returns unchanged - str: Attempts to load the netcdf dataset from disk - pystan fit: Automatically extracts data - - pymc3 trace: Automatically extracts data - xarray.Dataset: adds to InferenceData as only group - xarray.DataArray: creates an xarray dataset as the only group, gives the array an arbitrary name, if name not set diff --git a/arviz/data/io_pymc3.py b/arviz/data/io_pymc3.py deleted file mode 100644 index 765062f3da..0000000000 --- a/arviz/data/io_pymc3.py +++ /dev/null @@ -1,55 +0,0 @@ -# pylint: disable=unused-import -"""PyMC3-specific conversion code.""" -import pkg_resources -import packaging - -__all__ = ["from_pymc3", "from_pymc3_predictions"] - -try: - pymc3_version = pkg_resources.get_distribution("pymc3").version - PYMC3_V4 = packaging.version.parse(pymc3_version) >= packaging.version.parse("4.0") -except pkg_resources.DistributionNotFound: - PYMC3_V4 = False - - -if not PYMC3_V4: - from .io_pymc3_3x import from_pymc3, from_pymc3_predictions -else: - - def from_pymc3( - trace=None, - *, - prior=None, - posterior_predictive=None, - log_likelihood=None, - coords=None, - dims=None, - model=None, - save_warmup=None, - density_dist_obs=True, - ): - """Convert pymc3 data into an InferenceData object. - - Placeholder for function moved to PyMC3. - """ - raise NotImplementedError( - "The converter has been moved to PyMC3 codebase, use pymc3.to_inference_data" - ) - - def from_pymc3_predictions( - predictions, - posterior_trace=None, - model=None, - coords=None, - dims=None, - idata_orig=None, - inplace=False, - ): - """Translate out-of-sample predictions into ``InferenceData``. - - Placeholder for function moved to PyMC3. - """ - raise NotImplementedError( - "The converter has been moved to PyMC3 codebase, " - "use pymc3.to_inference_data_predictions" - ) diff --git a/arviz/data/io_pymc3_3x.py b/arviz/data/io_pymc3_3x.py deleted file mode 100644 index 117946135f..0000000000 --- a/arviz/data/io_pymc3_3x.py +++ /dev/null @@ -1,654 +0,0 @@ -# pylint: disable=unused-import -"""PyMC3-specific conversion code (PyMC3<4.0).""" -import logging -import warnings -from types import ModuleType -from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Union - -import numpy as np -import xarray as xr - -from .. import utils -from ..rcparams import rcParams -from .base import CoordSpec, DimSpec, dict_to_dataset, generate_dims_coords, make_attrs, requires -from .inference_data import InferenceData, concat - -if TYPE_CHECKING: - from typing import Set # pylint: disable=ungrouped-imports - - import pymc3 as pm - - try: - import aesara # pylint: disable=unused-import - except ImportError: - import theano as aesara # pylint: disable=unused-import - from pymc3 import Model, MultiTrace # pylint: disable=invalid-name -else: - MultiTrace = Any # pylint: disable=invalid-name - Model = Any # pylint: disable=invalid-name - -___all__ = [""] - -_log = logging.getLogger(__name__) - -Coords = Dict[str, List[Any]] -Dims = Dict[str, List[str]] -# random variable object ... 
-Var = Any  # pylint: disable=invalid-name
-
-
-def _monkey_patch_pymc3(pm: ModuleType) -> None:  # pylint: disable=invalid-name
-    assert pm.__name__ == "pymc3"
-
-    def fixed_eq(self, other):
-        """Use object identity for MultiObservedRV equality."""
-        return self is other
-
-    if tuple((int(x) for x in pm.__version__.split("."))) < (3, 9):  # type: ignore
-        pm.model.MultiObservedRV.__eq__ = fixed_eq  # type: ignore
-
-
-class PyMC3Converter:  # pylint: disable=too-many-instance-attributes
-    """Encapsulate PyMC3 specific logic."""
-
-    model = None  # type: Optional[pm.Model]
-    nchains = None  # type: int
-    ndraws = None  # type: int
-    posterior_predictive = None  # Type: Optional[Dict[str, np.ndarray]]
-    predictions = None  # Type: Optional[Dict[str, np.ndarray]]
-    prior = None  # Type: Optional[Dict[str, np.ndarray]]
-
-    def __init__(
-        self,
-        *,
-        trace=None,
-        prior=None,
-        posterior_predictive=None,
-        log_likelihood=None,
-        predictions=None,
-        coords: Optional[Coords] = None,
-        dims: Optional[Dims] = None,
-        model=None,
-        save_warmup: Optional[bool] = None,
-        density_dist_obs: bool = True,
-    ):
-        import pymc3
-
-        try:
-            import aesara  # pylint: disable=redefined-outer-name
-        except ImportError:
-            import theano as aesara
-
-        _monkey_patch_pymc3(pymc3)
-
-        self.pymc3 = pymc3
-        self.aesara = aesara
-
-        self.save_warmup = rcParams["data.save_warmup"] if save_warmup is None else save_warmup
-        self.trace = trace
-
-        # this permits us to get the model from command-line argument or from with model:
-        try:
-            self.model = self.pymc3.modelcontext(model or self.model)
-        except TypeError as e:
-            _log.error("Got error %s trying to find log_likelihood in translation.", e)
-            self.model = None
-
-        if self.model is None:
-            warnings.warn(
-                "Using `from_pymc3` without the model will be deprecated in a future release. "
-                "Not using the model will return less accurate and less useful results. "
-                "Make sure you use the model argument or call from_pymc3 within a model context.",
-                FutureWarning,
-            )
-
-        # This next line is brittle and may not work forever, but is a secret
-        # way to access the model from the trace.
-        self.attrs = None
-        if trace is not None:
-            if isinstance(self.trace, InferenceData):
-                raise ValueError(
-                    "Using the `InferenceData` as a `trace` argument won't work. "
-                    "Please use the `arviz.InferenceData.extend` method to extend the "
-                    "`InferenceData` with groups from another `InferenceData`."
-                )
-            if self.model is None:
-                self.model = list(self.trace._straces.values())[  # pylint: disable=protected-access
-                    0
-                ].model
-            self.nchains = trace.nchains if hasattr(trace, "nchains") else 1
-            if hasattr(trace.report, "n_draws") and trace.report.n_draws is not None:
-                self.ndraws = trace.report.n_draws
-                self.attrs = {
-                    "sampling_time": trace.report.t_sampling,
-                    "tuning_steps": trace.report.n_tune,
-                }
-            else:
-                self.ndraws = len(trace)
-                if self.save_warmup:
-                    warnings.warn(
-                        "Warmup samples will be stored in posterior group and will not be"
-                        " excluded from stats and diagnostics."
-                        " Please consider using PyMC3>=3.9 and do not slice the trace manually.",
-                        UserWarning,
-                    )
-            self.ntune = len(self.trace) - self.ndraws
-            self.posterior_trace, self.warmup_trace = self.split_trace()
-        else:
-            self.nchains = self.ndraws = 0
-
-        self.prior = prior
-        self.posterior_predictive = posterior_predictive
-        self.log_likelihood = (
-            rcParams["data.log_likelihood"] if log_likelihood is None else log_likelihood
-        )
-        self.predictions = predictions
-
-        def arbitrary_element(dct: Dict[Any, np.ndarray]) -> np.ndarray:
-            return next(iter(dct.values()))
-
-        if trace is None:
-            # if you have a posterior_predictive built with keep_dims,
-            # you'll lose here, but there's nothing I can do about that.
-            self.nchains = 1
-            get_from = None
-            if predictions is not None:
-                get_from = predictions
-            elif posterior_predictive is not None:
-                get_from = posterior_predictive
-            elif prior is not None:
-                get_from = prior
-            if get_from is None:
-                # pylint: disable=line-too-long
-                raise ValueError(
-                    "When constructing InferenceData must have at least"
-                    " one of trace, prior, posterior_predictive or predictions."
-                )
-
-            aelem = arbitrary_element(get_from)
-            self.ndraws = aelem.shape[0]
-
-        self.coords = {} if coords is None else coords
-        if hasattr(self.model, "coords"):
-            self.coords = {**self.model.coords, **self.coords}
-
-        self.dims = {} if dims is None else dims
-        if hasattr(self.model, "RV_dims"):
-            model_dims = {k: list(v) for k, v in self.model.RV_dims.items()}
-            self.dims = {**model_dims, **self.dims}
-
-        self.density_dist_obs = density_dist_obs
-        self.observations, self.multi_observations = self.find_observations()
-
-    def find_observations(self) -> Tuple[Optional[Dict[str, Var]], Optional[Dict[str, Var]]]:
-        """If there are observations available, return them as a dictionary."""
-        if self.model is None:
-            return (None, None)
-        observations = {}
-        multi_observations = {}
-        for obs in self.model.observed_RVs:
-            if hasattr(obs, "observations"):
-                observations[obs.name] = obs.observations
-            elif hasattr(obs, "data") and self.density_dist_obs:
-                for key, val in obs.data.items():
-                    multi_observations[key] = val.eval() if hasattr(val, "eval") else val
-        return observations, multi_observations
-
-    def split_trace(self) -> Tuple[Union[None, MultiTrace], Union[None, MultiTrace]]:
-        """Split MultiTrace object into posterior and warmup.
-
-        Returns
-        -------
-        trace_posterior: pymc3.MultiTrace or None
-            The slice of the trace corresponding to the posterior. If the posterior
-            trace is empty, None is returned
-        trace_warmup: pymc3.MultiTrace or None
-            The slice of the trace corresponding to the warmup. If the warmup trace is
-            empty or ``save_warmup=False``, None is returned
-        """
-        trace_posterior = None
-        trace_warmup = None
-        if self.save_warmup and self.ntune > 0:
-            trace_warmup = self.trace[: self.ntune]
-        if self.ndraws > 0:
-            trace_posterior = self.trace[self.ntune :]
-        return trace_posterior, trace_warmup
-
-    def log_likelihood_vals_point(self, point, var, log_like_fun):
-        """Compute log likelihood for each observed point."""
-        log_like_val = utils.one_de(log_like_fun(point))
-        if var.missing_values:
-            mask = var.observations.mask
-            if np.ndim(mask) > np.ndim(log_like_val):
-                mask = np.any(mask, axis=-1)
-            log_like_val = np.where(mask, np.nan, log_like_val)
-        return log_like_val
-
-    def _extract_log_likelihood(self, trace):
-        """Compute log likelihood of each observation."""
-        if self.trace is None:
-            return None
-        if self.model is None:
-            return None
-
-        # If we have predictions, then we have a thinned trace which does not
-        # support extracting a log likelihood.
-        if self.log_likelihood is True:
-            cached = [(var, var.logp_elemwise) for var in self.model.observed_RVs]
-        else:
-            cached = [
-                (var, var.logp_elemwise)
-                for var in self.model.observed_RVs
-                if var.name in self.log_likelihood
-            ]
-        try:
-            log_likelihood_dict = (
-                self.pymc3.sampling._DefaultTrace(  # pylint: disable=protected-access
-                    len(trace.chains)
-                )
-            )
-        except AttributeError as err:
-            raise AttributeError(
-                "Installed version of ArviZ requires PyMC3>=3.8. Please upgrade with "
-                "`pip install pymc3>=3.8` or `conda install -c conda-forge pymc3>=3.8`."
-            ) from err
-        for var, log_like_fun in cached:
-            try:
-                for k, chain in enumerate(trace.chains):
-                    log_like_chain = [
-                        self.log_likelihood_vals_point(point, var, log_like_fun)
-                        for point in trace.points([chain])
-                    ]
-                    log_likelihood_dict.insert(var.name, np.stack(log_like_chain), k)
-            except TypeError as e:
-                raise TypeError(
-                    *tuple(["While computing log-likelihood for {var}: "] + list(e.args))
-                ) from e
-        return log_likelihood_dict.trace_dict
-
-    @requires("trace")
-    def posterior_to_xarray(self):
-        """Convert the posterior to an xarray dataset."""
-        var_names = self.pymc3.util.get_default_varnames(
-            self.trace.varnames, include_transformed=False
-        )
-        data = {}
-        data_warmup = {}
-        for var_name in var_names:
-            if self.warmup_trace:
-                data_warmup[var_name] = np.array(
-                    self.warmup_trace.get_values(var_name, combine=False, squeeze=False)
-                )
-            if self.posterior_trace:
-                data[var_name] = np.array(
-                    self.posterior_trace.get_values(var_name, combine=False, squeeze=False)
-                )
-        return (
-            dict_to_dataset(
-                data, library=self.pymc3, coords=self.coords, dims=self.dims, attrs=self.attrs
-            ),
-            dict_to_dataset(
-                data_warmup,
-                library=self.pymc3,
-                coords=self.coords,
-                dims=self.dims,
-                attrs=self.attrs,
-            ),
-        )
-
-    @requires("trace")
-    def sample_stats_to_xarray(self):
-        """Extract sample_stats from PyMC3 trace."""
-        data = {}
-        rename_key = {
-            "model_logp": "lp",
-            "mean_tree_accept": "acceptance_rate",
-            "depth": "tree_depth",
-            "tree_size": "n_steps",
-        }
-        data = {}
-        data_warmup = {}
-        for stat in self.trace.stat_names:
-            name = rename_key.get(stat, stat)
-            if name == "tune":
-                continue
-            if self.warmup_trace:
-                data_warmup[name] = np.array(
-                    self.warmup_trace.get_sampler_stats(stat, combine=False)
-                )
-            if self.posterior_trace:
-                data[name] = np.array(self.posterior_trace.get_sampler_stats(stat, combine=False))
-
-        return (
-            dict_to_dataset(
-                data, library=self.pymc3, dims=None, coords=self.coords, attrs=self.attrs
-            ),
-            dict_to_dataset(
-                data_warmup, library=self.pymc3, dims=None, coords=self.coords, attrs=self.attrs
-            ),
-        )
-
-    @requires("trace")
-    @requires("model")
-    def log_likelihood_to_xarray(self):
-        """Extract log likelihood and log_p data from PyMC3 trace."""
-        if self.predictions or not self.log_likelihood:
-            return None
-        data_warmup = {}
-        data = {}
-        warn_msg = (
-            "Could not compute log_likelihood, it will be omitted. "
-            "Check your model object or set log_likelihood=False"
-        )
-        if self.posterior_trace:
-            try:
-                data = self._extract_log_likelihood(self.posterior_trace)
-            except TypeError:
-                warnings.warn(warn_msg)
-        if self.warmup_trace:
-            try:
-                data_warmup = self._extract_log_likelihood(self.warmup_trace)
-            except TypeError:
-                warnings.warn(warn_msg)
-        return (
-            dict_to_dataset(
-                data, library=self.pymc3, dims=self.dims, coords=self.coords, skip_event_dims=True
-            ),
-            dict_to_dataset(
-                data_warmup,
-                library=self.pymc3,
-                dims=self.dims,
-                coords=self.coords,
-                skip_event_dims=True,
-            ),
-        )
-
-    def translate_posterior_predictive_dict_to_xarray(self, dct) -> xr.Dataset:
-        """Take Dict of variables to numpy ndarrays (samples) and translate into dataset."""
-        data = {}
-        for k, ary in dct.items():
-            shape = ary.shape
-            if shape[0] == self.nchains and shape[1] == self.ndraws:
-                data[k] = ary
-            elif shape[0] == self.nchains * self.ndraws:
-                data[k] = ary.reshape((self.nchains, self.ndraws, *shape[1:]))
-            else:
-                data[k] = utils.expand_dims(ary)
-                # pylint: disable=line-too-long
-                _log.warning(
-                    "posterior predictive variable %s's shape not compatible with number of chains and draws. "
-                    "This can mean that some draws or even whole chains are not represented.",
-                    k,
-                )
-        return dict_to_dataset(data, library=self.pymc3, coords=self.coords, dims=self.dims)
-
-    @requires(["posterior_predictive"])
-    def posterior_predictive_to_xarray(self):
-        """Convert posterior_predictive samples to xarray."""
-        return self.translate_posterior_predictive_dict_to_xarray(self.posterior_predictive)
-
-    @requires(["predictions"])
-    def predictions_to_xarray(self):
-        """Convert predictions (out of sample predictions) to xarray."""
-        return self.translate_posterior_predictive_dict_to_xarray(self.predictions)
-
-    def priors_to_xarray(self):
-        """Convert prior samples (and if possible prior predictive too) to xarray."""
-        if self.prior is None:
-            return {"prior": None, "prior_predictive": None}
-        if self.observations is not None:
-            prior_predictive_vars = list(self.observations.keys())
-            prior_vars = [key for key in self.prior.keys() if key not in prior_predictive_vars]
-        else:
-            prior_vars = list(self.prior.keys())
-            prior_predictive_vars = None
-
-        priors_dict = {
-            group: (
-                None
-                if var_names is None
-                else dict_to_dataset(
-                    {k: utils.expand_dims(self.prior[k]) for k in var_names},
-                    library=self.pymc3,
-                    coords=self.coords,
-                    dims=self.dims,
-                )
-            )
-            for group, var_names in zip(
-                ("prior", "prior_predictive"), (prior_vars, prior_predictive_vars)
-            )
-        }
-        return priors_dict
-
-    @requires(["observations", "multi_observations"])
-    @requires("model")
-    def observed_data_to_xarray(self):
-        """Convert observed data to xarray."""
-        if self.predictions:
-            return None
-        dims = {} if self.dims is None else self.dims
-        observed_data = {}
-        for name, vals in {**self.observations, **self.multi_observations}.items():
-            if hasattr(vals, "get_value"):
-                vals = vals.get_value()
-            vals = utils.one_de(vals)
-            val_dims = dims.get(name)
-            val_dims, coords = generate_dims_coords(
-                vals.shape, name, dims=val_dims, coords=self.coords
-            )
-            # filter coords based on the dims
-            coords = {key: xr.IndexVariable((key,), data=coords[key]) for key in val_dims}
-            observed_data[name] = xr.DataArray(vals, dims=val_dims, coords=coords)
-        return xr.Dataset(data_vars=observed_data, attrs=make_attrs(library=self.pymc3))
-
-    @requires(["trace", "predictions"])
-    @requires("model")
-    def constant_data_to_xarray(self):
-        """Convert constant data to xarray."""
-        # For constant data, we are concerned only with deterministics and data.
-        # The constant data vars must be either pm.Data (TensorSharedVariable) or pm.Deterministic
-        constant_data_vars = {}  # type: Dict[str, Var]
-        for var in self.model.deterministics:
-            if hasattr(self.aesara, "gof"):
-                ancestors_func = self.aesara.gof.graph.ancestors  # pylint: disable=no-member
-            else:
-                ancestors_func = self.aesara.graph.basic.ancestors  # pylint: disable=no-member
-            ancestors = ancestors_func(var.owner.inputs)
-            # no dependency on a random variable
-            if not any((isinstance(a, self.pymc3.model.PyMC3Variable) for a in ancestors)):
-                constant_data_vars[var.name] = var
-
-        def is_data(name, var) -> bool:
-            assert self.model is not None
-            return (
-                var not in self.model.deterministics
-                and var not in self.model.observed_RVs
-                and var not in self.model.free_RVs
-                and var not in self.model.potentials
-                and (self.observations is None or name not in self.observations)
-            )
-
-        # I don't know how to find pm.Data, except that they are named variables that aren't
-        # observed or free RVs, nor are they deterministics, and then we eliminate observations.
-        for name, var in self.model.named_vars.items():
-            if is_data(name, var):
-                constant_data_vars[name] = var
-
-        if not constant_data_vars:
-            return None
-        if self.dims is None:
-            dims = {}
-        else:
-            dims = self.dims
-        constant_data = {}
-        for name, vals in constant_data_vars.items():
-            if hasattr(vals, "get_value"):
-                vals = vals.get_value()
-            # this might be a Deterministic, and must be evaluated
-            elif hasattr(self.model[name], "eval"):
-                vals = self.model[name].eval()
-            vals = np.atleast_1d(vals)
-            val_dims = dims.get(name)
-            val_dims, coords = generate_dims_coords(
-                vals.shape, name, dims=val_dims, coords=self.coords
-            )
-            # filter coords based on the dims
-            coords = {key: xr.IndexVariable((key,), data=coords[key]) for key in val_dims}
-            try:
-                constant_data[name] = xr.DataArray(vals, dims=val_dims, coords=coords)
-            except ValueError as err:
-                raise ValueError(f"Error translating constant_data variable {name}: {err}") from err
-        return xr.Dataset(data_vars=constant_data, attrs=make_attrs(library=self.pymc3))
-
-    def to_inference_data(self):
-        """Convert all available data to an InferenceData object.
-
-        Note that if groups can not be created (e.g., there is no `trace`, so
-        the `posterior` and `sample_stats` can not be extracted), then the InferenceData
-        will not have those groups.
-        """
-        id_dict = {
-            "posterior": self.posterior_to_xarray(),
-            "sample_stats": self.sample_stats_to_xarray(),
-            "log_likelihood": self.log_likelihood_to_xarray(),
-            "posterior_predictive": self.posterior_predictive_to_xarray(),
-            "predictions": self.predictions_to_xarray(),
-            **self.priors_to_xarray(),
-            "observed_data": self.observed_data_to_xarray(),
-        }
-        if self.predictions:
-            id_dict["predictions_constant_data"] = self.constant_data_to_xarray()
-        else:
-            id_dict["constant_data"] = self.constant_data_to_xarray()
-        return InferenceData(save_warmup=self.save_warmup, **id_dict)
-
-
-def from_pymc3(
-    trace=None,
-    *,
-    prior=None,
-    posterior_predictive=None,
-    log_likelihood=None,
-    coords=None,
-    dims=None,
-    model=None,
-    save_warmup=None,
-    density_dist_obs=True,
-):
-    """Convert pymc3 data into an InferenceData object.
-
-    All three of them are optional arguments, but at least one of ``trace``,
-    ``prior`` and ``posterior_predictive`` must be present.
-    For a usage example read the
-    :ref:`Creating InferenceData section on from_pymc3 <creating_InferenceData>`
-
-    Parameters
-    ----------
-    trace : pymc3.MultiTrace, optional
-        Trace generated from MCMC sampling. Output of
-        :py:func:`pymc3:pymc3.sampling.sample`.
-    prior : dict, optional
-        Dictionary with the variable names as keys, and values numpy arrays
-        containing prior and prior predictive samples.
-    posterior_predictive : dict, optional
-        Dictionary with the variable names as keys, and values numpy arrays
-        containing posterior predictive samples.
-    log_likelihood : bool or array_like of str, optional
-        List of variables to calculate `log_likelihood`. Defaults to True which calculates
-        `log_likelihood` for all observed variables. If set to False, log_likelihood is skipped.
-        Defaults to the value of rcParam ``data.log_likelihood``.
-    coords : dict of {str: array-like}, optional
-        Map of coordinate names to coordinate values
-    dims : dict of {str: list of str}, optional
-        Map of variable names to the coordinate names to use to index its dimensions.
-    model : pymc3.Model, optional
-        Model used to generate ``trace``. It is not necessary to pass ``model`` if in
-        ``with`` context.
-    save_warmup : bool, optional
-        Save warmup iterations InferenceData object. If not defined, use default
-        defined by the rcParams.
-    density_dist_obs : bool, default True
-        Store variables passed with ``observed`` arg to
-        :class:`pymc3:pymc.distributions.DensityDist` in the generated InferenceData.
-
-    Returns
-    -------
-    InferenceData
-    """
-    return PyMC3Converter(
-        trace=trace,
-        prior=prior,
-        posterior_predictive=posterior_predictive,
-        log_likelihood=log_likelihood,
-        coords=coords,
-        dims=dims,
-        model=model,
-        save_warmup=save_warmup,
-        density_dist_obs=density_dist_obs,
-    ).to_inference_data()
-
-
-### Later I could have this return ``None`` if the ``idata_orig`` argument is supplied. But
-### perhaps we should have an inplace argument?
-def from_pymc3_predictions(
-    predictions,
-    posterior_trace=None,
-    model=None,
-    coords=None,
-    dims=None,
-    idata_orig=None,
-    inplace=False,
-):
-    """Translate out-of-sample predictions into ``InferenceData``.
-
-    Parameters
-    ----------
-    predictions: Dict[str, np.ndarray]
-        The predictions are the return value of ``pymc3.sample_posterior_predictive``,
-        a dictionary of strings (variable names) to numpy ndarrays (draws).
-    posterior_trace: pm.MultiTrace
-        This should be a trace that has been thinned appropriately for
-        ``pymc3.sample_posterior_predictive``. Specifically, any variable whose shape is
-        a deterministic function of the shape of any predictor (explanatory, independent, etc.)
-        variables must be *removed* from this trace.
-    model: pymc3.Model
-        This argument is *not* optional, unlike in conventional uses of ``from_pymc3``.
-        The reason is that the posterior_trace argument is likely to supply an incorrect
-        value of model.
-    coords: Dict[str, array-like[Any]]
-        Coordinates for the variables. Map from coordinate names to coordinate values.
-    dims: Dict[str, array-like[str]]
-        Map from variable name to ordered set of coordinate names.
-    idata_orig: InferenceData, optional
-        If supplied, then modify this inference data in place, adding ``predictions`` and
-        (if available) ``predictions_constant_data`` groups. If this is not supplied, make a
-        fresh InferenceData
-    inplace: boolean, optional
-        If idata_orig is supplied and inplace is True, merge the predictions into idata_orig,
-        rather than returning a fresh InferenceData object.
-
-    Returns
-    -------
-    InferenceData:
-        May be modified ``idata_orig``.
-    """
-    if inplace and not idata_orig:
-        raise ValueError(
-            (
-                "Do not pass True for inplace unless passing"
-                "an existing InferenceData as idata_orig"
-            )
-        )
-    new_idata = PyMC3Converter(
-        trace=posterior_trace, predictions=predictions, model=model, coords=coords, dims=dims
-    ).to_inference_data()
-    if idata_orig is None:
-        return new_idata
-    elif inplace:
-        concat([idata_orig, new_idata], dim=None, inplace=True)
-        return idata_orig
-    else:
-        # if we are not returning in place, then merge the old groups into the new inference
-        # data and return that.
-        concat([new_idata, idata_orig], dim=None, copy=True, inplace=True)
-        return new_idata
diff --git a/arviz/tests/external_tests/test_data_pymc.py b/arviz/tests/external_tests/test_data_pymc.py
deleted file mode 100644
index 2111fdb445..0000000000
--- a/arviz/tests/external_tests/test_data_pymc.py
+++ /dev/null
@@ -1,701 +0,0 @@
-# pylint: disable=no-member, invalid-name, redefined-outer-name, protected-access, too-many-public-methods
-from sys import version_info
-from typing import Dict, Tuple
-
-import numpy as np
-import pkg_resources
-import packaging
-import pandas as pd
-import pytest
-from numpy import ma
-
-from ... import (  # pylint: disable=wrong-import-position
-    InferenceData,
-    from_dict,
-    from_pymc3,
-    from_pymc3_predictions,
-)
-
-from ..helpers import (  # pylint: disable=unused-import, wrong-import-position
-    chains,
-    check_multiple_attrs,
-    draws,
-    eight_schools_params,
-    importorskip,
-    load_cached_models,
-)
-
-# Skip all tests unless running on pymc3 v3
-try:
-    pymc3_version = pkg_resources.get_distribution("pymc3").version
-    PYMC3_V4 = packaging.version.parse(pymc3_version) >= packaging.version.parse("4.0")
-    PYMC3_installed = True
-    import pymc3 as pm
-except pkg_resources.DistributionNotFound:
-    PYMC3_V4 = False
-    PYMC3_installed = False
-
-pytestmark = pytest.mark.skipif(
-    not PYMC3_installed or PYMC3_V4,
-    reason="Run tests only if pymc3 installed and its version is <4.0",
-)
-
-
-class TestDataPyMC3:
-    @pytest.fixture(scope="class")
-    def data(self, eight_schools_params, draws, chains):
-        class Data:
-            model, obj = load_cached_models(eight_schools_params, draws, chains, "pymc3")["pymc3"]
-
-        return Data
-
-    def get_inference_data(self, data, eight_schools_params):
-        with data.model:
-            prior = pm.sample_prior_predictive()
-            posterior_predictive = pm.sample_posterior_predictive(data.obj)
-
-        return (
-            from_pymc3(
-                trace=data.obj,
-                prior=prior,
-                posterior_predictive=posterior_predictive,
-                coords={"school": np.arange(eight_schools_params["J"])},
-                dims={"theta": ["school"], "eta": ["school"]},
-            ),
-            posterior_predictive,
-        )
-
-    def get_predictions_inference_data(
-        self, data, eight_schools_params, inplace
-    ) -> Tuple[InferenceData, Dict[str, np.ndarray]]:
-        with data.model:
-            prior = pm.sample_prior_predictive()
-            posterior_predictive = pm.sample_posterior_predictive(data.obj)
-
-            idata = from_pymc3(
-                trace=data.obj,
-                prior=prior,
-                coords={"school": np.arange(eight_schools_params["J"])},
-                dims={"theta": ["school"], "eta": ["school"]},
-            )
-            assert isinstance(idata, InferenceData)
-            extended = from_pymc3_predictions(
-                posterior_predictive, idata_orig=idata, inplace=inplace
-            )
-            assert isinstance(extended, InferenceData)
-            assert (id(idata) == id(extended)) == inplace
-        return (extended, posterior_predictive)
-
-    def make_predictions_inference_data(
-        self, data, eight_schools_params
-    ) -> Tuple[InferenceData, Dict[str, np.ndarray]]:
-        with data.model:
-            posterior_predictive = pm.sample_posterior_predictive(data.obj)
-            idata = from_pymc3_predictions(
-                posterior_predictive,
-                posterior_trace=data.obj,
-                coords={"school": np.arange(eight_schools_params["J"])},
-                dims={"theta": ["school"], "eta": ["school"]},
-            )
-        assert isinstance(idata, InferenceData)
-        return idata, posterior_predictive
-
-    def test_from_pymc(self, data, eight_schools_params, chains, draws):
-        inference_data, posterior_predictive = self.get_inference_data(data, eight_schools_params)
-        test_dict = {
-            "posterior": ["mu", "tau", "eta", "theta"],
-            "sample_stats": ["diverging", "lp", "~log_likelihood"],
-            "log_likelihood": ["obs"],
-            "posterior_predictive": ["obs"],
-            "prior": ["mu", "tau", "eta", "theta"],
-            "prior_predictive": ["obs"],
-            "observed_data": ["obs"],
-        }
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails
-        for key, values in posterior_predictive.items():
-            ivalues = inference_data.posterior_predictive[key]
-            for chain in range(chains):
-                assert np.all(
-                    np.isclose(ivalues[chain], values[chain * draws : (chain + 1) * draws])
-                )
-
-    def test_from_pymc_predictions(self, data, eight_schools_params):
-        """Test that we can add predictions to a previously-existing InferenceData."""
-        test_dict = {
-            "posterior": ["mu", "tau", "eta", "theta"],
-            "sample_stats": ["diverging", "lp"],
-            "log_likelihood": ["obs"],
-            "predictions": ["obs"],
-            "prior": ["mu", "tau", "eta", "theta"],
-            "observed_data": ["obs"],
-        }
-
-        # check adding non-destructively
-        inference_data, posterior_predictive = self.get_predictions_inference_data(
-            data, eight_schools_params, False
-        )
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails
-        for key, values in posterior_predictive.items():
-            ivalues = inference_data.predictions[key]
-            assert ivalues.shape[0] == 1  # one chain in predictions
-            assert np.all(np.isclose(ivalues[0], values))
-
-        # check adding in place
-        inference_data, posterior_predictive = self.get_predictions_inference_data(
-            data, eight_schools_params, True
-        )
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails
-        for key, values in posterior_predictive.items():
-            ivalues = inference_data.predictions[key]
-            assert ivalues.shape[0] == 1  # one chain in predictions
-            assert np.all(np.isclose(ivalues[0], values))
-
-    def test_from_pymc_trace_inference_data(self):
-        """Check if the error is raised successfully after passing InferenceData as trace"""
-        idata = from_dict(
-            posterior={"A": np.random.randn(2, 10, 2), "B": np.random.randn(2, 10, 5, 2)}
-        )
-        assert isinstance(idata, InferenceData)
-        with pytest.raises(ValueError):
-            from_pymc3(trace=idata, model=pm.Model())
-
-    def test_from_pymc_predictions_new(self, data, eight_schools_params):
-        # check creating new
-        inference_data, posterior_predictive = self.make_predictions_inference_data(
-            data, eight_schools_params
-        )
-        test_dict = {
-            "posterior": ["mu", "tau", "eta", "theta"],
-            "predictions": ["obs"],
-            "~observed_data": [""],
-        }
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails
-        for key, values in posterior_predictive.items():
-            ivalues = inference_data.predictions[key]
-            # could the following better be done by simply flattening both the ivalues
-            # and the values?
-            if len(ivalues.shape) == 3:
-                ivalues_arr = np.reshape(
-                    ivalues.values, (ivalues.shape[0] * ivalues.shape[1], ivalues.shape[2])
-                )
-            elif len(ivalues.shape) == 2:
-                ivalues_arr = np.reshape(ivalues.values, (ivalues.shape[0] * ivalues.shape[1]))
-            else:
-                raise ValueError(f"Unexpected values shape for variable {key}")
-            assert (ivalues.shape[0] == 2) and (ivalues.shape[1] == 500)
-            assert values.shape[0] == 1000
-            assert np.all(np.isclose(ivalues_arr, values))
-
-    def test_posterior_predictive_keep_size(self, data, chains, draws, eight_schools_params):
-        with data.model:
-            posterior_predictive = pm.sample_posterior_predictive(data.obj, keep_size=True)
-            inference_data = from_pymc3(
-                trace=data.obj,
-                posterior_predictive=posterior_predictive,
-                coords={"school": np.arange(eight_schools_params["J"])},
-                dims={"theta": ["school"], "eta": ["school"]},
-            )
-
-        shape = inference_data.posterior_predictive.obs.shape
-        assert np.all(
-            [obs_s == s for obs_s, s in zip(shape, (chains, draws, eight_schools_params["J"]))]
-        )
-
-    def test_posterior_predictive_warning(self, data, eight_schools_params, caplog):
-        with data.model:
-            posterior_predictive = pm.sample_posterior_predictive(data.obj, 370)
-            inference_data = from_pymc3(
-                trace=data.obj,
-                posterior_predictive=posterior_predictive,
-                coords={"school": np.arange(eight_schools_params["J"])},
-                dims={"theta": ["school"], "eta": ["school"]},
-            )
-
-        records = caplog.records
-        shape = inference_data.posterior_predictive.obs.shape
-        assert np.all([obs_s == s for obs_s, s in zip(shape, (1, 370, eight_schools_params["J"]))])
-        assert len(records) == 1
-        assert records[0].levelname == "WARNING"
-
-    @pytest.mark.skipif(
-        packaging.version.Version(pm.__version__) < packaging.version.Version("3.9.0"),
-        reason="Requires PyMC3 >= 3.9.0",
-    )
-    @pytest.mark.parametrize("use_context", [True, False])
-    def test_autodetect_coords_from_model(self, use_context):
-        df_data = pd.DataFrame(columns=["date"]).set_index("date")
-        dates = pd.date_range(start="2020-05-01", end="2020-05-20")
-        for city, mu in {"Berlin": 15, "San Marino": 18, "Paris": 16}.items():
-            df_data[city] = np.random.normal(  # pylint: disable=unsupported-assignment-operation
-                loc=mu, size=len(dates)
-            )
-        df_data.index = dates
-        df_data.index.name = "date"
-
-        coords = {"date": df_data.index, "city": df_data.columns}
-        with pm.Model(coords=coords) as model:
-            europe_mean = pm.Normal("europe_mean_temp", mu=15.0, sd=3.0)
-            city_offset = pm.Normal("city_offset", mu=0.0, sd=3.0, dims="city")
-            city_temperature = pm.Deterministic(
-                "city_temperature", europe_mean + city_offset, dims="city"
-            )
-
-            data_dims = ("date", "city")
-            data = pm.Data("data", df_data, dims=data_dims)
-            _ = pm.Normal("likelihood", mu=city_temperature, sd=0.5, observed=data, dims=data_dims)
-
-            trace = pm.sample(
-                return_inferencedata=False,
-                compute_convergence_checks=False,
-                cores=1,
-                chains=1,
-                tune=20,
-                draws=30,
-                step=pm.Metropolis(),
-            )
-            if use_context:
-                idata = from_pymc3(trace=trace)
-        if not use_context:
-            idata = from_pymc3(trace=trace, model=model)
-
-        assert "city" in list(idata.posterior.dims)
-        assert "city" in list(idata.observed_data.dims)
-        assert "date" in list(idata.observed_data.dims)
-        np.testing.assert_array_equal(idata.posterior.coords["city"], coords["city"])
-        np.testing.assert_array_equal(idata.observed_data.coords["date"], coords["date"])
-        np.testing.assert_array_equal(idata.observed_data.coords["city"], coords["city"])
-
-    def test_ovewrite_model_coords_dims(self):
-        """Check coords and dims from model object can be partially overwrited."""
-        dim1 = ["a", "b"]
-        new_dim1 = ["c", "d"]
-        coords = {"dim1": dim1, "dim2": ["c1", "c2"]}
-        x_data = np.arange(4).reshape((2, 2))
-        y = x_data + np.random.normal(size=(2, 2))
-        with pm.Model(coords=coords):
-            x = pm.Data("x", x_data, dims=("dim1", "dim2"))
-            beta = pm.Normal("beta", 0, 1, dims="dim1")
-            _ = pm.Normal("obs", x * beta, 1, observed=y, dims=("dim1", "dim2"))
-            trace = pm.sample(100, tune=100)
-            idata1 = from_pymc3(trace)
-            idata2 = from_pymc3(trace, coords={"dim1": new_dim1}, dims={"beta": ["dim2"]})
-
-        test_dict = {"posterior": ["beta"], "observed_data": ["obs"], "constant_data": ["x"]}
-        fails1 = check_multiple_attrs(test_dict, idata1)
-        assert not fails1
-        fails2 = check_multiple_attrs(test_dict, idata2)
-        assert not fails2
-        assert "dim1" in list(idata1.posterior.beta.dims)
-        assert "dim2" in list(idata2.posterior.beta.dims)
-        assert np.all(idata1.constant_data.x.dim1.values == np.array(dim1))
-        assert np.all(idata1.constant_data.x.dim2.values == np.array(["c1", "c2"]))
-        assert np.all(idata2.constant_data.x.dim1.values == np.array(new_dim1))
-        assert np.all(idata2.constant_data.x.dim2.values == np.array(["c1", "c2"]))
-
-    def test_missing_data_model(self):
-        # source pymc3/pymc3/tests/test_missing.py
-        data = ma.masked_values([1, 2, -1, 4, -1], value=-1)
-        model = pm.Model()
-        with model:
-            x = pm.Normal("x", 1, 1)
-            pm.Normal("y", x, 1, observed=data)
-            trace = pm.sample(100, chains=2)
-
-        # make sure that data is really missing
-        (y_missing,) = model.missing_values
-        assert y_missing.tag.test_value.shape == (2,)
-        inference_data = from_pymc3(trace=trace, model=model)
-        test_dict = {"posterior": ["x"], "observed_data": ["y"], "log_likelihood": ["y"]}
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails
-
-    def test_mv_missing_data_model(self):
-        data = ma.masked_values([[1, 2], [2, 2], [-1, 4], [2, -1], [-1, -1]], value=-1)
-
-        model = pm.Model()
-        with model:
-            mu = pm.Normal("mu", 0, 1, shape=2)
-            sd_dist = pm.HalfNormal.dist(1.0)
-            chol, *_ = pm.LKJCholeskyCov("chol_cov", n=2, eta=1, sd_dist=sd_dist, compute_corr=True)
-            pm.MvNormal("y", mu=mu, chol=chol, observed=data)
-            trace = pm.sample(100, chains=2)
-
-        # make sure that data is really missing
-        (y_missing,) = model.missing_values
-        assert y_missing.tag.test_value.shape == (4,)
-        inference_data = from_pymc3(trace=trace, model=model)
-        test_dict = {
-            "posterior": ["mu", "chol_cov"],
-            "observed_data": ["y"],
-            "log_likelihood": ["y"],
-        }
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails
-
-    @pytest.mark.parametrize("log_likelihood", [True, False, ["y1"]])
-    def test_multiple_observed_rv(self, log_likelihood):
-        y1_data = np.random.randn(10)
-        y2_data = np.random.randn(100)
-        with pm.Model():
-            x = pm.Normal("x", 1, 1)
-            pm.Normal("y1", x, 1, observed=y1_data)
-            pm.Normal("y2", x, 1, observed=y2_data)
-            trace = pm.sample(100, chains=2)
-            inference_data = from_pymc3(trace=trace, log_likelihood=log_likelihood)
-        test_dict = {
-            "posterior": ["x"],
-            "observed_data": ["y1", "y2"],
-            "log_likelihood": ["y1", "y2"],
-            "sample_stats": ["diverging", "lp", "~log_likelihood"],
-        }
-        if not log_likelihood:
-            test_dict.pop("log_likelihood")
-            test_dict["~log_likelihood"] = [""]
-        if isinstance(log_likelihood, list):
-            test_dict["log_likelihood"] = ["y1", "~y2"]
-
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails
-
-    @pytest.mark.skipif(
-        version_info < (3, 6), reason="Requires updated PyMC3, which needs Python 3.6"
-    )
-    def test_multiple_observed_rv_without_observations(self):
-        with pm.Model():
-            mu = pm.Normal("mu")
-            x = pm.DensityDist(  # pylint: disable=unused-variable
-                "x", pm.Normal.dist(mu, 1.0).logp, observed={"value": 0.1}
-            )
-            trace = pm.sample(100, chains=2)
-            inference_data = from_pymc3(trace=trace)
-        test_dict = {
-            "posterior": ["mu"],
-            "sample_stats": ["lp"],
-            "log_likelihood": ["x"],
-            "observed_data": ["value", "~x"],
-        }
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails
-        assert inference_data.observed_data.value.dtype.kind == "f"
-
-    @pytest.mark.parametrize("multiobs", (True, False))
-    def test_multiobservedrv_to_observed_data(self, multiobs):
-        # fake regression data, with weights (W)
-        np.random.seed(2019)
-        N = 100
-        X = np.random.uniform(size=N)
-        W = 1 + np.random.poisson(size=N)
-        a, b = 5, 17
-        Y = a + np.random.normal(b * X)
-
-        with pm.Model():
-            a = pm.Normal("a", 0, 10)
-            b = pm.Normal("b", 0, 10)
-            mu = a + b * X
-            sigma = pm.HalfNormal("sigma", 1)
-
-            def weighted_normal(y, w):
-                return w * pm.Normal.dist(mu=mu, sd=sigma).logp(y)
-
-            y_logp = pm.DensityDist(  # pylint: disable=unused-variable
-                "y_logp", weighted_normal, observed={"y": Y, "w": W}
-            )
-            trace = pm.sample(20, tune=20)
-            idata = from_pymc3(trace, density_dist_obs=multiobs)
-        multiobs_str = "" if multiobs else "~"
-        test_dict = {
-            "posterior": ["a", "b", "sigma"],
-            "sample_stats": ["lp"],
-            "log_likelihood": ["y_logp"],
-            f"{multiobs_str}observed_data": ["y", "w"],
-        }
-        fails = check_multiple_attrs(test_dict, idata)
-        assert not fails
-        if multiobs:
-            assert idata.observed_data.y.dtype.kind == "f"
-
-    def test_single_observation(self):
-        with pm.Model():
-            p = pm.Uniform("p", 0, 1)
-            pm.Binomial("w", p=p, n=2, observed=1)
-            trace = pm.sample(500, chains=2)
-            inference_data = from_pymc3(trace=trace)
-
-        assert inference_data
-
-    def test_potential(self):
-        with pm.Model():
-            x = pm.Normal("x", 0.0, 1.0)
-            pm.Potential("z", pm.Normal.dist(x, 1.0).logp(np.random.randn(10)))
-            trace = pm.sample(100, chains=2)
-            inference_data = from_pymc3(trace=trace)
-
-        assert inference_data
-
-    @pytest.mark.parametrize("use_context", [True, False])
-    def test_constant_data(self, use_context):
-        """Test constant_data group behaviour."""
-        with pm.Model():
-            x = pm.Data("x", [1.0, 2.0, 3.0])
-            y = pm.Data("y", [1.0, 2.0, 3.0])
-            beta = pm.Normal("beta", 0, 1)
-            obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
-            trace = pm.sample(100, tune=100)
-            if use_context:
-                inference_data = from_pymc3(trace=trace)
-
-        if not use_context:
-            inference_data = from_pymc3(trace=trace)
-        test_dict = {"posterior": ["beta"], "observed_data": ["obs"], "constant_data": ["x"]}
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails
-
-    def test_predictions_constant_data(self):
-        with pm.Model():
-            x = pm.Data("x", [1.0, 2.0, 3.0])
-            y = pm.Data("y", [1.0, 2.0, 3.0])
-            beta = pm.Normal("beta", 0, 1)
-            obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
-            trace = pm.sample(100, tune=100)
-
-            inference_data = from_pymc3(trace=trace)
-        test_dict = {"posterior": ["beta"], "observed_data": ["obs"], "constant_data": ["x"]}
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails
-
-        with pm.Model():
-            x = pm.Data("x", [1.0, 2.0])
-            y = pm.Data("y", [1.0, 2.0])
-            beta = pm.Normal("beta", 0, 1)
-            obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
-            predictive_trace = pm.sample_posterior_predictive(trace)
-        assert set(predictive_trace.keys()) == {"obs"}
-        # this should be four chains of 100 samples
-        # assert predictive_trace["obs"].shape == (400, 2)
-        # but the shape seems to vary between pymc3 versions
-        inference_data = from_pymc3_predictions(predictive_trace, posterior_trace=trace)
-        test_dict = {"posterior": ["beta"], "~observed_data": [""]}
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails, "Posterior data not copied over as expected."
-        test_dict = {"predictions": ["obs"]}
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails, "Predictions not instantiated as expected."
-        test_dict = {"predictions_constant_data": ["x"]}
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails, "Predictions constant data not instantiated as expected."
-
-    def test_no_trace(self):
-        with pm.Model() as model:
-            x = pm.Data("x", [1.0, 2.0, 3.0])
-            y = pm.Data("y", [1.0, 2.0, 3.0])
-            beta = pm.Normal("beta", 0, 1)
-            obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
-            trace = pm.sample(100, tune=100)
-            prior = pm.sample_prior_predictive()
-            posterior_predictive = pm.sample_posterior_predictive(trace)
-
-        # Only prior
-        inference_data = from_pymc3(prior=prior, model=model)
-        test_dict = {"prior": ["beta"], "prior_predictive": ["obs"]}
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails
-        # Only posterior_predictive
-        inference_data = from_pymc3(posterior_predictive=posterior_predictive, model=model)
-        test_dict = {"posterior_predictive": ["obs"]}
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails
-        # Prior and posterior_predictive but no trace
-        inference_data = from_pymc3(
-            prior=prior, posterior_predictive=posterior_predictive, model=model
-        )
-        test_dict = {
-            "prior": ["beta"],
-            "prior_predictive": ["obs"],
-            "posterior_predictive": ["obs"],
-        }
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails
-
-    @pytest.mark.parametrize("use_context", [True, False])
-    def test_priors_with_model(self, use_context):
-        """Test model is enough to get prior, prior predictive and observed_data."""
-        with pm.Model() as model:
-            x = pm.Data("x", [1.0, 2.0, 3.0])
-            y = pm.Data("y", [1.0, 2.0, 3.0])
-            beta = pm.Normal("beta", 0, 1)
-            obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
-            prior = pm.sample_prior_predictive()
-
-        test_dict = {
-            "prior": ["beta", "~obs"],
-            "observed_data": ["obs"],
-            "prior_predictive": ["obs"],
-        }
-        if use_context:
-            with model:  # pylint: disable=not-context-manager
-                inference_data = from_pymc3(prior=prior)
-        else:
-            inference_data = from_pymc3(prior=prior, model=model)
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails
-
-    def test_no_model_deprecation(self):
-        with pm.Model():
-            x = pm.Data("x", [1.0, 2.0, 3.0])
-            y = pm.Data("y", [1.0, 2.0, 3.0])
-            beta = pm.Normal("beta", 0, 1)
-            obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
-            prior = pm.sample_prior_predictive()
-
-        with pytest.warns(FutureWarning, match="without the model"):
-            inference_data = from_pymc3(prior=prior)
-        test_dict = {
-            "prior": ["beta", "obs"],
-            "~prior_predictive": [""],
-        }
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails
-
-    def test_multivariate_observations(self):
-        coords = {"direction": ["x", "y", "z"], "experiment": np.arange(20)}
-        data = np.random.multinomial(20, [0.2, 0.3, 0.5], size=20)
-        with pm.Model(coords=coords):
-            p = pm.Beta("p", 1, 1, shape=(3,))
-            pm.Multinomial("y", 20, p, dims=("experiment", "direction"), observed=data)
-            idata = pm.sample(draws=50, tune=100, return_inferencedata=True)
-        test_dict = {
-            "posterior": ["p"],
-            "sample_stats": ["lp"],
-            "log_likelihood": ["y"],
-            "observed_data": ["y"],
-        }
-        fails = check_multiple_attrs(test_dict, idata)
-        assert not fails
-        assert "direction" not in idata.log_likelihood.dims
-        assert "direction" in idata.observed_data.dims
-
-
-class TestPyMC3WarmupHandling:
-    @pytest.mark.skipif(
-        not hasattr(pm.backends.base.SamplerReport, "n_draws"),
-        reason="requires pymc3 3.9 or higher",
-    )
-    @pytest.mark.parametrize("save_warmup", [False, True])
-    @pytest.mark.parametrize("chains", [1, 2])
-    @pytest.mark.parametrize("tune,draws", [(0, 50), (10, 40), (30, 0)])
-    def test_save_warmup(self, save_warmup, chains, tune, draws):
-        with pm.Model():
-            pm.Uniform("u1")
-            pm.Normal("n1")
-            trace = pm.sample(
-                tune=tune,
-                draws=draws,
-                chains=chains,
-                cores=1,
-                step=pm.Metropolis(),
-                discard_tuned_samples=False,
-            )
-        assert isinstance(trace, pm.backends.base.MultiTrace)
-        idata = from_pymc3(trace, save_warmup=save_warmup)
-        warmup_prefix = "" if save_warmup and (tune > 0) else "~"
-        post_prefix = "" if draws > 0 else "~"
-        test_dict = {
-            f"{post_prefix}posterior": ["u1", "n1"],
-            f"{post_prefix}sample_stats": ["~tune", "accept"],
-            f"{warmup_prefix}warmup_posterior": ["u1", "n1"],
-            f"{warmup_prefix}warmup_sample_stats": ["~tune"],
-            "~warmup_log_likelihood": [""],
-            "~log_likelihood": [""],
-        }
-        fails = check_multiple_attrs(test_dict, idata)
-        assert not fails
-        if hasattr(idata, "posterior"):
-            assert idata.posterior.dims["chain"] == chains
-            assert idata.posterior.dims["draw"] == draws
-        if hasattr(idata, "warmup_posterior"):
-            assert idata.warmup_posterior.dims["chain"] == chains
-            assert idata.warmup_posterior.dims["draw"] == tune
-
-    @pytest.mark.skipif(
-        hasattr(pm.backends.base.SamplerReport, "n_draws"),
-        reason="requires pymc3 3.8 or lower",
-    )
-    def test_save_warmup_issue_1208_before_3_9(self):
-        with pm.Model():
-            pm.Uniform("u1")
-            pm.Normal("n1")
-            trace = pm.sample(
-                tune=100,
-                draws=200,
-                chains=2,
-                cores=1,
-                step=pm.Metropolis(),
-                discard_tuned_samples=False,
-            )
-        assert isinstance(trace, pm.backends.base.MultiTrace)
-        assert len(trace) == 300
-
-        # <=3.8 did not track n_draws in the sampler report,
-        # making from_pymc3 fall back to len(trace) and triggering a warning
-        with pytest.warns(UserWarning, match="Warmup samples"):
-            idata = from_pymc3(trace, save_warmup=True)
-        test_dict = {
-            "posterior": ["u1", "n1"],
-            "sample_stats": ["~tune", "accept"],
-            "~warmup_posterior": [""],
-            "~warmup_sample_stats": [""],
-        }
-        fails = check_multiple_attrs(test_dict, idata)
-        assert not fails
-        assert idata.posterior.dims["draw"] == 300
-        assert idata.posterior.dims["chain"] == 2
-
-    @pytest.mark.skipif(
-        not hasattr(pm.backends.base.SamplerReport, "n_draws"),
-        reason="requires pymc3 3.9 or higher",
-    )
-    def test_save_warmup_issue_1208_after_3_9(self):
-        with pm.Model():
-            pm.Uniform("u1")
-            pm.Normal("n1")
-            trace = pm.sample(
-                tune=100,
-                draws=200,
-                chains=2,
-                cores=1,
-                step=pm.Metropolis(),
-                discard_tuned_samples=False,
-            )
-        assert isinstance(trace, pm.backends.base.MultiTrace)
-        assert len(trace) == 300
-
-        # from original trace, warmup draws should be separated out
-        idata = from_pymc3(trace, save_warmup=True)
-        test_dict = {
-            "posterior": ["u1", "n1"],
-            "sample_stats": ["~tune", "accept"],
-            "warmup_posterior": ["u1", "n1"],
-            "warmup_sample_stats": ["~tune", "accept"],
-        }
-        fails = check_multiple_attrs(test_dict, idata)
-        assert not fails
-        assert idata.posterior.dims["chain"] == 2
-        assert idata.posterior.dims["draw"] == 200
-
-        # manually sliced trace triggers the same warning as <=3.8
-        with pytest.warns(UserWarning, match="Warmup samples"):
-            idata = from_pymc3(trace[-30:], save_warmup=True)
-        test_dict = {
-            "posterior": ["u1", "n1"],
-            "sample_stats": ["~tune", "accept"],
-            "~warmup_posterior": [""],
-            "~warmup_sample_stats": [""],
-        }
-        fails = check_multiple_attrs(test_dict, idata)
-        assert not fails
-        assert idata.posterior.dims["chain"] == 2
-        assert idata.posterior.dims["draw"] == 30
diff --git a/arviz/tests/helpers.py b/arviz/tests/helpers.py
index 5c2382c9a3..2ab42a191c 100644
--- a/arviz/tests/helpers.py
+++ b/arviz/tests/helpers.py
@@ -486,20 +486,6 @@ def pystan_noncentered_schools(data, draws, chains):
     return stan_model, fit
 
 
-def pymc3_noncentered_schools(data, draws, chains):
-    """Non-centered eight schools implementation for pymc3."""
-    import pymc3 as pm
-
-    with pm.Model() as model:
-        mu = pm.Normal("mu", mu=0, sd=5)
-        tau = pm.HalfCauchy("tau", beta=5)
-        eta = pm.Normal("eta", mu=0, sd=1, shape=data["J"])
-        theta = pm.Deterministic("theta", mu + tau * eta)
-        pm.Normal("obs", mu=theta, sd=data["sigma"], observed=data["y"])
-        trace = pm.sample(draws, chains=chains)
-    return model, trace
-
-
 def library_handle(library):
     """Import a library and return the handle."""
     if library == "pystan":
@@ -513,11 +499,10 @@ def library_handle(library):
 
 
 def load_cached_models(eight_schools_data, draws, chains, libs=None):
-    """Load pymc3, pystan, emcee, and pyro models from pickle."""
+    """Load pystan, emcee, and pyro models from pickle."""
     here = os.path.dirname(os.path.abspath(__file__))
     supported = (
         ("pystan", pystan_noncentered_schools),
-        ("pymc3", pymc3_noncentered_schools),
         ("emcee", emcee_schools_model),
         ("pyro", pyro_noncentered_schools),
         ("numpyro", numpyro_schools_model),
diff --git a/doc/source/api/data.rst b/doc/source/api/data.rst
index 06030f96a9..72b50033e4 100644
--- a/doc/source/api/data.rst
+++ b/doc/source/api/data.rst
@@ -17,8 +17,6 @@ Inference library converters
    from_emcee
    from_numpyro
    from_pyjags
-   from_pymc3
-   from_pymc3_predictions
    from_pyro
    from_pystan
 
diff --git a/requirements-external.txt b/requirements-external.txt
index 917381e29f..ca44445aae 100644
--- a/requirements-external.txt
+++ b/requirements-external.txt
@@ -1,7 +1,6 @@
 beanmachine
 emcee
 pyjags
-pymc3 @ git+https://github.com/pymc-devs/pymc3
 pystan
 cmdstanpy
 pyro-ppl>=1.0.0
diff --git a/requirements-test.txt b/requirements-test.txt
new file mode 100644
index 0000000000..25a69e0498
--- /dev/null
+++ b/requirements-test.txt
@@ -0,0 +1,6 @@
+pytest
+pytest-cov
+cloudpickle
+
+-r requirements-optional.txt
+-r requirements-external.txt
\ No newline at end of file