add some extra rcparams (#1611)
* add some extra rcparams

* black+changelog

* fix imports

* fix logic

* fix tests

* Fix log_likelihood behaviour

* add more tests

* fix typo

* fix lint

* black

* mypy fix

* add plot.density_type

* black

* update changelog

* update mypy.ini

* fix tests

* update empty test

* fix logic again

Co-authored-by: Ari Hartikainen <ahartikainen@users.noreply.github.com>
Co-authored-by: Ari Hartikainen <hartikainen.ari@gmail.com>
Co-authored-by: Ari Hartikainen <ari.hartikainen@ramboll.fi>
4 people authored Mar 26, 2021
1 parent 8a1fd2c commit 23e14fb
Showing 21 changed files with 240 additions and 127 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -9,6 +9,7 @@
* Improved retrieving of pointwise log likelihood in `from_cmdstanpy`, `from_cmdstan` and `from_pystan` ([1579](https://github.com/arviz-devs/arviz/pull/1579) and [1599](https://github.com/arviz-devs/arviz/pull/1599))
* Added interactive legend to bokeh `forestplot` ([1591](https://github.com/arviz-devs/arviz/pull/1591))
* Added interactive legend to bokeh `ppcplot` ([1602](https://github.com/arviz-devs/arviz/pull/1602))
* Added `data.log_likelihood`, `stats.ic_compare_method` and `plot.density_kind` to `rcParams` ([1611](https://github.com/arviz-devs/arviz/pull/1611))

### Maintenance and fixes
* Enforced using coordinate values as default labels ([1201](https://github.com/arviz-devs/arviz/pull/1201))
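For reference, a hedged sketch of how the three new keys from the changelog entry above might be used; the values shown ("stacking", "kde", False) are assumed to be accepted by the new validators rather than taken from the diff.

```python
import arviz as az

# Sketch only: set the new rcParams globally.
az.rcParams["data.log_likelihood"] = False           # converters skip the log_likelihood group
az.rcParams["stats.ic_compare_method"] = "stacking"  # default method used by az.compare
az.rcParams["plot.density_kind"] = "kde"             # default 1D density representation
```

Any of these can also be scoped to a single block with `az.rc_context` instead of being set globally.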
2 changes: 2 additions & 0 deletions arviz/data/io_cmdstan.py
@@ -96,6 +96,8 @@ def __init__(
and any(name.split(".")[0] == "log_lik" for name in self.posterior_columns)
):
self.log_likelihood = ["log_lik"]
elif isinstance(self.log_likelihood, bool):
self.log_likelihood = None

@requires("posterior_")
def _parse_posterior(self):
17 changes: 11 additions & 6 deletions arviz/data/io_cmdstanpy.py
@@ -43,24 +43,29 @@ def __init__(
self.observed_data = observed_data
self.constant_data = constant_data
self.predictions_constant_data = predictions_constant_data
self.log_likelihood = log_likelihood
self.log_likelihood = (
rcParams["data.log_likelihood"] if log_likelihood is None else log_likelihood
)
self.index_origin = index_origin
self.coords = coords
self.dims = dims

self.save_warmup = rcParams["data.save_warmup"] if save_warmup is None else save_warmup

if hasattr(self.posterior, "stan_vars_cols"):
if self.log_likelihood is None and "log_lik" in self.posterior.stan_vars_cols:
if self.log_likelihood is True and "log_lik" in self.posterior.stan_vars_cols:
self.log_likelihood = ["log_lik"]
else:
if (
self.log_likelihood is None
self.log_likelihood is True
and self.posterior is not None
and any(name.split("[")[0] == "log_lik" for name in self.posterior.column_names)
):
self.log_likelihood = ["log_lik"]

if isinstance(self.log_likelihood, bool):
self.log_likelihood = None

import cmdstanpy # pylint: disable=import-error

self.cmdstanpy = cmdstanpy
@@ -733,12 +738,12 @@ def from_cmdstanpy(
Constant data used in the sampling.
predictions_constant_data : dict
Constant data for predictions used in the sampling.
log_likelihood : str, list of str, dict of {str: str}
log_likelihood : str, list of str, dict of {str: str}, optional
Pointwise log_likelihood for the data. If a dict, its keys should represent var_names
from the corresponding observed data and its values the stan variable where the
data is stored. By default, if a variable ``log_lik`` is present in the Stan model,
it will be retrieved as pointwise log likelihood values. Use ``False`` to avoid this
behaviour.
it will be retrieved as pointwise log likelihood values. Use ``False``
or set ``data.log_likelihood`` to false to avoid this behaviour.
index_origin : int, optional
Starting value of integer coordinate values. Defaults to the value in rcParam
``data.index_origin``.
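To illustrate the docstring above, a minimal sketch assuming a CmdStanPy fit object `fit` whose Stan model defines `log_lik`; neither call appears in the diff, and `from_cmdstan` behaves analogously.

```python
import arviz as az

# Opt out explicitly via the keyword argument:
idata = az.from_cmdstanpy(posterior=fit, log_likelihood=False)
# "log_likelihood" should not appear in idata.groups()

# Equivalent opt-out via the new rcParam, scoped with rc_context:
with az.rc_context(rc={"data.log_likelihood": False}):
    idata = az.from_cmdstanpy(posterior=fit)
```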
11 changes: 10 additions & 1 deletion arviz/data/io_numpyro.py
@@ -5,6 +5,7 @@
import numpy as np

from .. import utils
from ..rcparams import rcParams
from .base import dict_to_dataset, requires
from .inference_data import InferenceData

@@ -29,6 +30,7 @@ def __init__(
predictions=None,
constant_data=None,
predictions_constant_data=None,
log_likelihood=None,
index_origin=None,
coords=None,
dims=None,
@@ -70,7 +72,10 @@ def __init__(
self.predictions = predictions
self.constant_data = constant_data
self.predictions_constant_data = predictions_constant_data
self.index_origin = index_origin
self.log_likelihood = (
rcParams["data.log_likelihood"] if log_likelihood is None else log_likelihood
)
self.index_origin = rcParams["data.index_origin"] if index_origin is None else index_origin
self.coords = coords
self.dims = dims
self.pred_dims = pred_dims
@@ -170,6 +175,8 @@ def sample_stats_to_xarray(self):
@requires("model")
def log_likelihood_to_xarray(self):
"""Extract log likelihood from NumPyro posterior."""
if not self.log_likelihood:
return None
data = {}
if self.observations is not None:
samples = self.posterior.get_samples(group_by_chain=False)
@@ -317,6 +324,7 @@ def from_numpyro(
predictions=None,
constant_data=None,
predictions_constant_data=None,
log_likelihood=None,
index_origin=None,
coords=None,
dims=None,
@@ -359,6 +367,7 @@ def from_numpyro(
predictions=predictions,
constant_data=constant_data,
predictions_constant_data=predictions_constant_data,
log_likelihood=log_likelihood,
index_origin=index_origin,
coords=coords,
dims=dims,
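A similar sketch for the NumPyro converter, assuming a fitted `MCMC` object `mcmc`: disabling the new argument makes `log_likelihood_to_xarray` return early, so the model's log density is not re-evaluated for every posterior draw.

```python
import arviz as az

# log_likelihood can now be turned off per call or via the data.log_likelihood rcParam.
idata = az.from_numpyro(mcmc, log_likelihood=False)
```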
9 changes: 6 additions & 3 deletions arviz/data/io_pymc3.py
@@ -63,7 +63,7 @@ def __init__(
trace=None,
prior=None,
posterior_predictive=None,
log_likelihood=True,
log_likelihood=None,
predictions=None,
coords: Optional[Coords] = None,
dims: Optional[Dims] = None,
@@ -137,7 +137,9 @@ def __init__(

self.prior = prior
self.posterior_predictive = posterior_predictive
self.log_likelihood = log_likelihood
self.log_likelihood = (
rcParams["data.log_likelihood"] if log_likelihood is None else log_likelihood
)
self.predictions = predictions

def arbitrary_element(dct: Dict[Any, np.ndarray]) -> np.ndarray:
@@ -523,7 +525,7 @@ def from_pymc3(
*,
prior: Optional[Dict[str, Any]] = None,
posterior_predictive: Optional[Dict[str, Any]] = None,
log_likelihood: Union[bool, Iterable[str]] = True,
log_likelihood: Union[bool, Iterable[str], None] = None,
coords: Optional[CoordSpec] = None,
dims: Optional[DimSpec] = None,
model: Optional[Model] = None,
@@ -551,6 +553,7 @@
log_likelihood : bool or array_like of str, optional
List of variables to calculate `log_likelihood`. Defaults to the value of rcParam
``data.log_likelihood``; when True, `log_likelihood` is calculated for all observed
variables. If set to False, log_likelihood is skipped.
coords : dict of {str: array-like}, optional
Map of coordinate names to coordinate values
dims : dict of {str: list of str}, optional
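For the PyMC3 converter, `log_likelihood` also accepts an iterable of observed variable names; a hedged sketch in which `trace`, `model` and the name `y_obs` are illustrative assumptions.

```python
import arviz as az

# Compute pointwise log likelihood only for "y_obs", regardless of data.log_likelihood.
idata = az.from_pymc3(trace, log_likelihood=["y_obs"], model=model)
```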
65 changes: 28 additions & 37 deletions arviz/data/io_pyro.py
@@ -4,11 +4,11 @@
import warnings

import numpy as np
import xarray as xr
from packaging import version

from .. import utils
from .base import dict_to_dataset, generate_dims_coords, make_attrs, requires
from ..rcparams import rcParams
from .base import dict_to_dataset, requires
from .inference_data import InferenceData

_log = logging.getLogger(__name__)
@@ -29,7 +29,7 @@ def __init__(
posterior=None,
prior=None,
posterior_predictive=None,
log_likelihood=True,
log_likelihood=None,
predictions=None,
constant_data=None,
predictions_constant_data=None,
@@ -66,13 +66,15 @@ def __init__(
self.posterior = posterior
self.prior = prior
self.posterior_predictive = posterior_predictive
self.log_likelihood = log_likelihood
self.log_likelihood = (
rcParams["data.log_likelihood"] if log_likelihood is None else log_likelihood
)
self.predictions = predictions
self.constant_data = constant_data
self.predictions_constant_data = predictions_constant_data
self.coords = coords
self.dims = dims
self.pred_dims = pred_dims
self.dims = {} if dims is None else dims
self.pred_dims = {} if pred_dims is None else pred_dims
import pyro

def arbitrary_element(dct):
@@ -226,43 +228,31 @@ def observed_data_to_xarray(self):
dims = {}
else:
dims = self.dims
observed_data = {}
for name, vals in self.observations.items():
vals = utils.one_de(vals)
val_dims = dims.get(name)
val_dims, coords = generate_dims_coords(
vals.shape, name, dims=val_dims, coords=self.coords
)
# filter coords based on the dims
coords = {key: xr.IndexVariable((key,), data=coords[key]) for key in val_dims}
observed_data[name] = xr.DataArray(vals, dims=val_dims, coords=coords)
return xr.Dataset(data_vars=observed_data, attrs=make_attrs(library=self.pyro))

def convert_constant_data_to_xarray(self, dct, dims):
"""Convert constant_data or predictions_constant_data to xarray."""
if dims is None:
dims = {}
constant_data = {}
for name, vals in dct.items():
vals = utils.one_de(vals)
val_dims = dims.get(name)
val_dims, coords = generate_dims_coords(
vals.shape, name, dims=val_dims, coords=self.coords
)
# filter coords based on the dims
coords = {key: xr.IndexVariable((key,), data=coords[key]) for key in val_dims}
constant_data[name] = xr.DataArray(vals, dims=val_dims, coords=coords)
return xr.Dataset(data_vars=constant_data, attrs=make_attrs(library=self.pyro))
return dict_to_dataset(
self.observations, library=self.pyro, coords=self.coords, dims=dims, default_dims=[]
)

@requires("constant_data")
def constant_data_to_xarray(self):
"""Convert constant_data to xarray."""
return self.convert_constant_data_to_xarray(self.constant_data, self.dims)
return dict_to_dataset(
self.constant_data,
library=self.pyro,
coords=self.coords,
dims=self.dims,
default_dims=[],
)

@requires("predictions_constant_data")
def predictions_constant_data_to_xarray(self):
"""Convert predictions_constant_data to xarray."""
return self.convert_constant_data_to_xarray(self.predictions_constant_data, self.pred_dims)
return dict_to_dataset(
self.predictions_constant_data,
library=self.pyro,
coords=self.coords,
dims=self.pred_dims,
default_dims=[],
)

def to_inference_data(self):
"""Convert all available data to an InferenceData object."""
@@ -286,7 +276,7 @@ def from_pyro(
*,
prior=None,
posterior_predictive=None,
log_likelihood=True,
log_likelihood=None,
predictions=None,
constant_data=None,
predictions_constant_data=None,
@@ -310,7 +300,8 @@
posterior_predictive : dict
Posterior predictive samples for the posterior
log_likelihood : bool, optional
Calculate and store pointwise log likelihood values.
Calculate and store pointwise log likelihood values. Defaults to the value
of rcParam ``data.log_likelihood``.
predictions: dict
Out of sample predictions
constant_data: dict
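The io_pyro refactor above swaps the hand-rolled xarray construction for `dict_to_dataset(..., default_dims=[])`. A small sketch of that pattern, with made-up data:

```python
import numpy as np
from arviz.data.base import dict_to_dataset

# default_dims=[] keeps dict_to_dataset from prepending the usual
# ("chain", "draw") dimensions, which is what observed_data and
# constant_data groups need.
ds = dict_to_dataset({"x": np.arange(5)}, default_dims=[])
print(ds["x"].dims)  # expected: ("x_dim_0",)
```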
21 changes: 16 additions & 5 deletions arviz/data/io_pystan.py
@@ -49,17 +49,21 @@ def __init__(
self.observed_data = observed_data
self.constant_data = constant_data
self.predictions_constant_data = predictions_constant_data
self.log_likelihood = log_likelihood
self.log_likelihood = (
rcParams["data.log_likelihood"] if log_likelihood is None else log_likelihood
)
self.coords = coords
self.dims = dims
self.save_warmup = rcParams["data.save_warmup"] if save_warmup is None else save_warmup

if (
self.log_likelihood is None
self.log_likelihood is True
and self.posterior is not None
and "log_lik" in self.posterior.sim["pars_oi"]
):
self.log_likelihood = ["log_lik"]
elif isinstance(self.log_likelihood, bool):
self.log_likelihood = None

import pystan # pylint: disable=import-error

@@ -316,16 +320,20 @@ def __init__(
self.observed_data = observed_data
self.constant_data = constant_data
self.predictions_constant_data = predictions_constant_data
self.log_likelihood = log_likelihood
self.log_likelihood = (
rcParams["data.log_likelihood"] if log_likelihood is None else log_likelihood
)
self.coords = coords
self.dims = dims

if (
self.log_likelihood is None
self.log_likelihood is True
and self.posterior is not None
and "log_lik" in self.posterior.param_names
):
self.log_likelihood = ["log_lik"]
elif isinstance(self.log_likelihood, bool):
self.log_likelihood = None

import stan # pylint: disable=import-error

@@ -929,7 +937,10 @@ def from_pystan(
posterior. It is recommended to use this argument as a dictionary whose keys
are observed variable names and its values are the variables storing log
likelihood arrays in the Stan code. In other cases, a dictionary with keys
equal to its values is used.
equal to its values is used. By default, if a variable ``log_lik`` is
present in the Stan model, it will be retrieved as pointwise log
likelihood values. Use ``False`` or set ``data.log_likelihood`` to
false to avoid this behaviour.
coords : dict[str, iterable]
A dictionary containing the values that are used as index. The key
is the name of the dimension, the values are the index values.
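A hedged example of the dictionary form recommended by the `from_pystan` docstring above, assuming a fit whose Stan code stores the pointwise values for observations `y` in `log_lik_y` (both names are illustrative).

```python
import arviz as az

idata = az.from_pystan(
    posterior=fit,
    observed_data=["y"],
    log_likelihood={"y": "log_lik_y"},  # observed variable -> Stan variable holding its log likelihood
)
```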
4 changes: 2 additions & 2 deletions arviz/plots/backends/bokeh/energyplot.py
@@ -45,7 +45,7 @@ def plot_energy(
fill_kwargs = {} if fill_kwargs is None else fill_kwargs
plot_kwargs = {} if plot_kwargs is None else plot_kwargs
plot_kwargs.setdefault("line_width", line_width)
if kind in {"hist", "histogram"}:
if kind == "hist":
legend = False

if ax is None:
@@ -103,7 +103,7 @@ def plot_energy(
)
)

elif kind in {"hist", "histogram"}:
elif kind == "hist":
hist_kwargs = plot_kwargs.copy()
hist_kwargs.update(**fill_kwargs)

4 changes: 2 additions & 2 deletions arviz/plots/backends/matplotlib/energyplot.py
@@ -43,7 +43,7 @@ def plot_energy(
_, ax = create_axes_grid(1, backend_kwargs=backend_kwargs)

fill_kwargs = matplotlib_kwarg_dealiaser(fill_kwargs, "hexbin")
types = "hist" if kind in {"hist", "histogram"} else "plot"
types = "hist" if kind == "hist" else "plot"
plot_kwargs = matplotlib_kwarg_dealiaser(plot_kwargs, types)

_colors = [
@@ -82,7 +82,7 @@ def plot_energy(
ax=ax,
legend=False,
)
elif kind in {"hist", "histogram"}:
elif kind == "hist":
for alpha, color, label, value in series:
ax.hist(
value.flatten(),
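Both energy-plot backends above now branch only on the literal `"hist"`. A short usage sketch, assuming `idata` carries an `energy` variable in `sample_stats`:

```python
import arviz as az

az.plot_energy(idata, kind="hist")  # the backends no longer special-case "histogram"
```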
7 changes: 4 additions & 3 deletions arviz/plots/distplot.py
@@ -51,8 +51,9 @@ def plot_dist(
color : string
valid matplotlib color
kind : string
By default ("auto") continuous variables are plotted using KDEs and discrete ones using
histograms. To override this use "hist" to plot histograms and "kde" for KDEs
By default ("auto") continuous variables will use the kind defined by rcParam
``plot.density_kind`` and discrete ones will use histograms.
To override this use "hist" to plot histograms and "kde" for KDEs
cumulative : bool
If true plot the estimated cumulative distribution function. Defaults to False.
Ignored for 2D KDE
@@ -172,7 +173,7 @@ def plot_dist(
raise TypeError('Invalid "kind":{}. Select from {{"auto","kde","hist"}}'.format(kind))

if kind == "auto":
kind = "hist" if values.dtype.kind == "i" else "kde"
kind = "hist" if values.dtype.kind == "i" else rcParams["plot.density_kind"]

dist_plot_args = dict(
# User Facing API that can be simplified
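To illustrate the `plot_dist` change above, a sketch with random data: under `kind="auto"`, continuous values follow `plot.density_kind`, while integer data still falls back to a histogram.

```python
import numpy as np
import arviz as az

rng = np.random.default_rng(0)

with az.rc_context(rc={"plot.density_kind": "hist"}):
    az.plot_dist(rng.normal(size=1000))   # continuous data, drawn as a histogram via the rcParam

az.plot_dist(rng.poisson(3, size=1000))   # integer data: always plotted as "hist"
```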