add some extra rcparams (#1611)
* add some extra rcparams

* black+changelog

* fix imports

* fix logic

* fix tests

* Fix log_likelihood behaviour

* add more tests

* fix typo

* fix lint

* black

* mypy fix

* add plot.density_type

* black

* update changelog

* update mypy.ini

* fix tests

* update empty test

* fix logic again

Co-authored-by: Ari Hartikainen <ahartikainen@users.noreply.github.com>
Co-authored-by: Ari Hartikainen <hartikainen.ari@gmail.com>
Co-authored-by: Ari Hartikainen <ari.hartikainen@ramboll.fi>
4 people authored Mar 26, 2021
1 parent 8a1fd2c commit 23e14fb
Showing 21 changed files with 240 additions and 127 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -9,6 +9,7 @@
* Improved retrieving of pointwise log likelihood in `from_cmdstanpy`, `from_cmdstan` and `from_pystan` ([1579](https://github.com/arviz-devs/arviz/pull/1579) and [1599](https://github.com/arviz-devs/arviz/pull/1599))
* Added interactive legend to bokeh `forestplot` ([1591](https://github.com/arviz-devs/arviz/pull/1591))
* Added interactive legend to bokeh `ppcplot` ([1602](https://github.com/arviz-devs/arviz/pull/1602))
* Added `data.log_likelihood`, `stats.ic_compare_method` and `plot.density_kind` to `rcParams` ([1611](https://github.com/arviz-devs/arviz/pull/1611))

### Maintenance and fixes
* Enforced using coordinate values as default labels ([1201](https://github.com/arviz-devs/arviz/pull/1201))
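For reference, a hedged sketch of how the three new keys from the changelog entry above might be used; the values shown ("stacking", "kde", False) are assumed to be accepted by the new validators rather than taken from the diff.

```python
import arviz as az

# Sketch only: set the new rcParams globally.
az.rcParams["data.log_likelihood"] = False           # converters skip the log_likelihood group
az.rcParams["stats.ic_compare_method"] = "stacking"  # default method used by az.compare
az.rcParams["plot.density_kind"] = "kde"             # default 1D density representation
```

Any of these can also be scoped to a single block with `az.rc_context` instead of being set globally.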
2 changes: 2 additions & 0 deletions arviz/data/io_cmdstan.py
@@ -96,6 +96,8 @@ def __init__(
and any(name.split(".")[0] == "log_lik" for name in self.posterior_columns)
):
self.log_likelihood = ["log_lik"]
elif isinstance(self.log_likelihood, bool):
self.log_likelihood = None

@requires("posterior_")
def _parse_posterior(self):
17 changes: 11 additions & 6 deletions arviz/data/io_cmdstanpy.py
@@ -43,24 +43,29 @@ def __init__(
self.observed_data = observed_data
self.constant_data = constant_data
self.predictions_constant_data = predictions_constant_data
self.log_likelihood = log_likelihood
self.log_likelihood = (
rcParams["data.log_likelihood"] if log_likelihood is None else log_likelihood
)
self.index_origin = index_origin
self.coords = coords
self.dims = dims

self.save_warmup = rcParams["data.save_warmup"] if save_warmup is None else save_warmup

if hasattr(self.posterior, "stan_vars_cols"):
if self.log_likelihood is None and "log_lik" in self.posterior.stan_vars_cols:
if self.log_likelihood is True and "log_lik" in self.posterior.stan_vars_cols:
self.log_likelihood = ["log_lik"]
else:
if (
self.log_likelihood is None
self.log_likelihood is True
and self.posterior is not None
and any(name.split("[")[0] == "log_lik" for name in self.posterior.column_names)
):
self.log_likelihood = ["log_lik"]

if isinstance(self.log_likelihood, bool):
self.log_likelihood = None

import cmdstanpy # pylint: disable=import-error

self.cmdstanpy = cmdstanpy
@@ -733,12 +738,12 @@ def from_cmdstanpy(
Constant data used in the sampling.
predictions_constant_data : dict
Constant data for predictions used in the sampling.
log_likelihood : str, list of str, dict of {str: str}
log_likelihood : str, list of str, dict of {str: str}, optional
Pointwise log_likelihood for the data. If a dict, its keys should represent var_names
from the corresponding observed data and its values the stan variable where the
data is stored. By default, if a variable ``log_lik`` is present in the Stan model,
it will be retrieved as pointwise log likelihood values. Use ``False`` to avoid this
behaviour.
it will be retrieved as pointwise log likelihood values. Use ``False``
or set ``data.log_likelihood`` to false to avoid this behaviour.
index_origin : int, optional
Starting value of integer coordinate values. Defaults to the value in rcParam
``data.index_origin``.
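To illustrate the docstring above, a minimal sketch assuming a CmdStanPy fit object `fit` whose Stan model defines `log_lik`; neither call appears in the diff, and `from_cmdstan` behaves analogously.

```python
import arviz as az

# Opt out explicitly via the keyword argument:
idata = az.from_cmdstanpy(posterior=fit, log_likelihood=False)
# "log_likelihood" should not appear in idata.groups()

# Equivalent opt-out via the new rcParam, scoped with rc_context:
with az.rc_context(rc={"data.log_likelihood": False}):
    idata = az.from_cmdstanpy(posterior=fit)
```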
11 changes: 10 additions & 1 deletion arviz/data/io_numpyro.py
@@ -5,6 +5,7 @@
import numpy as np

from .. import utils
from ..rcparams import rcParams
from .base import dict_to_dataset, requires
from .inference_data import InferenceData

@@ -29,6 +30,7 @@ def __init__(
predictions=None,
constant_data=None,
predictions_constant_data=None,
log_likelihood=None,
index_origin=None,
coords=None,
dims=None,
@@ -70,7 +72,10 @@ def __init__(
self.predictions = predictions
self.constant_data = constant_data
self.predictions_constant_data = predictions_constant_data
self.index_origin = index_origin
self.log_likelihood = (
rcParams["data.log_likelihood"] if log_likelihood is None else log_likelihood
)
self.index_origin = rcParams["data.index_origin"] if index_origin is None else index_origin
self.coords = coords
self.dims = dims
self.pred_dims = pred_dims
@@ -170,6 +175,8 @@ def sample_stats_to_xarray(self):
@requires("model")
def log_likelihood_to_xarray(self):
"""Extract log likelihood from NumPyro posterior."""
if not self.log_likelihood:
return None
data = {}
if self.observations is not None:
samples = self.posterior.get_samples(group_by_chain=False)
@@ -317,6 +324,7 @@ def from_numpyro(
predictions=None,
constant_data=None,
predictions_constant_data=None,
log_likelihood=None,
index_origin=None,
coords=None,
dims=None,
@@ -359,6 +367,7 @@ def from_numpyro(
predictions=predictions,
constant_data=constant_data,
predictions_constant_data=predictions_constant_data,
log_likelihood=log_likelihood,
index_origin=index_origin,
coords=coords,
dims=dims,
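A similar sketch for the NumPyro converter, assuming a fitted `MCMC` object `mcmc`: disabling the new argument makes `log_likelihood_to_xarray` return early, so the model's log density is not re-evaluated for every posterior draw.

```python
import arviz as az

# log_likelihood can now be turned off per call or via the data.log_likelihood rcParam.
idata = az.from_numpyro(mcmc, log_likelihood=False)
```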
9 changes: 6 additions & 3 deletions arviz/data/io_pymc3.py
@@ -63,7 +63,7 @@ def __init__(
trace=None,
prior=None,
posterior_predictive=None,
log_likelihood=True,
log_likelihood=None,
predictions=None,
coords: Optional[Coords] = None,
dims: Optional[Dims] = None,
@@ -137,7 +137,9 @@ def __init__(

self.prior = prior
self.posterior_predictive = posterior_predictive
self.log_likelihood = log_likelihood
self.log_likelihood = (
rcParams["data.log_likelihood"] if log_likelihood is None else log_likelihood
)
self.predictions = predictions

def arbitrary_element(dct: Dict[Any, np.ndarray]) -> np.ndarray:
@@ -523,7 +525,7 @@ def from_pymc3(
*,
prior: Optional[Dict[str, Any]] = None,
posterior_predictive: Optional[Dict[str, Any]] = None,
log_likelihood: Union[bool, Iterable[str]] = True,
log_likelihood: Union[bool, Iterable[str], None] = None,
coords: Optional[CoordSpec] = None,
dims: Optional[DimSpec] = None,
model: Optional[Model] = None,
@@ -551,6 +553,7 @@
log_likelihood : bool or array_like of str, optional
List of variables to calculate `log_likelihood`. Defaults to the value of rcParam
``data.log_likelihood``; when True, `log_likelihood` is calculated for all observed
variables. If set to False, log_likelihood is skipped.
coords : dict of {str: array-like}, optional
Map of coordinate names to coordinate values
dims : dict of {str: list of str}, optional
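For the PyMC3 converter, `log_likelihood` also accepts an iterable of observed variable names; a hedged sketch in which `trace`, `model` and the name `y_obs` are illustrative assumptions.

```python
import arviz as az

# Compute pointwise log likelihood only for "y_obs", regardless of data.log_likelihood.
idata = az.from_pymc3(trace, log_likelihood=["y_obs"], model=model)
```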
65 changes: 28 additions & 37 deletions arviz/data/io_pyro.py
@@ -4,11 +4,11 @@
import warnings

import numpy as np
import xarray as xr
from packaging import version

from .. import utils
from .base import dict_to_dataset, generate_dims_coords, make_attrs, requires
from ..rcparams import rcParams
from .base import dict_to_dataset, requires
from .inference_data import InferenceData

_log = logging.getLogger(__name__)
@@ -29,7 +29,7 @@ def __init__(
posterior=None,
prior=None,
posterior_predictive=None,
log_likelihood=True,
log_likelihood=None,
predictions=None,
constant_data=None,
predictions_constant_data=None,
@@ -66,13 +66,15 @@ def __init__(
self.posterior = posterior
self.prior = prior
self.posterior_predictive = posterior_predictive
self.log_likelihood = log_likelihood
self.log_likelihood = (
rcParams["data.log_likelihood"] if log_likelihood is None else log_likelihood
)
self.predictions = predictions
self.constant_data = constant_data
self.predictions_constant_data = predictions_constant_data
self.coords = coords
self.dims = dims
self.pred_dims = pred_dims
self.dims = {} if dims is None else dims
self.pred_dims = {} if pred_dims is None else pred_dims
import pyro

def arbitrary_element(dct):
@@ -226,43 +228,31 @@ def observed_data_to_xarray(self):
dims = {}
else:
dims = self.dims
observed_data = {}
for name, vals in self.observations.items():
vals = utils.one_de(vals)
val_dims = dims.get(name)
val_dims, coords = generate_dims_coords(
vals.shape, name, dims=val_dims, coords=self.coords
)
# filter coords based on the dims
coords = {key: xr.IndexVariable((key,), data=coords[key]) for key in val_dims}
observed_data[name] = xr.DataArray(vals, dims=val_dims, coords=coords)
return xr.Dataset(data_vars=observed_data, attrs=make_attrs(library=self.pyro))

def convert_constant_data_to_xarray(self, dct, dims):
"""Convert constant_data or predictions_constant_data to xarray."""
if dims is None:
dims = {}
constant_data = {}
for name, vals in dct.items():
vals = utils.one_de(vals)
val_dims = dims.get(name)
val_dims, coords = generate_dims_coords(
vals.shape, name, dims=val_dims, coords=self.coords
)
# filter coords based on the dims
coords = {key: xr.IndexVariable((key,), data=coords[key]) for key in val_dims}
constant_data[name] = xr.DataArray(vals, dims=val_dims, coords=coords)
return xr.Dataset(data_vars=constant_data, attrs=make_attrs(library=self.pyro))
return dict_to_dataset(
self.observations, library=self.pyro, coords=self.coords, dims=dims, default_dims=[]
)

@requires("constant_data")
def constant_data_to_xarray(self):
"""Convert constant_data to xarray."""
return self.convert_constant_data_to_xarray(self.constant_data, self.dims)
return dict_to_dataset(
self.constant_data,
library=self.pyro,
coords=self.coords,
dims=self.dims,
default_dims=[],
)

@requires("predictions_constant_data")
def predictions_constant_data_to_xarray(self):
"""Convert predictions_constant_data to xarray."""
return self.convert_constant_data_to_xarray(self.predictions_constant_data, self.pred_dims)
return dict_to_dataset(
self.predictions_constant_data,
library=self.pyro,
coords=self.coords,
dims=self.pred_dims,
default_dims=[],
)

def to_inference_data(self):
"""Convert all available data to an InferenceData object."""
@@ -286,7 +276,7 @@ def from_pyro(
*,
prior=None,
posterior_predictive=None,
log_likelihood=True,
log_likelihood=None,
predictions=None,
constant_data=None,
predictions_constant_data=None,
@@ -310,7 +300,8 @@
posterior_predictive : dict
Posterior predictive samples for the posterior
log_likelihood : bool, optional
Calculate and store pointwise log likelihood values.
Calculate and store pointwise log likelihood values. Defaults to the value
of rcParam ``data.log_likelihood``.
predictions: dict
Out of sample predictions
constant_data: dict
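The io_pyro refactor above swaps the hand-rolled xarray construction for `dict_to_dataset(..., default_dims=[])`. A small sketch of that pattern, with made-up data:

```python
import numpy as np
from arviz.data.base import dict_to_dataset

# default_dims=[] keeps dict_to_dataset from prepending the usual
# ("chain", "draw") dimensions, which is what observed_data and
# constant_data groups need.
ds = dict_to_dataset({"x": np.arange(5)}, default_dims=[])
print(ds["x"].dims)  # expected: ("x_dim_0",)
```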
21 changes: 16 additions & 5 deletions arviz/data/io_pystan.py
@@ -49,17 +49,21 @@ def __init__(
self.observed_data = observed_data
self.constant_data = constant_data
self.predictions_constant_data = predictions_constant_data
self.log_likelihood = log_likelihood
self.log_likelihood = (
rcParams["data.log_likelihood"] if log_likelihood is None else log_likelihood
)
self.coords = coords
self.dims = dims
self.save_warmup = rcParams["data.save_warmup"] if save_warmup is None else save_warmup

if (
self.log_likelihood is None
self.log_likelihood is True
and self.posterior is not None
and "log_lik" in self.posterior.sim["pars_oi"]
):
self.log_likelihood = ["log_lik"]
elif isinstance(self.log_likelihood, bool):
self.log_likelihood = None

import pystan # pylint: disable=import-error

@@ -316,16 +320,20 @@ def __init__(
self.observed_data = observed_data
self.constant_data = constant_data
self.predictions_constant_data = predictions_constant_data
self.log_likelihood = log_likelihood
self.log_likelihood = (
rcParams["data.log_likelihood"] if log_likelihood is None else log_likelihood
)
self.coords = coords
self.dims = dims

if (
self.log_likelihood is None
self.log_likelihood is True
and self.posterior is not None
and "log_lik" in self.posterior.param_names
):
self.log_likelihood = ["log_lik"]
elif isinstance(self.log_likelihood, bool):
self.log_likelihood = None

import stan # pylint: disable=import-error

@@ -929,7 +937,10 @@ def from_pystan(
posterior. It is recommended to use this argument as a dictionary whose keys
are observed variable names and its values are the variables storing log
likelihood arrays in the Stan code. In other cases, a dictionary with keys
equal to its values is used.
equal to its values is used. By default, if a variable ``log_lik`` is
present in the Stan model, it will be retrieved as pointwise log
likelihood values. Use ``False`` or set ``data.log_likelihood`` to
false to avoid this behaviour.
coords : dict[str, iterable]
A dictionary containing the values that are used as index. The key
is the name of the dimension, the values are the index values.
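A hedged example of the dictionary form recommended by the `from_pystan` docstring above, assuming a fit whose Stan code stores the pointwise values for observations `y` in `log_lik_y` (both names are illustrative).

```python
import arviz as az

idata = az.from_pystan(
    posterior=fit,
    observed_data=["y"],
    log_likelihood={"y": "log_lik_y"},  # observed variable -> Stan variable holding its log likelihood
)
```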
4 changes: 2 additions & 2 deletions arviz/plots/backends/bokeh/energyplot.py
@@ -45,7 +45,7 @@ def plot_energy(
fill_kwargs = {} if fill_kwargs is None else fill_kwargs
plot_kwargs = {} if plot_kwargs is None else plot_kwargs
plot_kwargs.setdefault("line_width", line_width)
if kind in {"hist", "histogram"}:
if kind == "hist":
legend = False

if ax is None:
@@ -103,7 +103,7 @@ def plot_energy(
)
)

elif kind in {"hist", "histogram"}:
elif kind == "hist":
hist_kwargs = plot_kwargs.copy()
hist_kwargs.update(**fill_kwargs)

4 changes: 2 additions & 2 deletions arviz/plots/backends/matplotlib/energyplot.py
@@ -43,7 +43,7 @@ def plot_energy(
_, ax = create_axes_grid(1, backend_kwargs=backend_kwargs)

fill_kwargs = matplotlib_kwarg_dealiaser(fill_kwargs, "hexbin")
types = "hist" if kind in {"hist", "histogram"} else "plot"
types = "hist" if kind == "hist" else "plot"
plot_kwargs = matplotlib_kwarg_dealiaser(plot_kwargs, types)

_colors = [
@@ -82,7 +82,7 @@ def plot_energy(
ax=ax,
legend=False,
)
elif kind in {"hist", "histogram"}:
elif kind == "hist":
for alpha, color, label, value in series:
ax.hist(
value.flatten(),
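Both energy-plot backends above now branch only on the literal `"hist"`. A short usage sketch, assuming `idata` carries an `energy` variable in `sample_stats`:

```python
import arviz as az

az.plot_energy(idata, kind="hist")  # the backends no longer special-case "histogram"
```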
7 changes: 4 additions & 3 deletions arviz/plots/distplot.py
@@ -51,8 +51,9 @@ def plot_dist(
color : string
valid matplotlib color
kind : string
By default ("auto") continuous variables are plotted using KDEs and discrete ones using
histograms. To override this use "hist" to plot histograms and "kde" for KDEs
By default ("auto") continuous variables will use the kind defined by rcParam
``plot.density_kind`` and discrete ones will use histograms.
To override this use "hist" to plot histograms and "kde" for KDEs
cumulative : bool
If true plot the estimated cumulative distribution function. Defaults to False.
Ignored for 2D KDE
@@ -172,7 +173,7 @@ def plot_dist(
raise TypeError('Invalid "kind":{}. Select from {{"auto","kde","hist"}}'.format(kind))

if kind == "auto":
kind = "hist" if values.dtype.kind == "i" else "kde"
kind = "hist" if values.dtype.kind == "i" else rcParams["plot.density_kind"]

dist_plot_args = dict(
# User Facing API that can be simplified
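To illustrate the `plot_dist` change above, a sketch with random data: under `kind="auto"`, continuous values follow `plot.density_kind`, while integer data still falls back to a histogram.

```python
import numpy as np
import arviz as az

rng = np.random.default_rng(0)

with az.rc_context(rc={"plot.density_kind": "hist"}):
    az.plot_dist(rng.normal(size=1000))   # continuous data, drawn as a histogram via the rcParam

az.plot_dist(rng.poisson(3, size=1000))   # integer data: always plotted as "hist"
```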