arviz-devs · OriolAbril · Feb 22, 2024 · Feb 21, 2024 · Feb 21, 2024 · Feb 21, 2024
diff --git a/.azure-pipelines/azure-pipelines-base.yml b/.azure-pipelines/azure-pipelines-base.yml
@@ -10,10 +10,10 @@ jobs:
   timeoutInMinutes: 360
   strategy:
     matrix:
-      Python_39:
-        python.version: "3.9"
+      Python_312:
+        python.version: "3.12"
         PyPIGithub: false
-        name: "Python 3.9"
+        name: "Python 3.12"
       Python_311:
         python.version: "3.11"
         PyPIGithub: false

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,7 @@
 ### New features
 
 ### Maintenance and fixes
+- Fix deprecations introduced in latest pandas and xarray versions, and prepare for numpy 2.0 ones ([2315](https://github.com/arviz-devs/arviz/pull/2315))
 
 ### Deprecation
 

diff --git a/arviz/data/inference_data.py b/arviz/data/inference_data.py
@@ -1490,7 +1490,7 @@ def add_groups(self, group_dict=None, coords=None, dims=None, **kwargs):
 
             import numpy as np
             rng = np.random.default_rng(73)
-            ary = rng.normal(size=(post.dims["chain"], post.dims["draw"], obs.dims["match"]))
+            ary = rng.normal(size=(post.sizes["chain"], post.sizes["draw"], obs.sizes["match"]))
             idata.add_groups(
                 log_likelihood={"home_points": ary},
                 dims={"home_points": ["match"]},

diff --git a/arviz/plots/backends/bokeh/bpvplot.py b/arviz/plots/backends/bokeh/bpvplot.py
@@ -171,8 +171,13 @@ def plot_bpv(
                 ax_i.line(0, 0, legend_label=f"bpv={p_value:.2f}", alpha=0)
 
             if plot_mean:
-                ax_i.circle(
-                    obs_vals.mean(), 0, fill_color=color, line_color="black", size=markersize
+                ax_i.scatter(
+                    obs_vals.mean(),
+                    0,
+                    fill_color=color,
+                    line_color="black",
+                    size=markersize,
+                    marker="circle",
                 )
 
         _title = Title()

diff --git a/arviz/plots/backends/bokeh/compareplot.py b/arviz/plots/backends/bokeh/compareplot.py
@@ -69,13 +69,14 @@
             err_ys.append((y, y))
 
         # plot them
-        dif_tri = ax.triangle(
+        dif_tri = ax.scatter(
             comp_df[information_criterion].iloc[1:],
             yticks_pos[1::2],
             line_color=plot_kwargs.get("color_dse", "grey"),
             fill_color=plot_kwargs.get("color_dse", "grey"),
             line_width=2,
             size=6,
+            marker="triangle",
         )
         dif_line = ax.multi_line(err_xs, err_ys, line_color=plot_kwargs.get("color_dse", "grey"))
 
@@ -85,13 +86,14 @@
         ax.yaxis.ticker = yticks_pos[::2]
         ax.yaxis.major_label_overrides = dict(zip(yticks_pos[::2], yticks_labels))
 
-    elpd_circ = ax.circle(
+    elpd_circ = ax.scatter(
         comp_df[information_criterion],
         yticks_pos[::2],
         line_color=plot_kwargs.get("color_ic", "black"),
         fill_color=None,
         line_width=2,
         size=6,
+        marker="circle",
     )
     elpd_label = [elpd_circ]
 
@@ -110,7 +112,7 @@
 
     labels.append(("ELPD", elpd_label))
 
-    scale = comp_df["scale"][0]
+    scale = comp_df["scale"].iloc[0]
 
     if insample_dev:
         p_ic = comp_df[f"p_{information_criterion.split('_')[1]}"]
@@ -120,13 +122,14 @@
             correction = -p_ic
         elif scale == "deviance":
             correction = -(2 * p_ic)
-        insample_circ = ax.circle(
+        insample_circ = ax.scatter(
             comp_df[information_criterion] + correction,
             yticks_pos[::2],
             line_color=plot_kwargs.get("color_insample_dev", "black"),
             fill_color=plot_kwargs.get("color_insample_dev", "black"),
             line_width=2,
             size=6,
+            marker="circle",
         )
         labels.append(("In-sample ELPD", [insample_circ]))
 

diff --git a/arviz/plots/backends/bokeh/forestplot.py b/arviz/plots/backends/bokeh/forestplot.py
@@ -640,15 +640,15 @@ def iterator(self):
             grouped_data = [[(0, datum)] for datum in self.data]
             skip_dims = self.combine_dims.union({"chain"})
         else:
-            grouped_data = [datum.groupby("chain") for datum in self.data]
+            grouped_data = [datum.groupby("chain", squeeze=False) for datum in self.data]
             skip_dims = self.combine_dims
 
         label_dict = OrderedDict()
         selection_list = []
         for name, grouped_datum in zip(self.model_names, grouped_data):
             for _, sub_data in grouped_datum:
                 datum_iter = xarray_var_iter(
-                    sub_data,
+                    sub_data.squeeze(),
                     var_names=[self.var_name],
                     skip_dims=skip_dims,
                     reverse_selections=True,

diff --git a/arviz/plots/backends/matplotlib/compareplot.py b/arviz/plots/backends/matplotlib/compareplot.py
@@ -84,7 +84,7 @@ def plot_compare(
     else:
         ax.set_yticks(yticks_pos[::2])
 
-    scale = comp_df["scale"][0]
+    scale = comp_df["scale"].iloc[0]
 
     if insample_dev:
         p_ic = comp_df[f"p_{information_criterion.split('_')[1]}"]

diff --git a/arviz/plots/backends/matplotlib/forestplot.py b/arviz/plots/backends/matplotlib/forestplot.py
@@ -536,15 +536,15 @@ def iterator(self):
             grouped_data = [[(0, datum)] for datum in self.data]
             skip_dims = self.combine_dims.union({"chain"})
         else:
-            grouped_data = [datum.groupby("chain") for datum in self.data]
+            grouped_data = [datum.groupby("chain", squeeze=False) for datum in self.data]
             skip_dims = self.combine_dims
 
         label_dict = OrderedDict()
         selection_list = []
         for name, grouped_datum in zip(self.model_names, grouped_data):
             for _, sub_data in grouped_datum:
                 datum_iter = xarray_var_iter(
-                    sub_data,
+                    sub_data.squeeze(),
                     var_names=[self.var_name],
                     skip_dims=skip_dims,
                     reverse_selections=True,

diff --git a/arviz/plots/backends/matplotlib/traceplot.py b/arviz/plots/backends/matplotlib/traceplot.py
@@ -430,7 +430,7 @@ def plot_trace(
             Line2D(
                 [], [], label=chain_id, **dealiase_sel_kwargs(legend_kwargs, chain_prop, chain_id)
             )
-            for chain_id in range(data.dims["chain"])
+            for chain_id in range(data.sizes["chain"])
         ]
         if combined:
             handles.insert(

diff --git a/arviz/plots/bfplot.py b/arviz/plots/bfplot.py
@@ -38,7 +38,7 @@ def plot_bf(
     algorithm presented in [1]_.
 
     Parameters
-    -----------
+    ----------
     idata : InferenceData
         Any object that can be converted to an :class:`arviz.InferenceData` object
         Refer to documentation of :func:`arviz.convert_to_dataset` for details.
@@ -52,16 +52,16 @@ def plot_bf(
         Tuple of valid Matplotlib colors. First element for the prior, second for the posterior.
     figsize : (float, float), optional
         Figure size. If `None` it will be defined automatically.
-    textsize: float, optional
+    textsize : float, optional
         Text size scaling factor for labels, titles and lines. If `None` it will be auto
         scaled based on `figsize`.
-    plot_kwargs : dicts, optional
+    plot_kwargs : dict, optional
         Additional keywords passed to :func:`matplotlib.pyplot.plot`.
-    hist_kwargs : dicts, optional
+    hist_kwargs : dict, optional
         Additional keywords passed to :func:`arviz.plot_dist`. Only works for discrete variables.
     ax : axes, optional
         :class:`matplotlib.axes.Axes` or :class:`bokeh.plotting.Figure`.
-    backend :{"matplotlib", "bokeh"}, default "matplotlib"
+    backend : {"matplotlib", "bokeh"}, default "matplotlib"
         Select plotting backend.
     backend_kwargs : dict, optional
         These are kwargs specific to the backend being used, passed to
@@ -78,7 +78,7 @@ def plot_bf(
     References
     ----------
     .. [1] Heck, D., 2019. A caveat on the Savage-Dickey density ratio:
-    The case of computing Bayes factors for regression parameters.
+       The case of computing Bayes factors for regression parameters.
 
     Examples
     --------
@@ -92,6 +92,7 @@ def plot_bf(
         >>> idata = az.from_dict(posterior={"a":np.random.normal(1, 0.5, 5000)},
         ...     prior={"a":np.random.normal(0, 1, 5000)})
         >>> az.plot_bf(idata, var_name="a", ref_val=0)
+
     """
     posterior = extract(idata, var_names=var_name).values
 

diff --git a/arviz/plots/bpvplot.py b/arviz/plots/bpvplot.py
@@ -230,11 +230,11 @@ def plot_bpv(
 
     if flatten_pp is None:
         if flatten is None:
-            flatten_pp = list(predictive_dataset.dims.keys())
+            flatten_pp = list(predictive_dataset.dims)
         else:
             flatten_pp = flatten
     if flatten is None:
-        flatten = list(observed.dims.keys())
+        flatten = list(observed.dims)
 
     if coords is None:
         coords = {}

diff --git a/arviz/plots/compareplot.py b/arviz/plots/compareplot.py
@@ -90,10 +90,10 @@ def plot_compare(
     References
     ----------
     .. [1] Vehtari et al. (2016). Practical Bayesian model evaluation using leave-one-out
-    cross-validation and WAIC https://arxiv.org/abs/1507.04544
+       cross-validation and WAIC https://arxiv.org/abs/1507.04544
 
     .. [2] McElreath R. (2022). Statistical Rethinking A Bayesian Course with Examples in
-    R and Stan, Second edition, CRC Press.
+       R and Stan, Second edition, CRC Press.
 
     Examples
     --------

diff --git a/arviz/plots/elpdplot.py b/arviz/plots/elpdplot.py
@@ -98,7 +98,7 @@ def plot_elpd(
     References
     ----------
     .. [1] Vehtari et al. (2016). Practical Bayesian model evaluation using leave-one-out
-    cross-validation and WAIC https://arxiv.org/abs/1507.04544
+       cross-validation and WAIC https://arxiv.org/abs/1507.04544
 
     Examples
     --------

diff --git a/arviz/plots/essplot.py b/arviz/plots/essplot.py
@@ -202,8 +202,8 @@ def plot_ess(
 
     data = get_coords(convert_to_dataset(idata, group="posterior"), coords)
     var_names = _var_names(var_names, data, filter_vars)
-    n_draws = data.dims["draw"]
-    n_samples = n_draws * data.dims["chain"]
+    n_draws = data.sizes["draw"]
+    n_samples = n_draws * data.sizes["chain"]
 
     ess_tail_dataset = None
     mean_ess = None

diff --git a/arviz/plots/pairplot.py b/arviz/plots/pairplot.py
@@ -229,7 +229,7 @@ def plot_pair(
             )
 
     if gridsize == "auto":
-        gridsize = int(dataset.dims["draw"] ** 0.35)
+        gridsize = int(dataset.sizes["draw"] ** 0.35)
 
     numvars = len(flat_var_names)
 

diff --git a/arviz/plots/ppcplot.py b/arviz/plots/ppcplot.py
@@ -269,11 +269,11 @@ def plot_ppc(
 
     if flatten_pp is None:
         if flatten is None:
-            flatten_pp = list(predictive_dataset.dims.keys())
+            flatten_pp = list(predictive_dataset.dims)
         else:
             flatten_pp = flatten
     if flatten is None:
-        flatten = list(observed_data.dims.keys())
+        flatten = list(observed_data.dims)
 
     if coords is None:
         coords = {}

diff --git a/arviz/stats/density_utils.py b/arviz/stats/density_utils.py
@@ -231,8 +231,8 @@ def _fixed_point(t, N, k_sq, a_sq):
        Z. I. Botev, J. F. Grotowski, and D. P. Kroese.
        Ann. Statist. 38 (2010), no. 5, 2916--2957.
     """
-    k_sq = np.asfarray(k_sq, dtype=np.float64)
-    a_sq = np.asfarray(a_sq, dtype=np.float64)
+    k_sq = np.asarray(k_sq, dtype=np.float64)
+    a_sq = np.asarray(a_sq, dtype=np.float64)
 
     l = 7
     f = np.sum(np.power(k_sq, l) * a_sq * np.exp(-k_sq * np.pi**2 * t))

diff --git a/arviz/stats/diagnostics.py b/arviz/stats/diagnostics.py
@@ -457,10 +457,10 @@ def ks_summary(pareto_tail_indices):
     """
     _numba_flag = Numba.numba_flag
     if _numba_flag:
-        bins = np.asarray([-np.Inf, 0.5, 0.7, 1, np.Inf])
+        bins = np.asarray([-np.inf, 0.5, 0.7, 1, np.inf])
         kcounts, *_ = _histogram(pareto_tail_indices, bins)
     else:
-        kcounts, *_ = _histogram(pareto_tail_indices, bins=[-np.Inf, 0.5, 0.7, 1, np.Inf])
+        kcounts, *_ = _histogram(pareto_tail_indices, bins=[-np.inf, 0.5, 0.7, 1, np.inf])
     kprop = kcounts / len(pareto_tail_indices) * 100
     df_k = pd.DataFrame(
         dict(_=["(good)", "(ok)", "(bad)", "(very bad)"], Count=kcounts, Pct=kprop)

diff --git a/arviz/stats/stats.py b/arviz/stats/stats.py
@@ -146,6 +146,7 @@ def compare(
     Compare the centered and non centered models of the eight school problem:
 
     .. ipython::
+       :okwarning:
 
         In [1]: import arviz as az
            ...: data1 = az.load_arviz_data("non_centered_eight")
@@ -157,6 +158,7 @@ def compare(
     weights using the stacking method.
 
     .. ipython::
+       :okwarning:
 
         In [1]: az.compare(compare_dict, ic="loo", method="stacking", scale="log")
 
@@ -180,37 +182,19 @@ def compare(
     except Exception as e:
         raise e.__class__("Encountered error in ELPD computation of compare.") from e
     names = list(ics_dict.keys())
-    if ic == "loo":
+    if ic in {"loo", "waic"}:
         df_comp = pd.DataFrame(
-            index=names,
-            columns=[
-                "rank",
-                "elpd_loo",
-                "p_loo",
-                "elpd_diff",
-                "weight",
-                "se",
-                "dse",
-                "warning",
-                "scale",
-            ],
-            dtype=np.float_,
-        )
-    elif ic == "waic":
-        df_comp = pd.DataFrame(
-            index=names,
-            columns=[
-                "rank",
-                "elpd_waic",
-                "p_waic",
-                "elpd_diff",
-                "weight",
-                "se",
-                "dse",
-                "warning",
-                "scale",
-            ],
-            dtype=np.float_,
+            {
+                "rank": pd.Series(index=names, dtype="int"),
+                f"elpd_{ic}": pd.Series(index=names, dtype="float"),
+                f"p_{ic}": pd.Series(index=names, dtype="float"),
+                "elpd_diff": pd.Series(index=names, dtype="float"),
+                "weight": pd.Series(index=names, dtype="float"),
+                "se": pd.Series(index=names, dtype="float"),
+                "dse": pd.Series(index=names, dtype="float"),
+                "warning": pd.Series(index=names, dtype="boolean"),
+                "scale": pd.Series(index=names, dtype="str"),
+            }
         )
     else:
         raise NotImplementedError(f"The information criterion {ic} is not supported.")
@@ -632,7 +616,7 @@ def _hdi(ary, hdi_prob, circular, skipna):
     ary = np.sort(ary)
     interval_idx_inc = int(np.floor(hdi_prob * n))
     n_intervals = n - interval_idx_inc
-    interval_width = np.subtract(ary[interval_idx_inc:], ary[:n_intervals], dtype=np.float_)
+    interval_width = np.subtract(ary[interval_idx_inc:], ary[:n_intervals], dtype=np.float64)
 
     if len(interval_width) == 0:
         raise ValueError("Too few elements for interval calculation. ")
@@ -2096,7 +2080,7 @@ def weight_predictions(idatas, weights=None):
     weights /= weights.sum()
 
     len_idatas = [
-        idata.posterior_predictive.dims["chain"] * idata.posterior_predictive.dims["draw"]
+        idata.posterior_predictive.sizes["chain"] * idata.posterior_predictive.sizes["draw"]
         for idata in idatas
     ]
 

diff --git a/arviz/stats/stats_utils.py b/arviz/stats/stats_utils.py
@@ -484,7 +484,7 @@ def __str__(self):
             base += "\n\nThere has been a warning during the calculation. Please check the results."
 
         if kind == "loo" and "pareto_k" in self:
-            bins = np.asarray([-np.Inf, 0.5, 0.7, 1, np.Inf])
+            bins = np.asarray([-np.inf, 0.5, 0.7, 1, np.inf])
             counts, *_ = _histogram(self.pareto_k.values, bins)
             extended = POINTWISE_LOO_FMT.format(max(4, len(str(np.max(counts)))))
             extended = extended.format(