general maintenance (#2315)
* general maintenance

* update changelog and check linters

* keep everything bokeh2

* fix squeeze behaviour

* black

* add unconstrained groups to list of recognized groups

* attempt fixing benchmarks
OriolAbril authored Feb 22, 2024
1 parent 7c1637f commit 2631d13
Showing 28 changed files with 83 additions and 89 deletions.
6 changes: 3 additions & 3 deletions .azure-pipelines/azure-pipelines-base.yml
@@ -10,10 +10,10 @@ jobs:
timeoutInMinutes: 360
strategy:
matrix:
-Python_39:
-python.version: "3.9"
+Python_312:
+python.version: "3.12"
PyPIGithub: false
-name: "Python 3.9"
+name: "Python 3.12"
Python_311:
python.version: "3.11"
PyPIGithub: false
2 changes: 1 addition & 1 deletion .azure-pipelines/azure-pipelines-benchmarks.yml
@@ -29,7 +29,7 @@ jobs:
python -m pip install wheel
python -m pip install --no-cache-dir -r requirements.txt
python -m pip install --no-cache-dir -r requirements-optional.txt
-python -m pip install asv
+python -m pip install asv!=0.6.2
displayName: 'Install requirements'
- script: |
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -5,6 +5,7 @@
### New features

### Maintenance and fixes
+- Fix deprecations introduced in latest pandas and xarray versions, and prepare for numpy 2.0 ones ([2315](https://github.com/arviz-devs/arviz/pull/2315))

- Refactor ECDF code ([2311](https://github.com/arviz-devs/arviz/pull/2311))

4 changes: 3 additions & 1 deletion arviz/data/inference_data.py
@@ -64,6 +64,8 @@
"observed_data",
"constant_data",
"predictions_constant_data",
"unconstrained_posterior",
"unconstrained_prior",
]

WARMUP_TAG = "warmup_"
@@ -1492,7 +1494,7 @@ def add_groups(self, group_dict=None, coords=None, dims=None, **kwargs):
import numpy as np
rng = np.random.default_rng(73)
-ary = rng.normal(size=(post.dims["chain"], post.dims["draw"], obs.dims["match"]))
+ary = rng.normal(size=(post.sizes["chain"], post.sizes["draw"], obs.sizes["match"]))
idata.add_groups(
log_likelihood={"home_points": ary},
dims={"home_points": ["match"]},
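The `dims` → `sizes` change above follows xarray's deprecation of using `Dataset.dims` as a name-to-length mapping (it is slated to return only the dimension names, as `DataArray.dims` already does). A minimal sketch of the replacement pattern, with a made-up dataset standing in for the posterior group:

```python
import numpy as np
import xarray as xr

# Made-up dataset shaped like an ArviZ posterior group.
post = xr.Dataset(
    {"mu": (("chain", "draw"), np.zeros((4, 100)))},
    coords={"chain": np.arange(4), "draw": np.arange(100)},
)

# Deprecated usage: treating Dataset.dims as a name -> length mapping.
# n_chains = post.dims["chain"]

# Replacement: Dataset.sizes is the stable name -> length mapping
# on both Dataset and DataArray.
n_chains = post.sizes["chain"]
n_draws = post.sizes["draw"]
print(n_chains * n_draws)  # 400 draws per variable
```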
9 changes: 7 additions & 2 deletions arviz/plots/backends/bokeh/bpvplot.py
@@ -171,8 +171,13 @@ def plot_bpv(
ax_i.line(0, 0, legend_label=f"bpv={p_value:.2f}", alpha=0)

if plot_mean:
-ax_i.circle(
-obs_vals.mean(), 0, fill_color=color, line_color="black", size=markersize
+ax_i.scatter(
+obs_vals.mean(),
+0,
+fill_color=color,
+line_color="black",
+size=markersize,
+marker="circle",
)

_title = Title()
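The `circle`/`triangle` → `scatter` changes in this and the following backend modules switch to the generic glyph API: newer Bokeh deprecates the per-marker helpers, while `scatter(..., marker=...)` already works on Bokeh 2, matching the "keep everything bokeh2" goal of the commit. A minimal sketch of the pattern with made-up data:

```python
from bokeh.plotting import figure

p = figure()

# Older style, deprecated in newer Bokeh releases:
# p.circle([1, 2, 3], [4, 5, 6], size=6, fill_color="grey", line_color="black")
# p.triangle([1, 2, 3], [4, 5, 6], size=6, line_color="grey")

# Pattern used here: one scatter() call with an explicit marker name.
p.scatter([1, 2, 3], [4, 5, 6], size=6, marker="circle",
          fill_color="grey", line_color="black")
p.scatter([1, 2, 3], [2, 3, 4], size=6, marker="triangle",
          fill_color="grey", line_color="grey", line_width=2)
```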
11 changes: 7 additions & 4 deletions arviz/plots/backends/bokeh/compareplot.py
@@ -69,13 +69,14 @@ def plot_compare(
err_ys.append((y, y))

# plot them
-dif_tri = ax.triangle(
+dif_tri = ax.scatter(
comp_df[information_criterion].iloc[1:],
yticks_pos[1::2],
line_color=plot_kwargs.get("color_dse", "grey"),
fill_color=plot_kwargs.get("color_dse", "grey"),
line_width=2,
size=6,
marker="triangle",
)
dif_line = ax.multi_line(err_xs, err_ys, line_color=plot_kwargs.get("color_dse", "grey"))

@@ -85,13 +86,14 @@
ax.yaxis.ticker = yticks_pos[::2]
ax.yaxis.major_label_overrides = dict(zip(yticks_pos[::2], yticks_labels))

-elpd_circ = ax.circle(
+elpd_circ = ax.scatter(
comp_df[information_criterion],
yticks_pos[::2],
line_color=plot_kwargs.get("color_ic", "black"),
fill_color=None,
line_width=2,
size=6,
marker="circle",
)
elpd_label = [elpd_circ]

@@ -110,7 +112,7 @@

labels.append(("ELPD", elpd_label))

-scale = comp_df["scale"][0]
+scale = comp_df["scale"].iloc[0]

if insample_dev:
p_ic = comp_df[f"p_{information_criterion.split('_')[1]}"]
@@ -120,13 +122,14 @@
correction = -p_ic
elif scale == "deviance":
correction = -(2 * p_ic)
-insample_circ = ax.circle(
+insample_circ = ax.scatter(
comp_df[information_criterion] + correction,
yticks_pos[::2],
line_color=plot_kwargs.get("color_insample_dev", "black"),
fill_color=plot_kwargs.get("color_insample_dev", "black"),
line_width=2,
size=6,
marker="circle",
)
labels.append(("In-sample ELPD", [insample_circ]))

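The `comp_df["scale"][0]` → `comp_df["scale"].iloc[0]` change avoids pandas' deprecated fallback where an integer key on a Series is tried as a label first and as a position second. A minimal sketch with a made-up comparison table (the real `az.compare` output is indexed by model name):

```python
import pandas as pd

# Made-up stand-in for the DataFrame returned by az.compare.
comp_df = pd.DataFrame(
    {"elpd_loo": [-10.2, -11.5], "scale": ["log", "log"]},
    index=["model_a", "model_b"],
)

# Deprecated: integer key treated as a positional fallback on a labelled Series.
# scale = comp_df["scale"][0]

# Explicitly positional, works regardless of the index labels.
scale = comp_df["scale"].iloc[0]
print(scale)  # "log"
```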
4 changes: 2 additions & 2 deletions arviz/plots/backends/bokeh/forestplot.py
@@ -640,15 +640,15 @@ def iterator(self):
grouped_data = [[(0, datum)] for datum in self.data]
skip_dims = self.combine_dims.union({"chain"})
else:
-grouped_data = [datum.groupby("chain") for datum in self.data]
+grouped_data = [datum.groupby("chain", squeeze=False) for datum in self.data]
skip_dims = self.combine_dims

label_dict = OrderedDict()
selection_list = []
for name, grouped_datum in zip(self.model_names, grouped_data):
for _, sub_data in grouped_datum:
datum_iter = xarray_var_iter(
-sub_data,
+sub_data.squeeze(),
var_names=[self.var_name],
skip_dims=skip_dims,
reverse_selections=True,
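The `squeeze=False` plus explicit `.squeeze()` combination in both forestplot backends tracks xarray's deprecation of the implicit `squeeze=True` default in `groupby`: groups now keep the grouped dimension with length 1, and the code drops it explicitly where the old shape is expected. A minimal sketch with a made-up dataset:

```python
import numpy as np
import xarray as xr

ds = xr.Dataset(
    {"theta": (("chain", "draw"), np.zeros((2, 5)))},
    coords={"chain": [0, 1], "draw": np.arange(5)},
)

for _, sub in ds.groupby("chain", squeeze=False):
    # With squeeze=False each group keeps a length-1 "chain" dimension,
    # which is what future xarray versions will always do.
    assert sub.sizes["chain"] == 1
    # Dropping the length-1 dimension restores the shape the old
    # squeeze=True default produced for the downstream iteration code.
    sub = sub.squeeze()
    assert "chain" not in sub.dims
```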
2 changes: 1 addition & 1 deletion arviz/plots/backends/matplotlib/compareplot.py
@@ -84,7 +84,7 @@ def plot_compare(
else:
ax.set_yticks(yticks_pos[::2])

-scale = comp_df["scale"][0]
+scale = comp_df["scale"].iloc[0]

if insample_dev:
p_ic = comp_df[f"p_{information_criterion.split('_')[1]}"]
4 changes: 2 additions & 2 deletions arviz/plots/backends/matplotlib/forestplot.py
@@ -536,15 +536,15 @@ def iterator(self):
grouped_data = [[(0, datum)] for datum in self.data]
skip_dims = self.combine_dims.union({"chain"})
else:
-grouped_data = [datum.groupby("chain") for datum in self.data]
+grouped_data = [datum.groupby("chain", squeeze=False) for datum in self.data]
skip_dims = self.combine_dims

label_dict = OrderedDict()
selection_list = []
for name, grouped_datum in zip(self.model_names, grouped_data):
for _, sub_data in grouped_datum:
datum_iter = xarray_var_iter(
-sub_data,
+sub_data.squeeze(),
var_names=[self.var_name],
skip_dims=skip_dims,
reverse_selections=True,
2 changes: 1 addition & 1 deletion arviz/plots/backends/matplotlib/traceplot.py
@@ -430,7 +430,7 @@ def plot_trace(
Line2D(
[], [], label=chain_id, **dealiase_sel_kwargs(legend_kwargs, chain_prop, chain_id)
)
-for chain_id in range(data.dims["chain"])
+for chain_id in range(data.sizes["chain"])
]
if combined:
handles.insert(
13 changes: 7 additions & 6 deletions arviz/plots/bfplot.py
@@ -38,7 +38,7 @@ def plot_bf(
algorithm presented in [1]_.
Parameters
------------
+----------
idata : InferenceData
Any object that can be converted to an :class:`arviz.InferenceData` object
Refer to documentation of :func:`arviz.convert_to_dataset` for details.
Expand All @@ -52,16 +52,16 @@ def plot_bf(
Tuple of valid Matplotlib colors. First element for the prior, second for the posterior.
figsize : (float, float), optional
Figure size. If `None` it will be defined automatically.
-textsize: float, optional
+textsize : float, optional
Text size scaling factor for labels, titles and lines. If `None` it will be auto
scaled based on `figsize`.
-plot_kwargs : dicts, optional
+plot_kwargs : dict, optional
Additional keywords passed to :func:`matplotlib.pyplot.plot`.
-hist_kwargs : dicts, optional
+hist_kwargs : dict, optional
Additional keywords passed to :func:`arviz.plot_dist`. Only works for discrete variables.
ax : axes, optional
:class:`matplotlib.axes.Axes` or :class:`bokeh.plotting.Figure`.
-backend :{"matplotlib", "bokeh"}, default "matplotlib"
+backend : {"matplotlib", "bokeh"}, default "matplotlib"
Select plotting backend.
backend_kwargs : dict, optional
These are kwargs specific to the backend being used, passed to
@@ -78,7 +78,7 @@
References
----------
.. [1] Heck, D., 2019. A caveat on the Savage-Dickey density ratio:
The case of computing Bayes factors for regression parameters.
The case of computing Bayes factors for regression parameters.
Examples
--------
@@ -92,6 +92,7 @@
>>> idata = az.from_dict(posterior={"a":np.random.normal(1, 0.5, 5000)},
... prior={"a":np.random.normal(0, 1, 5000)})
>>> az.plot_bf(idata, var_name="a", ref_val=0)
"""
posterior = extract(idata, var_names=var_name).values

4 changes: 2 additions & 2 deletions arviz/plots/bpvplot.py
@@ -230,11 +230,11 @@ def plot_bpv(

if flatten_pp is None:
if flatten is None:
-flatten_pp = list(predictive_dataset.dims.keys())
+flatten_pp = list(predictive_dataset.dims)
else:
flatten_pp = flatten
if flatten is None:
-flatten = list(observed.dims.keys())
+flatten = list(observed.dims)

if coords is None:
coords = {}
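Dropping `.keys()` here (and in the matching `plot_ppc` code below) prepares for the same xarray change: iterating `Dataset.dims` yields dimension names whether it stays mapping-like or becomes set-like, so only the explicit `.keys()` call would break. A minimal sketch with a made-up observed-data group:

```python
import numpy as np
import xarray as xr

# Made-up stand-in for an observed_data group.
observed = xr.Dataset({"y": (("match",), np.zeros(10))})

# Relies on Dataset.dims being a mapping, which is being phased out:
# flatten = list(observed.dims.keys())

# Iterating .dims directly yields names under both the old and new behaviour.
flatten = list(observed.dims)
print(flatten)  # ['match']
```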
4 changes: 2 additions & 2 deletions arviz/plots/compareplot.py
@@ -90,10 +90,10 @@ def plot_compare(
References
----------
.. [1] Vehtari et al. (2016). Practical Bayesian model evaluation using leave-one-out
cross-validation and WAIC https://arxiv.org/abs/1507.04544
cross-validation and WAIC https://arxiv.org/abs/1507.04544
.. [2] McElreath R. (2022). Statistical Rethinking A Bayesian Course with Examples in
R and Stan, Second edition, CRC Press.
R and Stan, Second edition, CRC Press.
Examples
--------
2 changes: 1 addition & 1 deletion arviz/plots/elpdplot.py
@@ -98,7 +98,7 @@ def plot_elpd(
References
----------
.. [1] Vehtari et al. (2016). Practical Bayesian model evaluation using leave-one-out
cross-validation and WAIC https://arxiv.org/abs/1507.04544
cross-validation and WAIC https://arxiv.org/abs/1507.04544
Examples
--------
4 changes: 2 additions & 2 deletions arviz/plots/essplot.py
@@ -202,8 +202,8 @@ def plot_ess(

data = get_coords(convert_to_dataset(idata, group="posterior"), coords)
var_names = _var_names(var_names, data, filter_vars)
-n_draws = data.dims["draw"]
-n_samples = n_draws * data.dims["chain"]
+n_draws = data.sizes["draw"]
+n_samples = n_draws * data.sizes["chain"]

ess_tail_dataset = None
mean_ess = None
2 changes: 1 addition & 1 deletion arviz/plots/pairplot.py
@@ -229,7 +229,7 @@ def plot_pair(
)

if gridsize == "auto":
-gridsize = int(dataset.dims["draw"] ** 0.35)
+gridsize = int(dataset.sizes["draw"] ** 0.35)

numvars = len(flat_var_names)

4 changes: 2 additions & 2 deletions arviz/plots/ppcplot.py
@@ -269,11 +269,11 @@ def plot_ppc(

if flatten_pp is None:
if flatten is None:
-flatten_pp = list(predictive_dataset.dims.keys())
+flatten_pp = list(predictive_dataset.dims)
else:
flatten_pp = flatten
if flatten is None:
-flatten = list(observed_data.dims.keys())
+flatten = list(observed_data.dims)

if coords is None:
coords = {}
4 changes: 2 additions & 2 deletions arviz/stats/density_utils.py
@@ -231,8 +231,8 @@ def _fixed_point(t, N, k_sq, a_sq):
Z. I. Botev, J. F. Grotowski, and D. P. Kroese.
Ann. Statist. 38 (2010), no. 5, 2916--2957.
"""
-k_sq = np.asfarray(k_sq, dtype=np.float64)
-a_sq = np.asfarray(a_sq, dtype=np.float64)
+k_sq = np.asarray(k_sq, dtype=np.float64)
+a_sq = np.asarray(a_sq, dtype=np.float64)

l = 7
f = np.sum(np.power(k_sq, l) * a_sq * np.exp(-k_sq * np.pi**2 * t))
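`np.asfarray` is removed in NumPy 2.0, and the explicit-dtype `np.asarray` call above is the drop-in replacement that also works on NumPy 1.x. A minimal sketch:

```python
import numpy as np

k_sq = [1, 2, 3]  # any array-like of numbers

# Removed in NumPy 2.0:
# k_sq = np.asfarray(k_sq)

# Equivalent on both NumPy 1.x and 2.x:
k_sq = np.asarray(k_sq, dtype=np.float64)
print(k_sq.dtype)  # float64
```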
4 changes: 2 additions & 2 deletions arviz/stats/diagnostics.py
@@ -457,10 +457,10 @@ def ks_summary(pareto_tail_indices):
"""
_numba_flag = Numba.numba_flag
if _numba_flag:
-bins = np.asarray([-np.Inf, 0.5, 0.7, 1, np.Inf])
+bins = np.asarray([-np.inf, 0.5, 0.7, 1, np.inf])
kcounts, *_ = _histogram(pareto_tail_indices, bins)
else:
-kcounts, *_ = _histogram(pareto_tail_indices, bins=[-np.Inf, 0.5, 0.7, 1, np.Inf])
+kcounts, *_ = _histogram(pareto_tail_indices, bins=[-np.inf, 0.5, 0.7, 1, np.inf])
kprop = kcounts / len(pareto_tail_indices) * 100
df_k = pd.DataFrame(
dict(_=["(good)", "(ok)", "(bad)", "(very bad)"], Count=kcounts, Pct=kprop)
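Likewise, the capitalised `np.Inf` alias is removed in NumPy 2.0; only `np.inf` remains. A minimal sketch of the Pareto-k binning above, using plain `np.histogram` in place of ArviZ's internal `_histogram` wrapper and made-up k values:

```python
import numpy as np

pareto_k = np.array([0.2, 0.65, 0.9, 1.3])  # made-up Pareto shape estimates

# np.Inf (and np.NaN / np.NAN) are gone in NumPy 2.0; the lowercase names stay.
bins = np.asarray([-np.inf, 0.5, 0.7, 1, np.inf])
kcounts, _ = np.histogram(pareto_k, bins=bins)
print(kcounts)  # [1 1 1 1] -> (good), (ok), (bad), (very bad)
```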