diff --git a/.azure-pipelines/azure-pipelines-external.yml b/.azure-pipelines/azure-pipelines-external.yml
index 0a0fd953c1..fd142aa311 100644
--- a/.azure-pipelines/azure-pipelines-external.yml
+++ b/.azure-pipelines/azure-pipelines-external.yml
@@ -14,7 +14,6 @@ jobs:
           python.version: "3.9"
           pystan.version: "latest"
           cmdstanpy.version: "latest"
-          pymc3.version: "latest"
           emcee.version: "latest"
           name: "External latest"
 
@@ -22,7 +21,6 @@ jobs:
           python.version: "3.9"
           pystan.version: 2.19.1.1
           cmdstanpy.version: "github"
-          pymc3.version: "github"
           emcee.version: 2
           name: "External special"
 
@@ -74,14 +72,7 @@ jobs:
           python -m pip --no-cache-dir install "emcee<3"
       fi
 
-      if [ "$(pymc3.version)" = "github" ]; then
-          python -m pip --no-cache-dir --log log.txt install git+https://github.com/pymc-devs/pymc3@v3
-          cat log.txt
-      else
-          python -m pip --no-cache-dir install pymc3
-      fi
-
-      grep -Ev '^pystan|^cmdstanpy|^emcee|^pymc3' requirements-external.txt | xargs python -m pip install
+      grep -Ev '^pystan|^cmdstanpy|^emcee' requirements-external.txt | xargs python -m pip install
 
     displayName: 'Install packages'
diff --git a/arviz/data/__init__.py b/arviz/data/__init__.py
index f34b013c45..d81ecf9911 100644
--- a/arviz/data/__init__.py
+++ b/arviz/data/__init__.py
@@ -12,7 +12,6 @@
 from .io_netcdf import from_netcdf, to_netcdf
 from .io_numpyro import from_numpyro
 from .io_pyjags import from_pyjags
-from .io_pymc3 import from_pymc3, from_pymc3_predictions
 from .io_pyro import from_pyro
 from .io_pystan import from_pystan
 from .utils import extract, extract_dataset
@@ -31,8 +30,6 @@
     "convert_to_inference_data",
     "from_beanmachine",
     "from_pyjags",
-    "from_pymc3",
-    "from_pymc3_predictions",
     "from_pystan",
     "from_emcee",
     "from_cmdstan",
diff --git a/arviz/data/converters.py b/arviz/data/converters.py
index 01cb4ec010..2961f0aaf1 100644
--- a/arviz/data/converters.py
+++ b/arviz/data/converters.py
@@ -9,7 +9,6 @@
 from .io_cmdstanpy import from_cmdstanpy
 from .io_emcee import from_emcee
 from .io_numpyro import from_numpyro
-from .io_pymc3 import from_pymc3
 from .io_pyro import from_pyro
 from .io_pystan import from_pystan
 
@@ -23,14 +22,13 @@ def convert_to_inference_data(obj, *, group="posterior", coords=None, dims=None,
     Parameters
     ----------
-    obj : dict, str, np.ndarray, xr.Dataset, pystan fit, pymc3 trace
+    obj : dict, str, np.ndarray, xr.Dataset, pystan fit
         A supported object to convert to InferenceData:
 
             | InferenceData: returns unchanged
             | str: Attempts to load the cmdstan csv or netcdf dataset from disk
             | pystan fit: Automatically extracts data
             | cmdstanpy fit: Automatically extracts data
             | cmdstan csv-list: Automatically extracts data
-            | pymc3 trace: Automatically extracts data
             | emcee sampler: Automatically extracts data
             | pyro MCMC: Automatically extracts data
             | beanmachine MonteCarloSamples: Automatically extracts data
@@ -89,8 +87,6 @@ def convert_to_inference_data(obj, *, group="posterior", coords=None, dims=None,
             return from_cmdstanpy(**kwargs)
         else:  # pystan or pystan3
             return from_pystan(**kwargs)
-    elif obj.__class__.__name__ == "MultiTrace":  # ugly, but doesn't make PyMC3 a requirement
-        return from_pymc3(trace=kwargs.pop(group), **kwargs)
     elif obj.__class__.__name__ == "EnsembleSampler":  # ugly, but doesn't make emcee a requirement
         return from_emcee(sampler=kwargs.pop(group), **kwargs)
     elif obj.__class__.__name__ == "MonteCarloSamples":
@@ -125,7 +121,6 @@ def convert_to_inference_data(obj, *, group="posterior", coords=None, dims=None,
         "netcdf filename",
         "numpy array",
         "pystan fit",
-        "pymc3 trace",
"emcee fit", "pyro mcmc fit", "numpyro mcmc fit", @@ -152,13 +147,12 @@ def convert_to_dataset(obj, *, group="posterior", coords=None, dims=None): Parameters ---------- - obj : dict, str, np.ndarray, xr.Dataset, pystan fit, pymc3 trace + obj : dict, str, np.ndarray, xr.Dataset, pystan fit A supported object to convert to InferenceData: - InferenceData: returns unchanged - str: Attempts to load the netcdf dataset from disk - pystan fit: Automatically extracts data - - pymc3 trace: Automatically extracts data - xarray.Dataset: adds to InferenceData as only group - xarray.DataArray: creates an xarray dataset as the only group, gives the array an arbitrary name, if name not set diff --git a/arviz/data/io_pymc3.py b/arviz/data/io_pymc3.py deleted file mode 100644 index 765062f3da..0000000000 --- a/arviz/data/io_pymc3.py +++ /dev/null @@ -1,55 +0,0 @@ -# pylint: disable=unused-import -"""PyMC3-specific conversion code.""" -import pkg_resources -import packaging - -__all__ = ["from_pymc3", "from_pymc3_predictions"] - -try: - pymc3_version = pkg_resources.get_distribution("pymc3").version - PYMC3_V4 = packaging.version.parse(pymc3_version) >= packaging.version.parse("4.0") -except pkg_resources.DistributionNotFound: - PYMC3_V4 = False - - -if not PYMC3_V4: - from .io_pymc3_3x import from_pymc3, from_pymc3_predictions -else: - - def from_pymc3( - trace=None, - *, - prior=None, - posterior_predictive=None, - log_likelihood=None, - coords=None, - dims=None, - model=None, - save_warmup=None, - density_dist_obs=True, - ): - """Convert pymc3 data into an InferenceData object. - - Placeholder for function moved to PyMC3. - """ - raise NotImplementedError( - "The converter has been moved to PyMC3 codebase, use pymc3.to_inference_data" - ) - - def from_pymc3_predictions( - predictions, - posterior_trace=None, - model=None, - coords=None, - dims=None, - idata_orig=None, - inplace=False, - ): - """Translate out-of-sample predictions into ``InferenceData``. - - Placeholder for function moved to PyMC3. - """ - raise NotImplementedError( - "The converter has been moved to PyMC3 codebase, " - "use pymc3.to_inference_data_predictions" - ) diff --git a/arviz/data/io_pymc3_3x.py b/arviz/data/io_pymc3_3x.py deleted file mode 100644 index 117946135f..0000000000 --- a/arviz/data/io_pymc3_3x.py +++ /dev/null @@ -1,654 +0,0 @@ -# pylint: disable=unused-import -"""PyMC3-specific conversion code (PyMC3<4.0).""" -import logging -import warnings -from types import ModuleType -from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Union - -import numpy as np -import xarray as xr - -from .. import utils -from ..rcparams import rcParams -from .base import CoordSpec, DimSpec, dict_to_dataset, generate_dims_coords, make_attrs, requires -from .inference_data import InferenceData, concat - -if TYPE_CHECKING: - from typing import Set # pylint: disable=ungrouped-imports - - import pymc3 as pm - - try: - import aesara # pylint: disable=unused-import - except ImportError: - import theano as aesara # pylint: disable=unused-import - from pymc3 import Model, MultiTrace # pylint: disable=invalid-name -else: - MultiTrace = Any # pylint: disable=invalid-name - Model = Any # pylint: disable=invalid-name - -___all__ = [""] - -_log = logging.getLogger(__name__) - -Coords = Dict[str, List[Any]] -Dims = Dict[str, List[str]] -# random variable object ... 
-Var = Any  # pylint: disable=invalid-name
-
-
-def _monkey_patch_pymc3(pm: ModuleType) -> None:  # pylint: disable=invalid-name
-    assert pm.__name__ == "pymc3"
-
-    def fixed_eq(self, other):
-        """Use object identity for MultiObservedRV equality."""
-        return self is other
-
-    if tuple((int(x) for x in pm.__version__.split("."))) < (3, 9):  # type: ignore
-        pm.model.MultiObservedRV.__eq__ = fixed_eq  # type: ignore
-
-
-class PyMC3Converter:  # pylint: disable=too-many-instance-attributes
-    """Encapsulate PyMC3 specific logic."""
-
-    model = None  # type: Optional[pm.Model]
-    nchains = None  # type: int
-    ndraws = None  # type: int
-    posterior_predictive = None  # Type: Optional[Dict[str, np.ndarray]]
-    predictions = None  # Type: Optional[Dict[str, np.ndarray]]
-    prior = None  # Type: Optional[Dict[str, np.ndarray]]
-
-    def __init__(
-        self,
-        *,
-        trace=None,
-        prior=None,
-        posterior_predictive=None,
-        log_likelihood=None,
-        predictions=None,
-        coords: Optional[Coords] = None,
-        dims: Optional[Dims] = None,
-        model=None,
-        save_warmup: Optional[bool] = None,
-        density_dist_obs: bool = True,
-    ):
-        import pymc3
-
-        try:
-            import aesara  # pylint: disable=redefined-outer-name
-        except ImportError:
-            import theano as aesara
-
-        _monkey_patch_pymc3(pymc3)
-
-        self.pymc3 = pymc3
-        self.aesara = aesara
-
-        self.save_warmup = rcParams["data.save_warmup"] if save_warmup is None else save_warmup
-        self.trace = trace
-
-        # this permits us to get the model from command-line argument or from with model:
-        try:
-            self.model = self.pymc3.modelcontext(model or self.model)
-        except TypeError as e:
-            _log.error("Got error %s trying to find log_likelihood in translation.", e)
-            self.model = None
-
-        if self.model is None:
-            warnings.warn(
-                "Using `from_pymc3` without the model will be deprecated in a future release. "
-                "Not using the model will return less accurate and less useful results. "
-                "Make sure you use the model argument or call from_pymc3 within a model context.",
-                FutureWarning,
-            )
-
-        # This next line is brittle and may not work forever, but is a secret
-        # way to access the model from the trace.
-        self.attrs = None
-        if trace is not None:
-            if isinstance(self.trace, InferenceData):
-                raise ValueError(
-                    "Using the `InferenceData` as a `trace` argument won't work. "
-                    "Please use the `arviz.InferenceData.extend` method to extend the "
-                    "`InferenceData` with groups from another `InferenceData`."
-                )
-            if self.model is None:
-                self.model = list(self.trace._straces.values())[  # pylint: disable=protected-access
-                    0
-                ].model
-            self.nchains = trace.nchains if hasattr(trace, "nchains") else 1
-            if hasattr(trace.report, "n_draws") and trace.report.n_draws is not None:
-                self.ndraws = trace.report.n_draws
-                self.attrs = {
-                    "sampling_time": trace.report.t_sampling,
-                    "tuning_steps": trace.report.n_tune,
-                }
-            else:
-                self.ndraws = len(trace)
-                if self.save_warmup:
-                    warnings.warn(
-                        "Warmup samples will be stored in posterior group and will not be"
-                        " excluded from stats and diagnostics."
-                        " Please consider using PyMC3>=3.9 and do not slice the trace manually.",
-                        UserWarning,
-                    )
-            self.ntune = len(self.trace) - self.ndraws
-            self.posterior_trace, self.warmup_trace = self.split_trace()
-        else:
-            self.nchains = self.ndraws = 0
-
-        self.prior = prior
-        self.posterior_predictive = posterior_predictive
-        self.log_likelihood = (
-            rcParams["data.log_likelihood"] if log_likelihood is None else log_likelihood
-        )
-        self.predictions = predictions
-
-        def arbitrary_element(dct: Dict[Any, np.ndarray]) -> np.ndarray:
-            return next(iter(dct.values()))
-
-        if trace is None:
-            # if you have a posterior_predictive built with keep_dims,
-            # you'll lose here, but there's nothing I can do about that.
-            self.nchains = 1
-            get_from = None
-            if predictions is not None:
-                get_from = predictions
-            elif posterior_predictive is not None:
-                get_from = posterior_predictive
-            elif prior is not None:
-                get_from = prior
-            if get_from is None:
-                # pylint: disable=line-too-long
-                raise ValueError(
-                    "When constructing InferenceData must have at least"
-                    " one of trace, prior, posterior_predictive or predictions."
-                )
-
-            aelem = arbitrary_element(get_from)
-            self.ndraws = aelem.shape[0]
-
-        self.coords = {} if coords is None else coords
-        if hasattr(self.model, "coords"):
-            self.coords = {**self.model.coords, **self.coords}
-
-        self.dims = {} if dims is None else dims
-        if hasattr(self.model, "RV_dims"):
-            model_dims = {k: list(v) for k, v in self.model.RV_dims.items()}
-            self.dims = {**model_dims, **self.dims}
-
-        self.density_dist_obs = density_dist_obs
-        self.observations, self.multi_observations = self.find_observations()
-
-    def find_observations(self) -> Tuple[Optional[Dict[str, Var]], Optional[Dict[str, Var]]]:
-        """If there are observations available, return them as a dictionary."""
-        if self.model is None:
-            return (None, None)
-        observations = {}
-        multi_observations = {}
-        for obs in self.model.observed_RVs:
-            if hasattr(obs, "observations"):
-                observations[obs.name] = obs.observations
-            elif hasattr(obs, "data") and self.density_dist_obs:
-                for key, val in obs.data.items():
-                    multi_observations[key] = val.eval() if hasattr(val, "eval") else val
-        return observations, multi_observations
-
-    def split_trace(self) -> Tuple[Union[None, MultiTrace], Union[None, MultiTrace]]:
-        """Split MultiTrace object into posterior and warmup.
-
-        Returns
-        -------
-        trace_posterior: pymc3.MultiTrace or None
-            The slice of the trace corresponding to the posterior. If the posterior
-            trace is empty, None is returned
-        trace_warmup: pymc3.MultiTrace or None
-            The slice of the trace corresponding to the warmup. If the warmup trace is
-            empty or ``save_warmup=False``, None is returned
-        """
-        trace_posterior = None
-        trace_warmup = None
-        if self.save_warmup and self.ntune > 0:
-            trace_warmup = self.trace[: self.ntune]
-        if self.ndraws > 0:
-            trace_posterior = self.trace[self.ntune :]
-        return trace_posterior, trace_warmup
-
-    def log_likelihood_vals_point(self, point, var, log_like_fun):
-        """Compute log likelihood for each observed point."""
-        log_like_val = utils.one_de(log_like_fun(point))
-        if var.missing_values:
-            mask = var.observations.mask
-            if np.ndim(mask) > np.ndim(log_like_val):
-                mask = np.any(mask, axis=-1)
-            log_like_val = np.where(mask, np.nan, log_like_val)
-        return log_like_val
-
-    def _extract_log_likelihood(self, trace):
-        """Compute log likelihood of each observation."""
-        if self.trace is None:
-            return None
-        if self.model is None:
-            return None
-
-        # If we have predictions, then we have a thinned trace which does not
-        # support extracting a log likelihood.
-        if self.log_likelihood is True:
-            cached = [(var, var.logp_elemwise) for var in self.model.observed_RVs]
-        else:
-            cached = [
-                (var, var.logp_elemwise)
-                for var in self.model.observed_RVs
-                if var.name in self.log_likelihood
-            ]
-        try:
-            log_likelihood_dict = (
-                self.pymc3.sampling._DefaultTrace(  # pylint: disable=protected-access
-                    len(trace.chains)
-                )
-            )
-        except AttributeError as err:
-            raise AttributeError(
-                "Installed version of ArviZ requires PyMC3>=3.8. Please upgrade with "
-                "`pip install pymc3>=3.8` or `conda install -c conda-forge pymc3>=3.8`."
-            ) from err
-        for var, log_like_fun in cached:
-            try:
-                for k, chain in enumerate(trace.chains):
-                    log_like_chain = [
-                        self.log_likelihood_vals_point(point, var, log_like_fun)
-                        for point in trace.points([chain])
-                    ]
-                    log_likelihood_dict.insert(var.name, np.stack(log_like_chain), k)
-            except TypeError as e:
-                raise TypeError(
-                    *tuple(["While computing log-likelihood for {var}: "] + list(e.args))
-                ) from e
-        return log_likelihood_dict.trace_dict
-
-    @requires("trace")
-    def posterior_to_xarray(self):
-        """Convert the posterior to an xarray dataset."""
-        var_names = self.pymc3.util.get_default_varnames(
-            self.trace.varnames, include_transformed=False
-        )
-        data = {}
-        data_warmup = {}
-        for var_name in var_names:
-            if self.warmup_trace:
-                data_warmup[var_name] = np.array(
-                    self.warmup_trace.get_values(var_name, combine=False, squeeze=False)
-                )
-            if self.posterior_trace:
-                data[var_name] = np.array(
-                    self.posterior_trace.get_values(var_name, combine=False, squeeze=False)
-                )
-        return (
-            dict_to_dataset(
-                data, library=self.pymc3, coords=self.coords, dims=self.dims, attrs=self.attrs
-            ),
-            dict_to_dataset(
-                data_warmup,
-                library=self.pymc3,
-                coords=self.coords,
-                dims=self.dims,
-                attrs=self.attrs,
-            ),
-        )
-
-    @requires("trace")
-    def sample_stats_to_xarray(self):
-        """Extract sample_stats from PyMC3 trace."""
-        data = {}
-        rename_key = {
-            "model_logp": "lp",
-            "mean_tree_accept": "acceptance_rate",
-            "depth": "tree_depth",
-            "tree_size": "n_steps",
-        }
-        data = {}
-        data_warmup = {}
-        for stat in self.trace.stat_names:
-            name = rename_key.get(stat, stat)
-            if name == "tune":
-                continue
-            if self.warmup_trace:
-                data_warmup[name] = np.array(
-                    self.warmup_trace.get_sampler_stats(stat, combine=False)
-                )
-            if self.posterior_trace:
-                data[name] = np.array(self.posterior_trace.get_sampler_stats(stat, combine=False))
-
-        return (
-            dict_to_dataset(
-                data, library=self.pymc3, dims=None, coords=self.coords, attrs=self.attrs
-            ),
-            dict_to_dataset(
-                data_warmup, library=self.pymc3, dims=None, coords=self.coords, attrs=self.attrs
-            ),
-        )
-
-    @requires("trace")
-    @requires("model")
-    def log_likelihood_to_xarray(self):
-        """Extract log likelihood and log_p data from PyMC3 trace."""
-        if self.predictions or not self.log_likelihood:
-            return None
-        data_warmup = {}
-        data = {}
-        warn_msg = (
-            "Could not compute log_likelihood, it will be omitted. "
-            "Check your model object or set log_likelihood=False"
-        )
-        if self.posterior_trace:
-            try:
-                data = self._extract_log_likelihood(self.posterior_trace)
-            except TypeError:
-                warnings.warn(warn_msg)
-        if self.warmup_trace:
-            try:
-                data_warmup = self._extract_log_likelihood(self.warmup_trace)
-            except TypeError:
-                warnings.warn(warn_msg)
-        return (
-            dict_to_dataset(
-                data, library=self.pymc3, dims=self.dims, coords=self.coords, skip_event_dims=True
-            ),
-            dict_to_dataset(
-                data_warmup,
-                library=self.pymc3,
-                dims=self.dims,
-                coords=self.coords,
-                skip_event_dims=True,
-            ),
-        )
-
-    def translate_posterior_predictive_dict_to_xarray(self, dct) -> xr.Dataset:
-        """Take Dict of variables to numpy ndarrays (samples) and translate into dataset."""
-        data = {}
-        for k, ary in dct.items():
-            shape = ary.shape
-            if shape[0] == self.nchains and shape[1] == self.ndraws:
-                data[k] = ary
-            elif shape[0] == self.nchains * self.ndraws:
-                data[k] = ary.reshape((self.nchains, self.ndraws, *shape[1:]))
-            else:
-                data[k] = utils.expand_dims(ary)
-                # pylint: disable=line-too-long
-                _log.warning(
-                    "posterior predictive variable %s's shape not compatible with number of chains and draws. "
-                    "This can mean that some draws or even whole chains are not represented.",
-                    k,
-                )
-        return dict_to_dataset(data, library=self.pymc3, coords=self.coords, dims=self.dims)
-
-    @requires(["posterior_predictive"])
-    def posterior_predictive_to_xarray(self):
-        """Convert posterior_predictive samples to xarray."""
-        return self.translate_posterior_predictive_dict_to_xarray(self.posterior_predictive)
-
-    @requires(["predictions"])
-    def predictions_to_xarray(self):
-        """Convert predictions (out of sample predictions) to xarray."""
-        return self.translate_posterior_predictive_dict_to_xarray(self.predictions)
-
-    def priors_to_xarray(self):
-        """Convert prior samples (and if possible prior predictive too) to xarray."""
-        if self.prior is None:
-            return {"prior": None, "prior_predictive": None}
-        if self.observations is not None:
-            prior_predictive_vars = list(self.observations.keys())
-            prior_vars = [key for key in self.prior.keys() if key not in prior_predictive_vars]
-        else:
-            prior_vars = list(self.prior.keys())
-            prior_predictive_vars = None
-
-        priors_dict = {
-            group: (
-                None
-                if var_names is None
-                else dict_to_dataset(
-                    {k: utils.expand_dims(self.prior[k]) for k in var_names},
-                    library=self.pymc3,
-                    coords=self.coords,
-                    dims=self.dims,
-                )
-            )
-            for group, var_names in zip(
-                ("prior", "prior_predictive"), (prior_vars, prior_predictive_vars)
-            )
-        }
-        return priors_dict
-
-    @requires(["observations", "multi_observations"])
-    @requires("model")
-    def observed_data_to_xarray(self):
-        """Convert observed data to xarray."""
-        if self.predictions:
-            return None
-        dims = {} if self.dims is None else self.dims
-        observed_data = {}
-        for name, vals in {**self.observations, **self.multi_observations}.items():
-            if hasattr(vals, "get_value"):
-                vals = vals.get_value()
-            vals = utils.one_de(vals)
-            val_dims = dims.get(name)
-            val_dims, coords = generate_dims_coords(
-                vals.shape, name, dims=val_dims, coords=self.coords
-            )
-            # filter coords based on the dims
-            coords = {key: xr.IndexVariable((key,), data=coords[key]) for key in val_dims}
-            observed_data[name] = xr.DataArray(vals, dims=val_dims, coords=coords)
-        return xr.Dataset(data_vars=observed_data, attrs=make_attrs(library=self.pymc3))
-
-    @requires(["trace", "predictions"])
-    @requires("model")
-    def constant_data_to_xarray(self):
-        """Convert constant data to xarray."""
-        # For constant data, we are concerned only with deterministics and data.
-        # The constant data vars must be either pm.Data (TensorSharedVariable) or pm.Deterministic
-        constant_data_vars = {}  # type: Dict[str, Var]
-        for var in self.model.deterministics:
-            if hasattr(self.aesara, "gof"):
-                ancestors_func = self.aesara.gof.graph.ancestors  # pylint: disable=no-member
-            else:
-                ancestors_func = self.aesara.graph.basic.ancestors  # pylint: disable=no-member
-            ancestors = ancestors_func(var.owner.inputs)
-            # no dependency on a random variable
-            if not any((isinstance(a, self.pymc3.model.PyMC3Variable) for a in ancestors)):
-                constant_data_vars[var.name] = var
-
-        def is_data(name, var) -> bool:
-            assert self.model is not None
-            return (
-                var not in self.model.deterministics
-                and var not in self.model.observed_RVs
-                and var not in self.model.free_RVs
-                and var not in self.model.potentials
-                and (self.observations is None or name not in self.observations)
-            )
-
-        # I don't know how to find pm.Data, except that they are named variables that aren't
-        # observed or free RVs, nor are they deterministics, and then we eliminate observations.
-        for name, var in self.model.named_vars.items():
-            if is_data(name, var):
-                constant_data_vars[name] = var
-
-        if not constant_data_vars:
-            return None
-        if self.dims is None:
-            dims = {}
-        else:
-            dims = self.dims
-        constant_data = {}
-        for name, vals in constant_data_vars.items():
-            if hasattr(vals, "get_value"):
-                vals = vals.get_value()
-            # this might be a Deterministic, and must be evaluated
-            elif hasattr(self.model[name], "eval"):
-                vals = self.model[name].eval()
-            vals = np.atleast_1d(vals)
-            val_dims = dims.get(name)
-            val_dims, coords = generate_dims_coords(
-                vals.shape, name, dims=val_dims, coords=self.coords
-            )
-            # filter coords based on the dims
-            coords = {key: xr.IndexVariable((key,), data=coords[key]) for key in val_dims}
-            try:
-                constant_data[name] = xr.DataArray(vals, dims=val_dims, coords=coords)
-            except ValueError as err:
-                raise ValueError(f"Error translating constant_data variable {name}: {err}") from err
-        return xr.Dataset(data_vars=constant_data, attrs=make_attrs(library=self.pymc3))
-
-    def to_inference_data(self):
-        """Convert all available data to an InferenceData object.
-
-        Note that if groups can not be created (e.g., there is no `trace`, so
-        the `posterior` and `sample_stats` can not be extracted), then the InferenceData
-        will not have those groups.
-        """
-        id_dict = {
-            "posterior": self.posterior_to_xarray(),
-            "sample_stats": self.sample_stats_to_xarray(),
-            "log_likelihood": self.log_likelihood_to_xarray(),
-            "posterior_predictive": self.posterior_predictive_to_xarray(),
-            "predictions": self.predictions_to_xarray(),
-            **self.priors_to_xarray(),
-            "observed_data": self.observed_data_to_xarray(),
-        }
-        if self.predictions:
-            id_dict["predictions_constant_data"] = self.constant_data_to_xarray()
-        else:
-            id_dict["constant_data"] = self.constant_data_to_xarray()
-        return InferenceData(save_warmup=self.save_warmup, **id_dict)
-
-
-def from_pymc3(
-    trace=None,
-    *,
-    prior=None,
-    posterior_predictive=None,
-    log_likelihood=None,
-    coords=None,
-    dims=None,
-    model=None,
-    save_warmup=None,
-    density_dist_obs=True,
-):
-    """Convert pymc3 data into an InferenceData object.
-
-    All three of them are optional arguments, but at least one of ``trace``,
-    ``prior`` and ``posterior_predictive`` must be present.
-    For a usage example read the
-    :ref:`Creating InferenceData section on from_pymc3 <creating_InferenceData>`
-
-    Parameters
-    ----------
-    trace : pymc3.MultiTrace, optional
-        Trace generated from MCMC sampling. Output of
-        :py:func:`pymc3:pymc3.sampling.sample`.
-    prior : dict, optional
-        Dictionary with the variable names as keys, and values numpy arrays
-        containing prior and prior predictive samples.
-    posterior_predictive : dict, optional
-        Dictionary with the variable names as keys, and values numpy arrays
-        containing posterior predictive samples.
-    log_likelihood : bool or array_like of str, optional
-        List of variables to calculate `log_likelihood`. Defaults to True which calculates
-        `log_likelihood` for all observed variables. If set to False, log_likelihood is skipped.
-        Defaults to the value of rcParam ``data.log_likelihood``.
-    coords : dict of {str: array-like}, optional
-        Map of coordinate names to coordinate values
-    dims : dict of {str: list of str}, optional
-        Map of variable names to the coordinate names to use to index its dimensions.
-    model : pymc3.Model, optional
-        Model used to generate ``trace``. It is not necessary to pass ``model`` if in
-        ``with`` context.
-    save_warmup : bool, optional
-        Save warmup iterations InferenceData object. If not defined, use default
-        defined by the rcParams.
-    density_dist_obs : bool, default True
-        Store variables passed with ``observed`` arg to
-        :class:`pymc3:pymc.distributions.DensityDist` in the generated InferenceData.
-
-    Returns
-    -------
-    InferenceData
-    """
-    return PyMC3Converter(
-        trace=trace,
-        prior=prior,
-        posterior_predictive=posterior_predictive,
-        log_likelihood=log_likelihood,
-        coords=coords,
-        dims=dims,
-        model=model,
-        save_warmup=save_warmup,
-        density_dist_obs=density_dist_obs,
-    ).to_inference_data()
-
-
-### Later I could have this return ``None`` if the ``idata_orig`` argument is supplied. But
-### perhaps we should have an inplace argument?
-def from_pymc3_predictions(
-    predictions,
-    posterior_trace=None,
-    model=None,
-    coords=None,
-    dims=None,
-    idata_orig=None,
-    inplace=False,
-):
-    """Translate out-of-sample predictions into ``InferenceData``.
-
-    Parameters
-    ----------
-    predictions: Dict[str, np.ndarray]
-        The predictions are the return value of ``pymc3.sample_posterior_predictive``,
-        a dictionary of strings (variable names) to numpy ndarrays (draws).
-    posterior_trace: pm.MultiTrace
-        This should be a trace that has been thinned appropriately for
-        ``pymc3.sample_posterior_predictive``. Specifically, any variable whose shape is
-        a deterministic function of the shape of any predictor (explanatory, independent, etc.)
-        variables must be *removed* from this trace.
-    model: pymc3.Model
-        This argument is *not* optional, unlike in conventional uses of ``from_pymc3``.
-        The reason is that the posterior_trace argument is likely to supply an incorrect
-        value of model.
-    coords: Dict[str, array-like[Any]]
-        Coordinates for the variables. Map from coordinate names to coordinate values.
-    dims: Dict[str, array-like[str]]
-        Map from variable name to ordered set of coordinate names.
-    idata_orig: InferenceData, optional
-        If supplied, then modify this inference data in place, adding ``predictions`` and
-        (if available) ``predictions_constant_data`` groups. If this is not supplied, make a
-        fresh InferenceData
-    inplace: boolean, optional
-        If idata_orig is supplied and inplace is True, merge the predictions into idata_orig,
-        rather than returning a fresh InferenceData object.
-
-    Returns
-    -------
-    InferenceData:
-        May be modified ``idata_orig``.
-    """
-    if inplace and not idata_orig:
-        raise ValueError(
-            (
-                "Do not pass True for inplace unless passing"
-                "an existing InferenceData as idata_orig"
-            )
-        )
-    new_idata = PyMC3Converter(
-        trace=posterior_trace, predictions=predictions, model=model, coords=coords, dims=dims
-    ).to_inference_data()
-    if idata_orig is None:
-        return new_idata
-    elif inplace:
-        concat([idata_orig, new_idata], dim=None, inplace=True)
-        return idata_orig
-    else:
-        # if we are not returning in place, then merge the old groups into the new inference
-        # data and return that.
-        concat([new_idata, idata_orig], dim=None, copy=True, inplace=True)
-        return new_idata
diff --git a/arviz/tests/external_tests/test_data_pymc.py b/arviz/tests/external_tests/test_data_pymc.py
deleted file mode 100644
index 2111fdb445..0000000000
--- a/arviz/tests/external_tests/test_data_pymc.py
+++ /dev/null
@@ -1,701 +0,0 @@
-# pylint: disable=no-member, invalid-name, redefined-outer-name, protected-access, too-many-public-methods
-from sys import version_info
-from typing import Dict, Tuple
-
-import numpy as np
-import pkg_resources
-import packaging
-import pandas as pd
-import pytest
-from numpy import ma
-
-from ... import (  # pylint: disable=wrong-import-position
-    InferenceData,
-    from_dict,
-    from_pymc3,
-    from_pymc3_predictions,
-)
-
-from ..helpers import (  # pylint: disable=unused-import, wrong-import-position
-    chains,
-    check_multiple_attrs,
-    draws,
-    eight_schools_params,
-    importorskip,
-    load_cached_models,
-)
-
-# Skip all tests unless running on pymc3 v3
-try:
-    pymc3_version = pkg_resources.get_distribution("pymc3").version
-    PYMC3_V4 = packaging.version.parse(pymc3_version) >= packaging.version.parse("4.0")
-    PYMC3_installed = True
-    import pymc3 as pm
-except pkg_resources.DistributionNotFound:
-    PYMC3_V4 = False
-    PYMC3_installed = False
-
-pytestmark = pytest.mark.skipif(
-    not PYMC3_installed or PYMC3_V4,
-    reason="Run tests only if pymc3 installed and its version is <4.0",
-)
-
-
-class TestDataPyMC3:
-    @pytest.fixture(scope="class")
-    def data(self, eight_schools_params, draws, chains):
-        class Data:
-            model, obj = load_cached_models(eight_schools_params, draws, chains, "pymc3")["pymc3"]
-
-        return Data
-
-    def get_inference_data(self, data, eight_schools_params):
-        with data.model:
-            prior = pm.sample_prior_predictive()
-            posterior_predictive = pm.sample_posterior_predictive(data.obj)
-
-        return (
-            from_pymc3(
-                trace=data.obj,
-                prior=prior,
-                posterior_predictive=posterior_predictive,
-                coords={"school": np.arange(eight_schools_params["J"])},
-                dims={"theta": ["school"], "eta": ["school"]},
-            ),
-            posterior_predictive,
-        )
-
-    def get_predictions_inference_data(
-        self, data, eight_schools_params, inplace
-    ) -> Tuple[InferenceData, Dict[str, np.ndarray]]:
-        with data.model:
-            prior = pm.sample_prior_predictive()
-            posterior_predictive = pm.sample_posterior_predictive(data.obj)
-
-            idata = from_pymc3(
-                trace=data.obj,
-                prior=prior,
-                coords={"school": np.arange(eight_schools_params["J"])},
-                dims={"theta": ["school"], "eta": ["school"]},
-            )
-            assert isinstance(idata, InferenceData)
-            extended = from_pymc3_predictions(
-                posterior_predictive, idata_orig=idata, inplace=inplace
-            )
-            assert isinstance(extended, InferenceData)
-            assert (id(idata) == id(extended)) == inplace
-        return (extended, posterior_predictive)
-
-    def make_predictions_inference_data(
-        self, data, eight_schools_params
-    ) -> Tuple[InferenceData, Dict[str, np.ndarray]]:
-        with data.model:
-            posterior_predictive = pm.sample_posterior_predictive(data.obj)
-            idata = from_pymc3_predictions(
-                posterior_predictive,
-                posterior_trace=data.obj,
-                coords={"school": np.arange(eight_schools_params["J"])},
-                dims={"theta": ["school"], "eta": ["school"]},
-            )
-        assert isinstance(idata, InferenceData)
-        return idata, posterior_predictive
-
-    def test_from_pymc(self, data, eight_schools_params, chains, draws):
-        inference_data, posterior_predictive = self.get_inference_data(data, eight_schools_params)
-        test_dict = {
-            "posterior": ["mu", "tau", "eta", "theta"],
-            "sample_stats": ["diverging", "lp", "~log_likelihood"],
-            "log_likelihood": ["obs"],
-            "posterior_predictive": ["obs"],
-            "prior": ["mu", "tau", "eta", "theta"],
-            "prior_predictive": ["obs"],
-            "observed_data": ["obs"],
-        }
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails
-        for key, values in posterior_predictive.items():
-            ivalues = inference_data.posterior_predictive[key]
-            for chain in range(chains):
-                assert np.all(
-                    np.isclose(ivalues[chain], values[chain * draws : (chain + 1) * draws])
-                )
-
-    def test_from_pymc_predictions(self, data, eight_schools_params):
-        """Test that we can add predictions to a previously-existing InferenceData."""
-        test_dict = {
-            "posterior": ["mu", "tau", "eta", "theta"],
-            "sample_stats": ["diverging", "lp"],
-            "log_likelihood": ["obs"],
-            "predictions": ["obs"],
-            "prior": ["mu", "tau", "eta", "theta"],
-            "observed_data": ["obs"],
-        }
-
-        # check adding non-destructively
-        inference_data, posterior_predictive = self.get_predictions_inference_data(
-            data, eight_schools_params, False
-        )
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails
-        for key, values in posterior_predictive.items():
-            ivalues = inference_data.predictions[key]
-            assert ivalues.shape[0] == 1  # one chain in predictions
-            assert np.all(np.isclose(ivalues[0], values))
-
-        # check adding in place
-        inference_data, posterior_predictive = self.get_predictions_inference_data(
-            data, eight_schools_params, True
-        )
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails
-        for key, values in posterior_predictive.items():
-            ivalues = inference_data.predictions[key]
-            assert ivalues.shape[0] == 1  # one chain in predictions
-            assert np.all(np.isclose(ivalues[0], values))
-
-    def test_from_pymc_trace_inference_data(self):
-        """Check if the error is raised successfully after passing InferenceData as trace"""
-        idata = from_dict(
-            posterior={"A": np.random.randn(2, 10, 2), "B": np.random.randn(2, 10, 5, 2)}
-        )
-        assert isinstance(idata, InferenceData)
-        with pytest.raises(ValueError):
-            from_pymc3(trace=idata, model=pm.Model())
-
-    def test_from_pymc_predictions_new(self, data, eight_schools_params):
-        # check creating new
-        inference_data, posterior_predictive = self.make_predictions_inference_data(
-            data, eight_schools_params
-        )
-        test_dict = {
-            "posterior": ["mu", "tau", "eta", "theta"],
-            "predictions": ["obs"],
-            "~observed_data": [""],
-        }
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails
-        for key, values in posterior_predictive.items():
-            ivalues = inference_data.predictions[key]
-            # could the following better be done by simply flattening both the ivalues
-            # and the values?
-            if len(ivalues.shape) == 3:
-                ivalues_arr = np.reshape(
-                    ivalues.values, (ivalues.shape[0] * ivalues.shape[1], ivalues.shape[2])
-                )
-            elif len(ivalues.shape) == 2:
-                ivalues_arr = np.reshape(ivalues.values, (ivalues.shape[0] * ivalues.shape[1]))
-            else:
-                raise ValueError(f"Unexpected values shape for variable {key}")
-            assert (ivalues.shape[0] == 2) and (ivalues.shape[1] == 500)
-            assert values.shape[0] == 1000
-            assert np.all(np.isclose(ivalues_arr, values))
-
-    def test_posterior_predictive_keep_size(self, data, chains, draws, eight_schools_params):
-        with data.model:
-            posterior_predictive = pm.sample_posterior_predictive(data.obj, keep_size=True)
-            inference_data = from_pymc3(
-                trace=data.obj,
-                posterior_predictive=posterior_predictive,
-                coords={"school": np.arange(eight_schools_params["J"])},
-                dims={"theta": ["school"], "eta": ["school"]},
-            )
-
-        shape = inference_data.posterior_predictive.obs.shape
-        assert np.all(
-            [obs_s == s for obs_s, s in zip(shape, (chains, draws, eight_schools_params["J"]))]
-        )
-
-    def test_posterior_predictive_warning(self, data, eight_schools_params, caplog):
-        with data.model:
-            posterior_predictive = pm.sample_posterior_predictive(data.obj, 370)
-            inference_data = from_pymc3(
-                trace=data.obj,
-                posterior_predictive=posterior_predictive,
-                coords={"school": np.arange(eight_schools_params["J"])},
-                dims={"theta": ["school"], "eta": ["school"]},
-            )
-
-        records = caplog.records
-        shape = inference_data.posterior_predictive.obs.shape
-        assert np.all([obs_s == s for obs_s, s in zip(shape, (1, 370, eight_schools_params["J"]))])
-        assert len(records) == 1
-        assert records[0].levelname == "WARNING"
-
-    @pytest.mark.skipif(
-        packaging.version.Version(pm.__version__) < packaging.version.Version("3.9.0"),
-        reason="Requires PyMC3 >= 3.9.0",
-    )
-    @pytest.mark.parametrize("use_context", [True, False])
-    def test_autodetect_coords_from_model(self, use_context):
-        df_data = pd.DataFrame(columns=["date"]).set_index("date")
-        dates = pd.date_range(start="2020-05-01", end="2020-05-20")
-        for city, mu in {"Berlin": 15, "San Marino": 18, "Paris": 16}.items():
-            df_data[city] = np.random.normal(  # pylint: disable=unsupported-assignment-operation
-                loc=mu, size=len(dates)
-            )
-        df_data.index = dates
-        df_data.index.name = "date"
-
-        coords = {"date": df_data.index, "city": df_data.columns}
-        with pm.Model(coords=coords) as model:
-            europe_mean = pm.Normal("europe_mean_temp", mu=15.0, sd=3.0)
-            city_offset = pm.Normal("city_offset", mu=0.0, sd=3.0, dims="city")
-            city_temperature = pm.Deterministic(
-                "city_temperature", europe_mean + city_offset, dims="city"
-            )
-
-            data_dims = ("date", "city")
-            data = pm.Data("data", df_data, dims=data_dims)
-            _ = pm.Normal("likelihood", mu=city_temperature, sd=0.5, observed=data, dims=data_dims)
-
-            trace = pm.sample(
-                return_inferencedata=False,
-                compute_convergence_checks=False,
-                cores=1,
-                chains=1,
-                tune=20,
-                draws=30,
-                step=pm.Metropolis(),
-            )
-            if use_context:
-                idata = from_pymc3(trace=trace)
-        if not use_context:
-            idata = from_pymc3(trace=trace, model=model)
-
-        assert "city" in list(idata.posterior.dims)
-        assert "city" in list(idata.observed_data.dims)
-        assert "date" in list(idata.observed_data.dims)
-        np.testing.assert_array_equal(idata.posterior.coords["city"], coords["city"])
-        np.testing.assert_array_equal(idata.observed_data.coords["date"], coords["date"])
-        np.testing.assert_array_equal(idata.observed_data.coords["city"], coords["city"])
-
-    def test_ovewrite_model_coords_dims(self):
-        """Check coords and dims from model object can be partially overwrited."""
-        dim1 = ["a", "b"]
-        new_dim1 = ["c", "d"]
-        coords = {"dim1": dim1, "dim2": ["c1", "c2"]}
-        x_data = np.arange(4).reshape((2, 2))
-        y = x_data + np.random.normal(size=(2, 2))
-        with pm.Model(coords=coords):
-            x = pm.Data("x", x_data, dims=("dim1", "dim2"))
-            beta = pm.Normal("beta", 0, 1, dims="dim1")
-            _ = pm.Normal("obs", x * beta, 1, observed=y, dims=("dim1", "dim2"))
-            trace = pm.sample(100, tune=100)
-            idata1 = from_pymc3(trace)
-            idata2 = from_pymc3(trace, coords={"dim1": new_dim1}, dims={"beta": ["dim2"]})
-
-        test_dict = {"posterior": ["beta"], "observed_data": ["obs"], "constant_data": ["x"]}
-        fails1 = check_multiple_attrs(test_dict, idata1)
-        assert not fails1
-        fails2 = check_multiple_attrs(test_dict, idata2)
-        assert not fails2
-        assert "dim1" in list(idata1.posterior.beta.dims)
-        assert "dim2" in list(idata2.posterior.beta.dims)
-        assert np.all(idata1.constant_data.x.dim1.values == np.array(dim1))
-        assert np.all(idata1.constant_data.x.dim2.values == np.array(["c1", "c2"]))
-        assert np.all(idata2.constant_data.x.dim1.values == np.array(new_dim1))
-        assert np.all(idata2.constant_data.x.dim2.values == np.array(["c1", "c2"]))
-
-    def test_missing_data_model(self):
-        # source pymc3/pymc3/tests/test_missing.py
-        data = ma.masked_values([1, 2, -1, 4, -1], value=-1)
-        model = pm.Model()
-        with model:
-            x = pm.Normal("x", 1, 1)
-            pm.Normal("y", x, 1, observed=data)
-            trace = pm.sample(100, chains=2)
-
-        # make sure that data is really missing
-        (y_missing,) = model.missing_values
-        assert y_missing.tag.test_value.shape == (2,)
-        inference_data = from_pymc3(trace=trace, model=model)
-        test_dict = {"posterior": ["x"], "observed_data": ["y"], "log_likelihood": ["y"]}
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails
-
-    def test_mv_missing_data_model(self):
-        data = ma.masked_values([[1, 2], [2, 2], [-1, 4], [2, -1], [-1, -1]], value=-1)
-
-        model = pm.Model()
-        with model:
-            mu = pm.Normal("mu", 0, 1, shape=2)
-            sd_dist = pm.HalfNormal.dist(1.0)
-            chol, *_ = pm.LKJCholeskyCov("chol_cov", n=2, eta=1, sd_dist=sd_dist, compute_corr=True)
-            pm.MvNormal("y", mu=mu, chol=chol, observed=data)
-            trace = pm.sample(100, chains=2)
-
-        # make sure that data is really missing
-        (y_missing,) = model.missing_values
-        assert y_missing.tag.test_value.shape == (4,)
-        inference_data = from_pymc3(trace=trace, model=model)
-        test_dict = {
-            "posterior": ["mu", "chol_cov"],
-            "observed_data": ["y"],
-            "log_likelihood": ["y"],
-        }
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails
-
-    @pytest.mark.parametrize("log_likelihood", [True, False, ["y1"]])
-    def test_multiple_observed_rv(self, log_likelihood):
-        y1_data = np.random.randn(10)
-        y2_data = np.random.randn(100)
-        with pm.Model():
-            x = pm.Normal("x", 1, 1)
-            pm.Normal("y1", x, 1, observed=y1_data)
-            pm.Normal("y2", x, 1, observed=y2_data)
-            trace = pm.sample(100, chains=2)
-            inference_data = from_pymc3(trace=trace, log_likelihood=log_likelihood)
-        test_dict = {
-            "posterior": ["x"],
-            "observed_data": ["y1", "y2"],
-            "log_likelihood": ["y1", "y2"],
-            "sample_stats": ["diverging", "lp", "~log_likelihood"],
-        }
-        if not log_likelihood:
-            test_dict.pop("log_likelihood")
-            test_dict["~log_likelihood"] = [""]
-        if isinstance(log_likelihood, list):
-            test_dict["log_likelihood"] = ["y1", "~y2"]
-
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails
-
-    @pytest.mark.skipif(
-        version_info < (3, 6), reason="Requires updated PyMC3, which needs Python 3.6"
-    )
-    def test_multiple_observed_rv_without_observations(self):
-        with pm.Model():
-            mu = pm.Normal("mu")
-            x = pm.DensityDist(  # pylint: disable=unused-variable
-                "x", pm.Normal.dist(mu, 1.0).logp, observed={"value": 0.1}
-            )
-            trace = pm.sample(100, chains=2)
-            inference_data = from_pymc3(trace=trace)
-        test_dict = {
-            "posterior": ["mu"],
-            "sample_stats": ["lp"],
-            "log_likelihood": ["x"],
-            "observed_data": ["value", "~x"],
-        }
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails
-        assert inference_data.observed_data.value.dtype.kind == "f"
-
-    @pytest.mark.parametrize("multiobs", (True, False))
-    def test_multiobservedrv_to_observed_data(self, multiobs):
-        # fake regression data, with weights (W)
-        np.random.seed(2019)
-        N = 100
-        X = np.random.uniform(size=N)
-        W = 1 + np.random.poisson(size=N)
-        a, b = 5, 17
-        Y = a + np.random.normal(b * X)
-
-        with pm.Model():
-            a = pm.Normal("a", 0, 10)
-            b = pm.Normal("b", 0, 10)
-            mu = a + b * X
-            sigma = pm.HalfNormal("sigma", 1)
-
-            def weighted_normal(y, w):
-                return w * pm.Normal.dist(mu=mu, sd=sigma).logp(y)
-
-            y_logp = pm.DensityDist(  # pylint: disable=unused-variable
-                "y_logp", weighted_normal, observed={"y": Y, "w": W}
-            )
-            trace = pm.sample(20, tune=20)
-            idata = from_pymc3(trace, density_dist_obs=multiobs)
-        multiobs_str = "" if multiobs else "~"
-        test_dict = {
-            "posterior": ["a", "b", "sigma"],
-            "sample_stats": ["lp"],
-            "log_likelihood": ["y_logp"],
-            f"{multiobs_str}observed_data": ["y", "w"],
-        }
-        fails = check_multiple_attrs(test_dict, idata)
-        assert not fails
-        if multiobs:
-            assert idata.observed_data.y.dtype.kind == "f"
-
-    def test_single_observation(self):
-        with pm.Model():
-            p = pm.Uniform("p", 0, 1)
-            pm.Binomial("w", p=p, n=2, observed=1)
-            trace = pm.sample(500, chains=2)
-            inference_data = from_pymc3(trace=trace)
-
-        assert inference_data
-
-    def test_potential(self):
-        with pm.Model():
-            x = pm.Normal("x", 0.0, 1.0)
-            pm.Potential("z", pm.Normal.dist(x, 1.0).logp(np.random.randn(10)))
-            trace = pm.sample(100, chains=2)
-            inference_data = from_pymc3(trace=trace)
-
-        assert inference_data
-
-    @pytest.mark.parametrize("use_context", [True, False])
-    def test_constant_data(self, use_context):
-        """Test constant_data group behaviour."""
-        with pm.Model():
-            x = pm.Data("x", [1.0, 2.0, 3.0])
-            y = pm.Data("y", [1.0, 2.0, 3.0])
-            beta = pm.Normal("beta", 0, 1)
-            obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
-            trace = pm.sample(100, tune=100)
-            if use_context:
-                inference_data = from_pymc3(trace=trace)
-
-        if not use_context:
-            inference_data = from_pymc3(trace=trace)
-        test_dict = {"posterior": ["beta"], "observed_data": ["obs"], "constant_data": ["x"]}
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails
-
-    def test_predictions_constant_data(self):
-        with pm.Model():
-            x = pm.Data("x", [1.0, 2.0, 3.0])
-            y = pm.Data("y", [1.0, 2.0, 3.0])
-            beta = pm.Normal("beta", 0, 1)
-            obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
-            trace = pm.sample(100, tune=100)
-
-            inference_data = from_pymc3(trace=trace)
-        test_dict = {"posterior": ["beta"], "observed_data": ["obs"], "constant_data": ["x"]}
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails
-
-        with pm.Model():
-            x = pm.Data("x", [1.0, 2.0])
-            y = pm.Data("y", [1.0, 2.0])
-            beta = pm.Normal("beta", 0, 1)
-            obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
-            predictive_trace = pm.sample_posterior_predictive(trace)
-        assert set(predictive_trace.keys()) == {"obs"}
-        # this should be four chains of 100 samples
-        # assert predictive_trace["obs"].shape == (400, 2)
-        # but the shape seems to vary between pymc3 versions
-        inference_data = from_pymc3_predictions(predictive_trace, posterior_trace=trace)
-        test_dict = {"posterior": ["beta"], "~observed_data": [""]}
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails, "Posterior data not copied over as expected."
-        test_dict = {"predictions": ["obs"]}
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails, "Predictions not instantiated as expected."
-        test_dict = {"predictions_constant_data": ["x"]}
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails, "Predictions constant data not instantiated as expected."
-
-    def test_no_trace(self):
-        with pm.Model() as model:
-            x = pm.Data("x", [1.0, 2.0, 3.0])
-            y = pm.Data("y", [1.0, 2.0, 3.0])
-            beta = pm.Normal("beta", 0, 1)
-            obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
-            trace = pm.sample(100, tune=100)
-            prior = pm.sample_prior_predictive()
-            posterior_predictive = pm.sample_posterior_predictive(trace)
-
-        # Only prior
-        inference_data = from_pymc3(prior=prior, model=model)
-        test_dict = {"prior": ["beta"], "prior_predictive": ["obs"]}
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails
-        # Only posterior_predictive
-        inference_data = from_pymc3(posterior_predictive=posterior_predictive, model=model)
-        test_dict = {"posterior_predictive": ["obs"]}
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails
-        # Prior and posterior_predictive but no trace
-        inference_data = from_pymc3(
-            prior=prior, posterior_predictive=posterior_predictive, model=model
-        )
-        test_dict = {
-            "prior": ["beta"],
-            "prior_predictive": ["obs"],
-            "posterior_predictive": ["obs"],
-        }
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails
-
-    @pytest.mark.parametrize("use_context", [True, False])
-    def test_priors_with_model(self, use_context):
-        """Test model is enough to get prior, prior predictive and observed_data."""
-        with pm.Model() as model:
-            x = pm.Data("x", [1.0, 2.0, 3.0])
-            y = pm.Data("y", [1.0, 2.0, 3.0])
-            beta = pm.Normal("beta", 0, 1)
-            obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
-            prior = pm.sample_prior_predictive()
-
-        test_dict = {
-            "prior": ["beta", "~obs"],
-            "observed_data": ["obs"],
-            "prior_predictive": ["obs"],
-        }
-        if use_context:
-            with model:  # pylint: disable=not-context-manager
-                inference_data = from_pymc3(prior=prior)
-        else:
-            inference_data = from_pymc3(prior=prior, model=model)
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails
-
-    def test_no_model_deprecation(self):
-        with pm.Model():
-            x = pm.Data("x", [1.0, 2.0, 3.0])
-            y = pm.Data("y", [1.0, 2.0, 3.0])
-            beta = pm.Normal("beta", 0, 1)
-            obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
-            prior = pm.sample_prior_predictive()
-
-        with pytest.warns(FutureWarning, match="without the model"):
-            inference_data = from_pymc3(prior=prior)
-        test_dict = {
-            "prior": ["beta", "obs"],
-            "~prior_predictive": [""],
-        }
-        fails = check_multiple_attrs(test_dict, inference_data)
-        assert not fails
-
-    def test_multivariate_observations(self):
-        coords = {"direction": ["x", "y", "z"], "experiment": np.arange(20)}
-        data = np.random.multinomial(20, [0.2, 0.3, 0.5], size=20)
-        with pm.Model(coords=coords):
-            p = pm.Beta("p", 1, 1, shape=(3,))
-            pm.Multinomial("y", 20, p, dims=("experiment", "direction"), observed=data)
-            idata = pm.sample(draws=50, tune=100, return_inferencedata=True)
-        test_dict = {
-            "posterior": ["p"],
-            "sample_stats": ["lp"],
-            "log_likelihood": ["y"],
-            "observed_data": ["y"],
-        }
-        fails = check_multiple_attrs(test_dict, idata)
-        assert not fails
-        assert "direction" not in idata.log_likelihood.dims
-        assert "direction" in idata.observed_data.dims
-
-
-class TestPyMC3WarmupHandling:
-    @pytest.mark.skipif(
-        not hasattr(pm.backends.base.SamplerReport, "n_draws"),
-        reason="requires pymc3 3.9 or higher",
-    )
-    @pytest.mark.parametrize("save_warmup", [False, True])
-    @pytest.mark.parametrize("chains", [1, 2])
-    @pytest.mark.parametrize("tune,draws", [(0, 50), (10, 40), (30, 0)])
-    def test_save_warmup(self, save_warmup, chains, tune, draws):
-        with pm.Model():
-            pm.Uniform("u1")
-            pm.Normal("n1")
-            trace = pm.sample(
-                tune=tune,
-                draws=draws,
-                chains=chains,
-                cores=1,
-                step=pm.Metropolis(),
-                discard_tuned_samples=False,
-            )
-        assert isinstance(trace, pm.backends.base.MultiTrace)
-        idata = from_pymc3(trace, save_warmup=save_warmup)
-        warmup_prefix = "" if save_warmup and (tune > 0) else "~"
-        post_prefix = "" if draws > 0 else "~"
-        test_dict = {
-            f"{post_prefix}posterior": ["u1", "n1"],
-            f"{post_prefix}sample_stats": ["~tune", "accept"],
-            f"{warmup_prefix}warmup_posterior": ["u1", "n1"],
-            f"{warmup_prefix}warmup_sample_stats": ["~tune"],
-            "~warmup_log_likelihood": [""],
-            "~log_likelihood": [""],
-        }
-        fails = check_multiple_attrs(test_dict, idata)
-        assert not fails
-        if hasattr(idata, "posterior"):
-            assert idata.posterior.dims["chain"] == chains
-            assert idata.posterior.dims["draw"] == draws
-        if hasattr(idata, "warmup_posterior"):
-            assert idata.warmup_posterior.dims["chain"] == chains
-            assert idata.warmup_posterior.dims["draw"] == tune
-
-    @pytest.mark.skipif(
-        hasattr(pm.backends.base.SamplerReport, "n_draws"),
-        reason="requires pymc3 3.8 or lower",
-    )
-    def test_save_warmup_issue_1208_before_3_9(self):
-        with pm.Model():
-            pm.Uniform("u1")
-            pm.Normal("n1")
-            trace = pm.sample(
-                tune=100,
-                draws=200,
-                chains=2,
-                cores=1,
-                step=pm.Metropolis(),
-                discard_tuned_samples=False,
-            )
-        assert isinstance(trace, pm.backends.base.MultiTrace)
-        assert len(trace) == 300
-
-        # <=3.8 did not track n_draws in the sampler report,
-        # making from_pymc3 fall back to len(trace) and triggering a warning
-        with pytest.warns(UserWarning, match="Warmup samples"):
-            idata = from_pymc3(trace, save_warmup=True)
-        test_dict = {
-            "posterior": ["u1", "n1"],
-            "sample_stats": ["~tune", "accept"],
-            "~warmup_posterior": [""],
-            "~warmup_sample_stats": [""],
-        }
-        fails = check_multiple_attrs(test_dict, idata)
-        assert not fails
-        assert idata.posterior.dims["draw"] == 300
-        assert idata.posterior.dims["chain"] == 2
-
-    @pytest.mark.skipif(
-        not hasattr(pm.backends.base.SamplerReport, "n_draws"),
-        reason="requires pymc3 3.9 or higher",
-    )
-    def test_save_warmup_issue_1208_after_3_9(self):
-        with pm.Model():
-            pm.Uniform("u1")
-            pm.Normal("n1")
-            trace = pm.sample(
-                tune=100,
-                draws=200,
-                chains=2,
-                cores=1,
-                step=pm.Metropolis(),
-                discard_tuned_samples=False,
-            )
-        assert isinstance(trace, pm.backends.base.MultiTrace)
-        assert len(trace) == 300
-
-        # from original trace, warmup draws should be separated out
-        idata = from_pymc3(trace, save_warmup=True)
-        test_dict = {
-            "posterior": ["u1", "n1"],
-            "sample_stats": ["~tune", "accept"],
-            "warmup_posterior": ["u1", "n1"],
-            "warmup_sample_stats": ["~tune", "accept"],
-        }
-        fails = check_multiple_attrs(test_dict, idata)
-        assert not fails
-        assert idata.posterior.dims["chain"] == 2
-        assert idata.posterior.dims["draw"] == 200
-
-        # manually sliced trace triggers the same warning as <=3.8
-        with pytest.warns(UserWarning, match="Warmup samples"):
-            idata = from_pymc3(trace[-30:], save_warmup=True)
-        test_dict = {
-            "posterior": ["u1", "n1"],
-            "sample_stats": ["~tune", "accept"],
-            "~warmup_posterior": [""],
-            "~warmup_sample_stats": [""],
-        }
-        fails = check_multiple_attrs(test_dict, idata)
-        assert not fails
-        assert idata.posterior.dims["chain"] == 2
-        assert idata.posterior.dims["draw"] == 30
diff --git a/arviz/tests/helpers.py b/arviz/tests/helpers.py
index 5c2382c9a3..2ab42a191c 100644
--- a/arviz/tests/helpers.py
+++ b/arviz/tests/helpers.py
@@ -486,20 +486,6 @@ def pystan_noncentered_schools(data, draws, chains):
     return stan_model, fit
 
 
-def pymc3_noncentered_schools(data, draws, chains):
-    """Non-centered eight schools implementation for pymc3."""
-    import pymc3 as pm
-
-    with pm.Model() as model:
-        mu = pm.Normal("mu", mu=0, sd=5)
-        tau = pm.HalfCauchy("tau", beta=5)
-        eta = pm.Normal("eta", mu=0, sd=1, shape=data["J"])
-        theta = pm.Deterministic("theta", mu + tau * eta)
-        pm.Normal("obs", mu=theta, sd=data["sigma"], observed=data["y"])
-        trace = pm.sample(draws, chains=chains)
-    return model, trace
-
-
 def library_handle(library):
     """Import a library and return the handle."""
     if library == "pystan":
@@ -513,11 +499,10 @@ def library_handle(library):
 
 
 def load_cached_models(eight_schools_data, draws, chains, libs=None):
-    """Load pymc3, pystan, emcee, and pyro models from pickle."""
+    """Load pystan, emcee, and pyro models from pickle."""
     here = os.path.dirname(os.path.abspath(__file__))
     supported = (
         ("pystan", pystan_noncentered_schools),
-        ("pymc3", pymc3_noncentered_schools),
         ("emcee", emcee_schools_model),
         ("pyro", pyro_noncentered_schools),
         ("numpyro", numpyro_schools_model),
diff --git a/doc/source/api/data.rst b/doc/source/api/data.rst
index 06030f96a9..72b50033e4 100644
--- a/doc/source/api/data.rst
+++ b/doc/source/api/data.rst
@@ -17,8 +17,6 @@ Inference library converters
    from_emcee
    from_numpyro
    from_pyjags
-   from_pymc3
-   from_pymc3_predictions
    from_pyro
    from_pystan
 
diff --git a/requirements-external.txt b/requirements-external.txt
index 917381e29f..ca44445aae 100644
--- a/requirements-external.txt
+++ b/requirements-external.txt
@@ -1,7 +1,6 @@
 beanmachine
 emcee
 pyjags
-pymc3 @ git+https://github.com/pymc-devs/pymc3
 pystan
 cmdstanpy
 pyro-ppl>=1.0.0
diff --git a/requirements-test.txt b/requirements-test.txt
new file mode 100644
index 0000000000..25a69e0498
--- /dev/null
+++ b/requirements-test.txt
@@ -0,0 +1,6 @@
+pytest
+pytest-cov
+cloudpickle
+
+-r requirements-optional.txt
+-r requirements-external.txt
\ No newline at end of file