From 68cfade0dc4cc9f1a7c840c03a1dbb033ab0582d Mon Sep 17 00:00:00 2001 From: juanitorduz Date: Mon, 1 Jul 2024 20:25:27 +0200 Subject: [PATCH] update pre-commit --- .pre-commit-config.yaml | 6 +++--- pymc_bart/bart.py | 4 ++-- pymc_bart/pgbart.py | 46 ++++++++++++++++++++--------------------- pymc_bart/tree.py | 40 +++++++++++++++++------------------ pymc_bart/utils.py | 40 +++++++++++++++++------------------ 5 files changed, 68 insertions(+), 68 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c166e5e..009ff75 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,14 +12,14 @@ ci: repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.4.7 + rev: v0.5.0 hooks: - id: ruff - args: ["--fix", "--show-source"] + args: ["--fix", "--output-format=full"] - id: ruff-format args: ["--line-length=100"] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.10.0 + rev: v1.10.1 hooks: - id: mypy args: [--ignore-missing-imports] diff --git a/pymc_bart/bart.py b/pymc_bart/bart.py index 8b8aeb0..f55017b 100644 --- a/pymc_bart/bart.py +++ b/pymc_bart/bart.py @@ -125,7 +125,7 @@ def __new__( alpha: float = 0.95, beta: float = 2.0, response: str = "constant", - split_prior: Optional[npt.NDArray[np.float_]] = None, + split_prior: Optional[npt.NDArray[np.float64]] = None, split_rules: Optional[List[SplitRule]] = None, separate_trees: Optional[bool] = False, **kwargs, @@ -198,7 +198,7 @@ def get_moment(cls, rv, size, *rv_inputs): def preprocess_xy( X: TensorLike, Y: TensorLike -) -> Tuple[npt.NDArray[np.float_], npt.NDArray[np.float_]]: +) -> Tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]: if isinstance(Y, (Series, DataFrame)): Y = Y.to_numpy() if isinstance(X, (Series, DataFrame)): diff --git a/pymc_bart/pgbart.py b/pymc_bart/pgbart.py index e7b0762..be4a8e8 100644 --- a/pymc_bart/pgbart.py +++ b/pymc_bart/pgbart.py @@ -313,7 +313,7 @@ def normalize(self, particles: List[ParticleTree]) -> float: return wei / wei.sum() def resample( - self, particles: List[ParticleTree], normalized_weights: npt.NDArray[np.float_] + self, particles: List[ParticleTree], normalized_weights: npt.NDArray[np.float64] ) -> List[ParticleTree]: """ Use systematic resample for all but the first particle @@ -335,7 +335,7 @@ def resample( return particles def get_particle_tree( - self, particles: List[ParticleTree], normalized_weights: npt.NDArray[np.float_] + self, particles: List[ParticleTree], normalized_weights: npt.NDArray[np.float64] ) -> Tuple[ParticleTree, Tree]: """ Sample a new particle and associated tree @@ -347,7 +347,7 @@ def get_particle_tree( return new_particle, new_particle.tree - def systematic(self, normalized_weights: npt.NDArray[np.float_]) -> npt.NDArray[np.int_]: + def systematic(self, normalized_weights: npt.NDArray[np.float64]) -> npt.NDArray[np.int_]: """ Systematic resampling. @@ -399,7 +399,7 @@ def __init__(self, shape: tuple) -> None: self.mean = np.zeros(shape) # running mean self.m_2 = np.zeros(shape) # running second moment - def update(self, new_value: npt.NDArray[np.float_]) -> Union[float, npt.NDArray[np.float_]]: + def update(self, new_value: npt.NDArray[np.float64]) -> Union[float, npt.NDArray[np.float64]]: self.count = self.count + 1 self.mean, self.m_2, std = _update(self.count, self.mean, self.m_2, new_value) return fast_mean(std) @@ -408,10 +408,10 @@ def update(self, new_value: npt.NDArray[np.float_]) -> Union[float, npt.NDArray[ @njit def _update( count: int, - mean: npt.NDArray[np.float_], - m_2: npt.NDArray[np.float_], - new_value: npt.NDArray[np.float_], -) -> Tuple[npt.NDArray[np.float_], npt.NDArray[np.float_], Union[float, npt.NDArray[np.float_]]]: + mean: npt.NDArray[np.float64], + m_2: npt.NDArray[np.float64], + new_value: npt.NDArray[np.float64], +) -> Tuple[npt.NDArray[np.float64], npt.NDArray[np.float64], Union[float, npt.NDArray[np.float64]]]: delta = new_value - mean mean += delta / count delta2 = new_value - mean @@ -422,7 +422,7 @@ def _update( class SampleSplittingVariable: - def __init__(self, alpha_vec: npt.NDArray[np.float_]) -> None: + def __init__(self, alpha_vec: npt.NDArray[np.float64]) -> None: """ Sample splitting variables proportional to `alpha_vec`. @@ -535,13 +535,13 @@ def filter_missing_values(available_splitting_values, idx_data_points, missing_d def draw_leaf_value( - y_mu_pred: npt.NDArray[np.float_], - x_mu: npt.NDArray[np.float_], + y_mu_pred: npt.NDArray[np.float64], + x_mu: npt.NDArray[np.float64], m: int, - norm: npt.NDArray[np.float_], + norm: npt.NDArray[np.float64], shape: int, response: str, -) -> Tuple[npt.NDArray[np.float_], Optional[npt.NDArray[np.float_]]]: +) -> Tuple[npt.NDArray[np.float64], Optional[npt.NDArray[np.float64]]]: """Draw Gaussian distributed leaf values.""" linear_params = None mu_mean = np.empty(shape) @@ -559,7 +559,7 @@ def draw_leaf_value( @njit -def fast_mean(ari: npt.NDArray[np.float_]) -> Union[float, npt.NDArray[np.float_]]: +def fast_mean(ari: npt.NDArray[np.float64]) -> Union[float, npt.NDArray[np.float64]]: """Use Numba to speed up the computation of the mean.""" if ari.ndim == 1: count = ari.shape[0] @@ -578,11 +578,11 @@ def fast_mean(ari: npt.NDArray[np.float_]) -> Union[float, npt.NDArray[np.float_ @njit def fast_linear_fit( - x: npt.NDArray[np.float_], - y: npt.NDArray[np.float_], + x: npt.NDArray[np.float64], + y: npt.NDArray[np.float64], m: int, - norm: npt.NDArray[np.float_], -) -> Tuple[npt.NDArray[np.float_], List[npt.NDArray[np.float_]]]: + norm: npt.NDArray[np.float64], +) -> Tuple[npt.NDArray[np.float64], List[npt.NDArray[np.float64]]]: n = len(x) y = y / m + np.expand_dims(norm, axis=1) @@ -666,17 +666,17 @@ def update(self): @njit def inverse_cdf( - single_uniform: npt.NDArray[np.float_], normalized_weights: npt.NDArray[np.float_] + single_uniform: npt.NDArray[np.float64], normalized_weights: npt.NDArray[np.float64] ) -> npt.NDArray[np.int_]: """ Inverse CDF algorithm for a finite distribution. Parameters ---------- - single_uniform: npt.NDArray[np.float_] + single_uniform: npt.NDArray[np.float64] Ordered points in [0,1] - normalized_weights: npt.NDArray[np.float_]) + normalized_weights: npt.NDArray[np.float64]) Normalized weights Returns @@ -699,7 +699,7 @@ def inverse_cdf( @njit -def jitter_duplicated(array: npt.NDArray[np.float_], std: float) -> npt.NDArray[np.float_]: +def jitter_duplicated(array: npt.NDArray[np.float64], std: float) -> npt.NDArray[np.float64]: """ Jitter duplicated values. """ @@ -715,7 +715,7 @@ def jitter_duplicated(array: npt.NDArray[np.float_], std: float) -> npt.NDArray[ @njit -def are_whole_number(array: npt.NDArray[np.float_]) -> np.bool_: +def are_whole_number(array: npt.NDArray[np.float64]) -> np.bool_: """Check if all values in array are whole numbers""" return np.all(np.mod(array[~np.isnan(array)], 1) == 0) diff --git a/pymc_bart/tree.py b/pymc_bart/tree.py index c9bac2d..0e0a35c 100644 --- a/pymc_bart/tree.py +++ b/pymc_bart/tree.py @@ -27,7 +27,7 @@ class Node: Attributes ---------- - value : npt.NDArray[np.float_] + value : npt.NDArray[np.float64] idx_data_points : Optional[npt.NDArray[np.int_]] idx_split_variable : int linear_params: Optional[List[float]] = None @@ -37,11 +37,11 @@ class Node: def __init__( self, - value: npt.NDArray[np.float_] = np.array([-1.0]), + value: npt.NDArray[np.float64] = np.array([-1.0]), nvalue: int = 0, idx_data_points: Optional[npt.NDArray[np.int_]] = None, idx_split_variable: int = -1, - linear_params: Optional[List[npt.NDArray[np.float_]]] = None, + linear_params: Optional[List[npt.NDArray[np.float64]]] = None, ) -> None: self.value = value self.nvalue = nvalue @@ -52,11 +52,11 @@ def __init__( @classmethod def new_leaf_node( cls, - value: npt.NDArray[np.float_], + value: npt.NDArray[np.float64], nvalue: int = 0, idx_data_points: Optional[npt.NDArray[np.int_]] = None, idx_split_variable: int = -1, - linear_params: Optional[List[npt.NDArray[np.float_]]] = None, + linear_params: Optional[List[npt.NDArray[np.float64]]] = None, ) -> "Node": return cls( value=value, @@ -100,7 +100,7 @@ class Tree: The dictionary's keys are integers that represent the nodes position. The dictionary's values are objects of type Node that represent the split and leaf nodes of the tree itself. - output: Optional[npt.NDArray[np.float_]] + output: Optional[npt.NDArray[np.float64]] Array of shape number of observations, shape split_rules : List[SplitRule] List of SplitRule objects, one per column in input data. @@ -121,7 +121,7 @@ class Tree: def __init__( self, tree_structure: Dict[int, Node], - output: npt.NDArray[np.float_], + output: npt.NDArray[np.float64], split_rules: List[SplitRule], idx_leaf_nodes: Optional[List[int]] = None, ) -> None: @@ -133,7 +133,7 @@ def __init__( @classmethod def new_tree( cls, - leaf_node_value: npt.NDArray[np.float_], + leaf_node_value: npt.NDArray[np.float64], idx_data_points: Optional[npt.NDArray[np.int_]], num_observations: int, shape: int, @@ -189,7 +189,7 @@ def grow_leaf_node( self, current_node: Node, selected_predictor: int, - split_value: npt.NDArray[np.float_], + split_value: npt.NDArray[np.float64], index_leaf_node: int, ) -> None: current_node.value = split_value @@ -221,7 +221,7 @@ def get_split_variables(self) -> Generator[int, None, None]: if node.is_split_node(): yield node.idx_split_variable - def _predict(self) -> npt.NDArray[np.float_]: + def _predict(self) -> npt.NDArray[np.float64]: output = self.output if self.idx_leaf_nodes is not None: @@ -232,23 +232,23 @@ def _predict(self) -> npt.NDArray[np.float_]: def predict( self, - x: npt.NDArray[np.float_], + x: npt.NDArray[np.float64], excluded: Optional[List[int]] = None, shape: int = 1, - ) -> npt.NDArray[np.float_]: + ) -> npt.NDArray[np.float64]: """ Predict output of tree for an (un)observed point x. Parameters ---------- - x : npt.NDArray[np.float_] + x : npt.NDArray[np.float64] Unobserved point excluded: Optional[List[int]] Indexes of the variables to exclude when computing predictions Returns ------- - npt.NDArray[np.float_] + npt.NDArray[np.float64] Value of the leaf value where the unobserved point lies. """ if excluded is None: @@ -258,16 +258,16 @@ def predict( def _traverse_tree( self, - X: npt.NDArray[np.float_], + X: npt.NDArray[np.float64], excluded: Optional[List[int]] = None, shape: Union[int, Tuple[int, ...]] = 1, - ) -> npt.NDArray[np.float_]: + ) -> npt.NDArray[np.float64]: """ Traverse the tree starting from the root node given an (un)observed point. Parameters ---------- - X : npt.NDArray[np.float_] + X : npt.NDArray[np.float64] (Un)observed point(s) node_index : int Index of the node to start the traversal from @@ -278,7 +278,7 @@ def _traverse_tree( Returns ------- - npt.NDArray[np.float_] + npt.NDArray[np.float64] Leaf node value or mean of leaf node values """ @@ -327,14 +327,14 @@ def _traverse_tree( return p_d def _traverse_leaf_values( - self, leaf_values: List[npt.NDArray[np.float_]], leaf_n_values: List[int], node_index: int + self, leaf_values: List[npt.NDArray[np.float64]], leaf_n_values: List[int], node_index: int ) -> None: """ Traverse the tree appending leaf values starting from a particular node. Parameters ---------- - leaf_values : List[npt.NDArray[np.float_]] + leaf_values : List[npt.NDArray[np.float64]] node_index : int """ node = self.get_node(node_index) diff --git a/pymc_bart/utils.py b/pymc_bart/utils.py index ef61cd8..e506581 100644 --- a/pymc_bart/utils.py +++ b/pymc_bart/utils.py @@ -15,7 +15,7 @@ from .tree import Tree -TensorLike = Union[npt.NDArray[np.float_], pt.TensorVariable] +TensorLike = Union[npt.NDArray[np.float64], pt.TensorVariable] def _sample_posterior( @@ -25,7 +25,7 @@ def _sample_posterior( size: Optional[Union[int, Tuple[int, ...]]] = None, excluded: Optional[List[int]] = None, shape: int = 1, -) -> npt.NDArray[np.float_]: +) -> npt.NDArray[np.float64]: """ Generate samples from the BART-posterior. @@ -153,8 +153,8 @@ def plot_dependence(*args, kind="pdp", **kwargs): # pylint: disable=unused-argu def plot_ice( bartrv: Variable, - X: npt.NDArray[np.float_], - Y: Optional[npt.NDArray[np.float_]] = None, + X: npt.NDArray[np.float64], + Y: Optional[npt.NDArray[np.float64]] = None, var_idx: Optional[List[int]] = None, var_discrete: Optional[List[int]] = None, func: Optional[Callable] = None, @@ -179,9 +179,9 @@ def plot_ice( ---------- bartrv : BART Random Variable BART variable once the model that include it has been fitted. - X : npt.NDArray[np.float_] + X : npt.NDArray[np.float64] The covariate matrix. - Y : Optional[npt.NDArray[np.float_]], by default None. + Y : Optional[npt.NDArray[np.float64]], by default None. The response vector. var_idx : Optional[List[int]], by default None. List of the indices of the covariate for which to compute the pdp or ice. @@ -298,8 +298,8 @@ def identity(x): def plot_pdp( bartrv: Variable, - X: npt.NDArray[np.float_], - Y: Optional[npt.NDArray[np.float_]] = None, + X: npt.NDArray[np.float64], + Y: Optional[npt.NDArray[np.float64]] = None, xs_interval: str = "quantiles", xs_values: Optional[Union[int, List[float]]] = None, var_idx: Optional[List[int]] = None, @@ -324,9 +324,9 @@ def plot_pdp( ---------- bartrv : BART Random Variable BART variable once the model that include it has been fitted. - X : npt.NDArray[np.float_] + X : npt.NDArray[np.float64] The covariate matrix. - Y : Optional[npt.NDArray[np.float_]], by default None. + Y : Optional[npt.NDArray[np.float64]], by default None. The response vector. xs_interval : str Method used to compute the values X used to evaluate the predicted function. "linear", @@ -525,14 +525,14 @@ def _get_axes( def _prepare_plot_data( - X: npt.NDArray[np.float_], - Y: Optional[npt.NDArray[np.float_]] = None, + X: npt.NDArray[np.float64], + Y: Optional[npt.NDArray[np.float64]] = None, xs_interval: str = "quantiles", xs_values: Optional[Union[int, List[float]]] = None, var_idx: Optional[List[int]] = None, var_discrete: Optional[List[int]] = None, ) -> Tuple[ - npt.NDArray[np.float_], + npt.NDArray[np.float64], List[str], str, List[int], @@ -618,10 +618,10 @@ def _prepare_plot_data( def _create_pdp_data( - X: npt.NDArray[np.float_], + X: npt.NDArray[np.float64], xs_interval: str, xs_values: Optional[Union[int, List[float]]] = None, -) -> npt.NDArray[np.float_]: +) -> npt.NDArray[np.float64]: """ Create data for partial dependence plot. @@ -636,7 +636,7 @@ def _create_pdp_data( Returns ------- - npt.NDArray[np.float_] + npt.NDArray[np.float64] A 2D array for the fake_X data. """ if xs_interval == "insample": @@ -653,8 +653,8 @@ def _create_pdp_data( def _smooth_mean( - new_x: npt.NDArray[np.float_], - p_di: npt.NDArray[np.float_], + new_x: npt.NDArray[np.float64], + p_di: npt.NDArray[np.float64], kind: str = "pdp", smooth_kwargs: Optional[Dict[str, Any]] = None, ) -> Tuple[np.ndarray, np.ndarray]: @@ -695,7 +695,7 @@ def _smooth_mean( def plot_variable_importance( # noqa: PLR0915 idata: az.InferenceData, bartrv: Variable, - X: npt.NDArray[np.float_], + X: npt.NDArray[np.float64], labels: Optional[List[str]] = None, method: str = "VI", figsize: Optional[Tuple[float, float]] = None, @@ -713,7 +713,7 @@ def plot_variable_importance( # noqa: PLR0915 InferenceData containing a collection of BART_trees in sample_stats group bartrv : BART Random Variable BART variable once the model that include it has been fitted. - X : npt.NDArray[np.float_] + X : npt.NDArray[np.float64] The covariate matrix. labels : Optional[List[str]] List of the names of the covariates. If X is a DataFrame the names of the covariables will