Merge pull request #22 from msamsami/enhancements

Adhere to sklearn's docstring and type hint format, minor improvements
msamsami · Dec 29, 2023 · bb62873 · bb62873
2 parents 484d6da + 0d61020
commit bb62873
Show file tree

Hide file tree

Showing 10 changed files with 313 additions and 132 deletions.
diff --git a/.gitignore b/.gitignore
@@ -133,4 +133,5 @@ dmypy.json
 
 # Test files
 /test
-test.ipynb
+test.ipynb
+dummy.py
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 # WNB: General and weighted naive Bayes classifiers
 
-![](https://img.shields.io/badge/version-v0.2.0-green)
+![](https://img.shields.io/badge/version-v0.2.1-green)
 ![](https://img.shields.io/badge/python-3.7%20%7C%203.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-blue)
 ![](https://github.com/msamsami/weighted-naive-bayes/actions/workflows/python-publish.yml/badge.svg)
 [![](https://img.shields.io/pypi/v/wnb)](https://pypi.org/project/wnb/)

diff --git a/setup.cfg b/setup.cfg
@@ -0,0 +1,27 @@
+[metadata]
+name = wnb
+version = attr: wnb.__version__
+author = Mehdi Samsami
+author_email = mehdisamsami@live.com
+description = General and Weighted Naive Bayes Classifiers
+long_description = file: README.md
+long_description_content_type = text/markdown
+url = https://github.com/msamsami/weighted-naive-bayes
+keywords = python, bayes, naivebayes, classifier, probabilistic
+license = BSD
+
+[options]
+packages = find:
+python_requires = >=3.7
+install_requires =
+    pandas
+    scipy
+    scikit-learn
+
+[options.extras_require]
+dev =
+    pytest
+    black
+
+[aliases]
+test = pytest
diff --git a/setup.py b/setup.py
@@ -2,10 +2,13 @@
 from os import path
 from setuptools import setup, find_packages
 
+with open(path.join("wnb", "__init__.py")) as f:
+    exec(f.readlines(1)[0])
+
 
 setup(
     name="wnb",
-    version="0.2.0",
+    version=__version__,
     description="Python library for the implementations of general and weighted naive Bayes (WNB) classifiers.",
     keywords=["python", "bayes", "naivebayes", "classifier", "probabilistic"],
     author="Mehdi Samsami",

diff --git a/wnb/__init__.py b/wnb/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.2.0"
+__version__ = "0.2.1"
 __author__ = "Mehdi Samsami"
 
 

diff --git a/wnb/_enum_meta.py b/wnb/_enum_meta.py
@@ -0,0 +1,29 @@
+from enum import EnumMeta, Enum
+from typing import Any
+
+
+class CaseInsensitiveEnumMeta(EnumMeta):
+    """
+    Enum metaclass to allow for interoperability with case-insensitive strings.
+    """
+
+    def __getitem__(cls, name: str) -> Any:
+        return super(CaseInsensitiveEnumMeta, cls).__getitem__(name.upper())
+
+    def __getattr__(cls, name: str) -> Enum:
+        """Returns the enum member matching `name`.
+
+        We use __getattr__ instead of descriptors or inserting into the enum
+        class' __dict__ in order to support `name` and `value` being both
+        properties for enum members (which live in the class' __dict__) and
+        enum members themselves.
+
+        :param str name: The name of the enum member to retrieve.
+        :rtype: ~enum.Enum
+        :return: The enum member matching `name`.
+        :raises AttributeError: If `name` is not a valid enum member.
+        """
+        try:
+            return cls._member_map_[name.upper()]
+        except KeyError as err:
+            raise AttributeError(name) from err
diff --git a/wnb/_enums.py b/wnb/_enums.py
@@ -1,9 +1,11 @@
 from enum import Enum
 
+from ._enum_meta import CaseInsensitiveEnumMeta
+
 __all__ = ["Distribution"]
 
 
-class Distribution(str, Enum):
+class Distribution(str, Enum, metaclass=CaseInsensitiveEnumMeta):
     """
     Names of probability distributions.
     """

diff --git a/wnb/_typing.py b/wnb/_typing.py
@@ -0,0 +1,24 @@
+from typing import Union, Type
+
+import numpy.typing
+import numpy as np
+import pandas as pd
+from scipy.sparse import spmatrix
+
+from ._base import ContinuousDistMixin, DiscreteDistMixin
+from ._enums import Distribution
+
+__all__ = ["MatrixLike", "ArrayLike", "Int", "Float", "DistibutionLike"]
+
+ArrayLike = numpy.typing.ArrayLike
+MatrixLike = Union[np.ndarray, pd.DataFrame, spmatrix]
+
+Int = Union[int, np.int8, np.int16, np.int32, np.int64]
+Float = Union[float, np.float16, np.float32, np.float64]
+
+DistibutionLike = Union[
+    str,
+    Distribution,
+    Type[ContinuousDistMixin],
+    Type[DiscreteDistMixin],
+]
diff --git a/wnb/gnb.py b/wnb/gnb.py
@@ -1,5 +1,5 @@
 from abc import ABCMeta
-from typing import Union, Optional, Sequence, Type
+from typing import Optional, Sequence
 import warnings
 
 import numpy as np
@@ -12,8 +12,8 @@
 from sklearn.utils.multiclass import check_classification_targets
 from sklearn.utils.validation import check_is_fitted
 
-from ._base import ContinuousDistMixin, DiscreteDistMixin
 from ._enums import Distribution
+from ._typing import MatrixLike, ArrayLike, Float, DistibutionLike
 from .dist import AllDistributions, NonNumericDistributions
 
 __all__ = [
@@ -22,45 +22,64 @@
 
 
 class GeneralNB(ClassifierMixin, BaseEstimator, metaclass=ABCMeta):
-    """
-    A general Naive Bayes classifier that allows you to specify the likelihood distribution for each feature.
+    """A General Naive Bayes classifier that supports distinct likelihood distributions for individual features,
+    enabling more tailored modeling beyond the standard single-distribution approaches such as GaussianNB and BernoulliNB.
+
+    Parameters
+    ----------
+    priors : array-like of shape (n_classes,), default=None
+        Prior probabilities of the classes. If specified, the priors are not
+        adjusted according to the data.
+    distributions : sequence of distribution-like of length n_features, default=None
+        Probability distributions to be used for features' likelihoods. If not specified,
+        all likelihoods will be considered Gaussian.
+    alpha : float, default=1e-10
+        Additive (Laplace/Lidstone) smoothing parameter. Set alpha=0 for no smoothing.
+
+    Attributes
+    ----------
+    class_count_ : ndarray of shape (n_classes,)
+        Number of training samples observed in each class.
+
+    class_prior_ : ndarray of shape (n_classes,)
+        Probability of each class.
+
+    classes_ : ndarray of shape (n_classes,)
+        Class labels known to the classifier.
+
+    n_classes_ : int
+        Number of classes seen during :term:`fit`.
+
+    n_features_in_ : int
+        Number of features seen during :term:`fit`.
+
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during :term:`fit`. Defined only when `X`
+        has feature names that are all strings.
+
+    distributions_ : list of length `n_features_in_`
+        List of likelihood distributions used for the features, one per feature.
+
+    likelihood_params_ : dict
+        A mapping from class labels to their fitted likelihood distributions.
     """
 
-    feature_names_in_: np.ndarray
-    n_features_in_: int
-    classes_: np.ndarray
-    class_prior_: np.ndarray
     class_count_: np.ndarray
+    class_prior_: np.ndarray
+    classes_: np.ndarray
     n_classes_: int
+    n_features_in_: int
+    feature_names_in_: np.ndarray
     distributions_: list
     likelihood_params_: dict
 
     def __init__(
         self,
         *,
-        priors: Optional[Union[Sequence[float], np.ndarray]] = None,
-        distributions: Optional[
-            Sequence[
-                Union[
-                    str,
-                    Distribution,
-                    Type[ContinuousDistMixin],
-                    Type[DiscreteDistMixin],
-                ]
-            ]
-        ] = None,
-        alpha: float = 1e-10,
+        priors: Optional[ArrayLike] = None,
+        distributions: Optional[Sequence[DistibutionLike]] = None,
+        alpha: Float = 1e-10,
     ) -> None:
-        """Initializes an instance of the GeneralNB class.
-
-        Args:
-            priors (Optional[Union[list, np.ndarray]]): Prior probabilities. Defaults to None.
-            distributions: Probability distributions to be used for features' likelihoods. A sequence with same length
-                           of the number of features. If not specified, all likelihood will be considered Gaussian.
-                           Defaults to None.
-            alpha (float): Additive (Laplace/Lidstone) smoothing parameter (set alpha=0 for no smoothing). Defaults to 1e-10.
-
-        """
         self.priors = priors
         self.distributions = distributions
         self.alpha = alpha
@@ -188,21 +207,22 @@ def _prepare_parameters(self):
 
             self.distributions_ = self.distributions
 
-    def fit(
-        self,
-        X: Union[np.ndarray, pd.DataFrame],
-        y: Union[np.ndarray, pd.DataFrame, pd.Series],
-    ):
-        """Fits general Naive Bayes classifier to X and y.
-
-        Args:
-            X (Union[np.ndarray, pd.DataFrame]): Array-like of shape (n_samples, n_features).
-                                                 Training vectors, where `n_samples` is the number of samples
-                                                 and `n_features` is the number of features.
-            y (Union[np.ndarray, pd.DataFrame, pd.Series]): Array-like of shape (n_samples,). Target values.
-
-        Returns:
-            self: The instance itself.
+    def fit(self, X: MatrixLike, y: ArrayLike):
+        """Fits general Naive Bayes classifier according to X, y.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Training vectors, where `n_samples` is the number of samples
+            and `n_features` is the number of features.
+
+        y : array-like of shape (n_samples,)
+            Target values.
+
+        Returns
+        -------
+        self : object
+            Returns the instance itself.
         """
         self._check_n_features(X=X, reset=True)
         self._check_feature_names(X=X, reset=True)
@@ -234,29 +254,37 @@ def fit(
 
         return self
 
-    def predict(self, X: Union[np.ndarray, pd.DataFrame]) -> np.ndarray:
+    def predict(self, X: MatrixLike) -> np.ndarray:
         """Performs classification on an array of test vectors X.
 
-        Args:
-            X (Union[np.ndarray, pd.DataFrame]): Array-like of shape (n_samples, n_features). The input samples.
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            The input samples.
 
-        Returns:
-            np.ndarray: ndarray of shape (n_samples,). Predicted target values for X.
+        Returns
+        -------
+        C : ndarray of shape (n_samples,)
+            Predicted target values for X.
         """
         p_hat = self.predict_log_proba(X)
         y_hat = self.classes_[np.argmax(p_hat, axis=1)]
         return y_hat
 
-    def predict_log_proba(self, X: Union[np.ndarray, pd.DataFrame]) -> np.ndarray:
-        """Returns log-probability estimates for the test vector X.
+    def predict_log_proba(self, X: MatrixLike) -> np.ndarray:
+        """Returns log-probability estimates for the array of test vectors X.
 
-        Args:
-            X (Union[np.ndarray, pd.DataFrame]): Array-like of shape (n_samples, n_features). The input samples.
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            The input samples.
 
-        Returns:
-            np.ndarray: Array-like of shape (n_samples, n_classes).
-                        The log-probability of the samples for each class in the model.
-                        The columns correspond to the classes in sorted order, as they appear in the attribute `classes_`.
+        Returns
+        -------
+        C : array-like of shape (n_samples, n_classes)
+            Returns the log-probability of the samples for each class in
+            the model. The columns correspond to the classes in sorted
+            order, as they appear in the attribute :term:`classes_`.
         """
         # Check if fit has been called
         check_is_fitted(self)
@@ -300,15 +328,19 @@ def predict_log_proba(self, X: Union[np.ndarray, pd.DataFrame]) -> np.ndarray:
         )
         return log_proba
 
-    def predict_proba(self, X: Union[np.ndarray, pd.DataFrame]) -> np.ndarray:
-        """Returns probability estimates for the test vector X.
+    def predict_proba(self, X: MatrixLike) -> np.ndarray:
+        """Returns probability estimates for the array of test vectors X.
 
-        Args:
-            X (Union[np.ndarray, pd.DataFrame]): Array-like of shape (n_samples, n_features). The input samples.
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            The input samples.
 
-        Returns:
-            np.ndarray: Array-like of shape (n_samples, n_classes).
-                        The probability of the samples for each class in the model.
-                        The columns correspond to the classes in sorted order, as they appear in the attribute `classes_`.
+        Returns
+        -------
+        C : array-like of shape (n_samples, n_classes)
+            Returns the probability of the samples for each class in
+            the model. The columns correspond to the classes in sorted
+            order, as they appear in the attribute :term:`classes_`.
         """
         return np.exp(self.predict_log_proba(X))