Skip to content

Commit

Permalink
Merge pull request #22 from msamsami/enhancements
Browse files Browse the repository at this point in the history
Adhere to sklearn's docstring and type hint format, minor improvements
  • Loading branch information
msamsami authored Dec 29, 2023
2 parents 484d6da + 0d61020 commit bb62873
Show file tree
Hide file tree
Showing 10 changed files with 313 additions and 132 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -133,4 +133,5 @@ dmypy.json

# Test files
/test
test.ipynb
test.ipynb
dummy.py
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# WNB: General and weighted naive Bayes classifiers

![](https://img.shields.io/badge/version-v0.2.0-green)
![](https://img.shields.io/badge/version-v0.2.1-green)
![](https://img.shields.io/badge/python-3.7%20%7C%203.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-blue)
![](https://github.com/msamsami/weighted-naive-bayes/actions/workflows/python-publish.yml/badge.svg)
[![](https://img.shields.io/pypi/v/wnb)](https://pypi.org/project/wnb/)
Expand Down
27 changes: 27 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
[metadata]
name = wnb
version = attr: wnb.__version__
author = Mehdi Samsami
author_email = mehdisamsami@live.com
description = General and Weighted Naive Bayes Classifiers
long_description = file: README.md
long_description_content_type = text/markdown
url = https://github.com/msamsami/weighted-naive-bayes
keywords = python, bayes, naivebayes, classifier, probabilistic
license = BSD

[options]
packages = find:
python_requires = >=3.7
install_requires =
    pandas
    scipy
    scikit-learn

[options.extras_require]
dev =
    pytest
    black

[aliases]
test = pytest
5 changes: 4 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@
from os import path
from setuptools import setup, find_packages

# Read the package version from wnb/__init__.py without importing the package
# (its dependencies may not be installed yet at build time) and without
# exec-ing file contents. Unlike taking the first line of the file, the regex
# finds the assignment wherever it appears in the module.
import re

with open(path.join("wnb", "__init__.py"), encoding="utf-8") as f:
    _version_match = re.search(
        r"^__version__\s*=\s*[\"']([^\"']+)[\"']", f.read(), re.MULTILINE
    )

if _version_match is None:
    raise RuntimeError("Unable to find __version__ in wnb/__init__.py")

__version__ = _version_match.group(1)


setup(
name="wnb",
version="0.2.0",
version=__version__,
description="Python library for the implementations of general and weighted naive Bayes (WNB) classifiers.",
keywords=["python", "bayes", "naivebayes", "classifier", "probabilistic"],
author="Mehdi Samsami",
Expand Down
2 changes: 1 addition & 1 deletion wnb/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "0.2.0"
__version__ = "0.2.1"
__author__ = "Mehdi Samsami"


Expand Down
29 changes: 29 additions & 0 deletions wnb/_enum_meta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from enum import EnumMeta, Enum
from typing import Any


class CaseInsensitiveEnumMeta(EnumMeta):
    """Enum metaclass that makes member lookup by name case-insensitive.

    Both item access (``MyEnum["name"]``) and attribute access
    (``MyEnum.name``) upper-case the requested name before looking it up, so
    enums using this metaclass are expected to declare upper-case member names.
    """

    def __getitem__(cls, name: str) -> Any:
        """Return the member whose name equals ``name.upper()``.

        Parameters
        ----------
        name : str
            Name of the enum member, in any letter case.

        Returns
        -------
        member : Any
            The member returned by the standard ``EnumMeta`` item lookup.
        """
        normalized = name.upper()
        return super().__getitem__(normalized)

    def __getattr__(cls, name: str) -> Enum:
        """Return the enum member matching `name`, ignoring letter case.

        Python only calls this hook after regular attribute lookup on the
        class has failed. ``__getattr__`` is used instead of descriptors or
        inserting into the enum class' ``__dict__`` in order to support
        ``name`` and ``value`` being both properties for enum members (which
        live in the class' ``__dict__``) and enum members themselves.

        Parameters
        ----------
        name : str
            Name of the enum member to retrieve.

        Returns
        -------
        member : Enum
            The enum member matching `name`.

        Raises
        ------
        AttributeError
            If `name` is not a valid enum member.
        """
        members = cls._member_map_
        key = name.upper()
        try:
            return members[key]
        except KeyError as err:
            raise AttributeError(name) from err
4 changes: 3 additions & 1 deletion wnb/_enums.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from enum import Enum

from ._enum_meta import CaseInsensitiveEnumMeta

__all__ = ["Distribution"]


class Distribution(str, Enum):
class Distribution(str, Enum, metaclass=CaseInsensitiveEnumMeta):
"""
Names of probability distributions.
"""
Expand Down
24 changes: 24 additions & 0 deletions wnb/_typing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from typing import Union, Type

import numpy.typing
import numpy as np
import pandas as pd
from scipy.sparse import spmatrix

from ._base import ContinuousDistMixin, DiscreteDistMixin
from ._enums import Distribution

__all__ = [
    "MatrixLike",
    "ArrayLike",
    "Int",
    "Float",
    "DistributionLike",
    "DistibutionLike",
]

# Re-export of NumPy's own array-like annotation.
ArrayLike = numpy.typing.ArrayLike
# Feature-matrix containers: NumPy array, pandas DataFrame, or SciPy sparse matrix.
MatrixLike = Union[np.ndarray, pd.DataFrame, spmatrix]

# Python and fixed-width NumPy scalar types.
Int = Union[int, np.int8, np.int16, np.int32, np.int64]
Float = Union[float, np.float16, np.float32, np.float64]

# A likelihood distribution given as a plain string, a `Distribution` enum
# member, or a distribution class derived from one of the two mixins.
DistributionLike = Union[
    str,
    Distribution,
    Type[ContinuousDistMixin],
    Type[DiscreteDistMixin],
]

# Backward-compatible alias: the name was originally published with a typo
# and is still imported under that spelling elsewhere in the package.
DistibutionLike = DistributionLike
164 changes: 98 additions & 66 deletions wnb/gnb.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from abc import ABCMeta
from typing import Union, Optional, Sequence, Type
from typing import Optional, Sequence
import warnings

import numpy as np
Expand All @@ -12,8 +12,8 @@
from sklearn.utils.multiclass import check_classification_targets
from sklearn.utils.validation import check_is_fitted

from ._base import ContinuousDistMixin, DiscreteDistMixin
from ._enums import Distribution
from ._typing import MatrixLike, ArrayLike, Float, DistibutionLike
from .dist import AllDistributions, NonNumericDistributions

__all__ = [
Expand All @@ -22,45 +22,64 @@


class GeneralNB(ClassifierMixin, BaseEstimator, metaclass=ABCMeta):
"""
A general Naive Bayes classifier that allows you to specify the likelihood distribution for each feature.
"""A General Naive Bayes classifier that supports distinct likelihood distributions for individual features,
enabling more tailored modeling beyond the standard single-distribution approaches such as GaussianNB and BernoulliNB.
Parameters
----------
priors : array-like of shape (n_classes,), default=None
Prior probabilities of the classes. If specified, the priors are not
adjusted according to the data.
distributions : sequence of distribution-like of length n_features, default=None
Probability distributions to be used for features' likelihoods. If not specified,
all likelihoods will be considered Gaussian.
alpha : float, default=1e-10
Additive (Laplace/Lidstone) smoothing parameter. Set alpha=0 for no smoothing.
Attributes
----------
class_count_ : ndarray of shape (n_classes,)
Number of training samples observed in each class.
class_prior_ : ndarray of shape (n_classes,)
Probability of each class.
classes_ : ndarray of shape (n_classes,)
Class labels known to the classifier.
n_classes_ : int
Number of classes seen during :term:`fit`.
n_features_in_ : int
Number of features seen during :term:`fit`.
feature_names_in_ : ndarray of shape (`n_features_in_`,)
Names of features seen during :term:`fit`. Defined only when `X`
has feature names that are all strings.
distributions_ : list of length `n_features_in_`
List of likelihood distributions used to fit to features.
likelihood_params_ : dict
A mapping from class labels to their fitted likelihood distributions.
"""

feature_names_in_: np.ndarray
n_features_in_: int
classes_: np.ndarray
class_prior_: np.ndarray
class_count_: np.ndarray
class_prior_: np.ndarray
classes_: np.ndarray
n_classes_: int
n_features_in_: int
feature_names_in_: np.ndarray
distributions_: list
likelihood_params_: dict

def __init__(
self,
*,
priors: Optional[Union[Sequence[float], np.ndarray]] = None,
distributions: Optional[
Sequence[
Union[
str,
Distribution,
Type[ContinuousDistMixin],
Type[DiscreteDistMixin],
]
]
] = None,
alpha: float = 1e-10,
priors: Optional[ArrayLike] = None,
distributions: Optional[Sequence[DistibutionLike]] = None,
alpha: Float = 1e-10,
) -> None:
"""Initializes an instance of the GeneralNB class.
Args:
priors (Optional[Union[list, np.ndarray]]): Prior probabilities. Defaults to None.
distributions: Probability distributions to be used for features' likelihoods. A sequence with same length
of the number of features. If not specified, all likelihood will be considered Gaussian.
Defaults to None.
alpha (float): Additive (Laplace/Lidstone) smoothing parameter (set alpha=0 for no smoothing). Defaults to 1e-10.
"""
self.priors = priors
self.distributions = distributions
self.alpha = alpha
Expand Down Expand Up @@ -188,21 +207,22 @@ def _prepare_parameters(self):

self.distributions_ = self.distributions

def fit(
self,
X: Union[np.ndarray, pd.DataFrame],
y: Union[np.ndarray, pd.DataFrame, pd.Series],
):
"""Fits general Naive Bayes classifier to X and y.
Args:
X (Union[np.ndarray, pd.DataFrame]): Array-like of shape (n_samples, n_features).
Training vectors, where `n_samples` is the number of samples
and `n_features` is the number of features.
y (Union[np.ndarray, pd.DataFrame, pd.Series]): Array-like of shape (n_samples,). Target values.
Returns:
self: The instance itself.
def fit(self, X: MatrixLike, y: ArrayLike):
"""Fits general Naive Bayes classifier according to X, y.
Parameters
----------
X : array-like of shape (n_samples, n_features)
Training vectors, where `n_samples` is the number of samples
and `n_features` is the number of features.
y : array-like of shape (n_samples,)
Target values.
Returns
-------
self : object
Returns the instance itself.
"""
self._check_n_features(X=X, reset=True)
self._check_feature_names(X=X, reset=True)
Expand Down Expand Up @@ -234,29 +254,37 @@ def fit(

return self

def predict(self, X: Union[np.ndarray, pd.DataFrame]) -> np.ndarray:
def predict(self, X: MatrixLike) -> np.ndarray:
"""Performs classification on an array of test vectors X.
Args:
X (Union[np.ndarray, pd.DataFrame]): Array-like of shape (n_samples, n_features). The input samples.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input samples.
Returns:
np.ndarray: ndarray of shape (n_samples,). Predicted target values for X.
Returns
-------
C : ndarray of shape (n_samples,)
Predicted target values for X.
"""
p_hat = self.predict_log_proba(X)
y_hat = self.classes_[np.argmax(p_hat, axis=1)]
return y_hat

def predict_log_proba(self, X: Union[np.ndarray, pd.DataFrame]) -> np.ndarray:
"""Returns log-probability estimates for the test vector X.
def predict_log_proba(self, X: MatrixLike) -> np.ndarray:
"""Returns log-probability estimates for the array of test vectors X.
Args:
X (Union[np.ndarray, pd.DataFrame]): Array-like of shape (n_samples, n_features). The input samples.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input samples.
Returns:
np.ndarray: Array-like of shape (n_samples, n_classes).
The log-probability of the samples for each class in the model.
The columns correspond to the classes in sorted order, as they appear in the attribute `classes_`.
Returns
-------
C : array-like of shape (n_samples, n_classes)
Returns the log-probability of the samples for each class in
the model. The columns correspond to the classes in sorted
order, as they appear in the attribute :term:`classes_`.
"""
# Check if fit has been called
check_is_fitted(self)
Expand Down Expand Up @@ -300,15 +328,19 @@ def predict_log_proba(self, X: Union[np.ndarray, pd.DataFrame]) -> np.ndarray:
)
return log_proba

def predict_proba(self, X: Union[np.ndarray, pd.DataFrame]) -> np.ndarray:
"""Returns probability estimates for the test vector X.
def predict_proba(self, X: MatrixLike) -> np.ndarray:
"""Returns probability estimates for the array of test vectors X.
Args:
X (Union[np.ndarray, pd.DataFrame]): Array-like of shape (n_samples, n_features). The input samples.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input samples.
Returns:
np.ndarray: Array-like of shape (n_samples, n_classes).
The probability of the samples for each class in the model.
The columns correspond to the classes in sorted order, as they appear in the attribute `classes_`.
Returns
-------
C : array-like of shape (n_samples, n_classes)
Returns the probability of the samples for each class in
the model. The columns correspond to the classes in sorted
order, as they appear in the attribute :term:`classes_`.
"""
return np.exp(self.predict_log_proba(X))
Loading

0 comments on commit bb62873

Please sign in to comment.