Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
Abdelrahman13-coder authored Apr 19, 2022
1 parent db320e8 commit dbe5baf
Showing 1 changed file with 214 additions and 0 deletions.
214 changes: 214 additions & 0 deletions ML algorithms from scratch/Naive Bayes from scratch.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Naive Bayes from scratch.ipynb.ipynb",
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"\n",
"\n",
"## Naive Bayes \n",
"\n"
],
"metadata": {
"id": "ZPIkyYBagucI"
}
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "32VI1xFQgtIP"
},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"source": [
"class NaiveBayes:\n",
" def fit(self,X,y):\n",
" no_samples, no_features = X.shape\n",
" self.no_classes = len(np.unique(y))\n",
" self.mean = np.zeros((self.no_classes, no_features))\n",
" self.var = np.zeros((self.no_classes, no_features))\n",
" self.priors = np.zeros(self.no_classes)\n",
" for c in range(self.no_classes):\n",
" x_class = X[y==c]\n",
" \n",
" self.mean[c,:] = np.mean(x_class, axis = 0)\n",
" self.var[c,:] = np.mean(x_class, axis = 0)\n",
" self.priors[c] = x_class.shape[0] / float(no_features) \n",
" \n",
" def gaus_class_probability(self,x):\n",
" posteriors = []\n",
"\n",
" for c in range(self.no_classes):\n",
" mean = self.mean[c]\n",
" var = self.var[c]\n",
" prior = np.log(self.priors[c])\n",
"\n",
" conditional_prob = np.sum(np.log(self.gaussian_density(x,mean,var)))\n",
" posterior = prior + conditional_prob\n",
"\n",
" posteriors.append(posterior)\n",
" #return the index of the highest class probability\n",
" return np.argmax(posteriors)\n",
" def gaussian_density(self,x,mean,var):\n",
" constant = 1 / np.sqrt(var *2 * np.pi)\n",
"\n",
" probability = np.exp(-0.5 * ((x - mean)**2 / var))\n",
"\n",
" gaus = constant * probability\n",
" \n",
" return gaus\n",
" def gaus_predict(self,X):\n",
" y_pred = [self.gaus_class_probability(x) for x in X]\n",
" return np.array(y_pred)\n",
"\n",
"\n",
"\n",
"\n",
" # def pdf_class_probability(self,x):\n",
" # posteriors = []\n",
"\n",
" # for c in range(self.no_classes):\n",
" # mean = self.mean[c]\n",
" # var = self.var[c]\n",
" # prior = np.log(self.priors[c])\n",
"\n",
" # conditional_prob = np.sum(np.log(self.pdf(c,x)))\n",
" # posterior = prior + conditional_prob\n",
" # posteriors.append(posterior)\n",
" # #return the index of the highest class probability\n",
" # return np.argmax(posteriors)\n",
" # def pdf_predict(self,X):\n",
" # y_pred = [self.gaus_class_probability(x) for x in X]\n",
" # return np.array(y_pred)\n",
" \n",
" \n",
" \n",
" # def pdf(self,class_idx, x):\n",
" # mean = self.mean[class_idx]\n",
" # var = self.var[class_idx]\n",
" # constant = 1 / np.sqrt(2 * np.pi * (var**2))\n",
" # probability = np.exp(-((x-mean)**2)/(2*(var**2)))\n",
" \n",
" # pdf = constant * probability\n",
"\n",
" # return pdf"
],
"metadata": {
"id": "0Q_4TZqssUx0"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from sklearn import datasets\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"# load iris dataset\n",
"iris = datasets.load_iris()\n",
"X = iris.data\n",
"y = iris.target\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)\n",
"\n"
],
"metadata": {
"id": "OQgYd2yNBH9H"
},
"execution_count": 4,
"outputs": []
},
{
"cell_type": "code",
"source": [
"nb_gaus = NaiveBayes()\n",
"nb_gaus.fit(X_train, y_train)\n",
"gaus_predictions = nb_gaus.gaus_predict(X_test)\n",
"\n",
"# helper function to calculate accuracy\n",
"def get_accuracy(y_true, y_hat):\n",
" return np.sum(y_true==y_hat) / len(y_true)\n",
" \n",
"# print results\n",
"print('Naive Bayes Accuracy: ', get_accuracy(y_test, gaus_predictions))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "voX5B2-OCasD",
"outputId": "5efef2bc-670a-4479-9808-74028835bf69"
},
"execution_count": 8,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Naive Bayes Accuracy: 0.9666666666666667\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# instantiate, train and predict Naive Bayes Classifier\n",
"nb_pdf = NaiveBayes()\n",
"nb_pdf.fit(X_train, y_train)\n",
"pdf_predictions = nb_pdf.pdf_predict(X_test)\n",
"\n",
"# print results\n",
"print('Naive Bayes Accuracy: ', get_accuracy(y_test, pdf_predictions))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "LSp4eD66Cw8e",
"outputId": "dc03dcf9-f375-492c-e4ca-bb5cba0a2f62"
},
"execution_count": 10,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Naive Bayes Accuracy: 0.9666666666666667\n"
]
}
]
},
{
"cell_type": "code",
"source": [
""
],
"metadata": {
"id": "w0Mvy4JDG9VF"
},
"execution_count": null,
"outputs": []
}
]
}

0 comments on commit dbe5baf

Please sign in to comment.