-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
db320e8
commit dbe5baf
Showing
1 changed file
with
214 additions
and
0 deletions.
There are no files selected for viewing
214 changes: 214 additions & 0 deletions
214
ML algorithms from scratch/Naive Bayes from scratch.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,214 @@ | ||
{ | ||
"nbformat": 4, | ||
"nbformat_minor": 0, | ||
"metadata": { | ||
"colab": { | ||
"name": "Naive Bayes from scratch.ipynb.ipynb", | ||
"provenance": [] | ||
}, | ||
"kernelspec": { | ||
"name": "python3", | ||
"display_name": "Python 3" | ||
}, | ||
"language_info": { | ||
"name": "python" | ||
} | ||
}, | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"source": [ | ||
"\n", | ||
"\n", | ||
"## Naive Bayes \n", | ||
"\n" | ||
], | ||
"metadata": { | ||
"id": "ZPIkyYBagucI" | ||
} | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": { | ||
"id": "32VI1xFQgtIP" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"import numpy as np" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"class NaiveBayes:\n", | ||
" def fit(self,X,y):\n", | ||
" no_samples, no_features = X.shape\n", | ||
" self.no_classes = len(np.unique(y))\n", | ||
" self.mean = np.zeros((self.no_classes, no_features))\n", | ||
" self.var = np.zeros((self.no_classes, no_features))\n", | ||
" self.priors = np.zeros(self.no_classes)\n", | ||
" for c in range(self.no_classes):\n", | ||
" x_class = X[y==c]\n", | ||
" \n", | ||
" self.mean[c,:] = np.mean(x_class, axis = 0)\n", | ||
" self.var[c,:] = np.mean(x_class, axis = 0)\n", | ||
" self.priors[c] = x_class.shape[0] / float(no_features) \n", | ||
" \n", | ||
" def gaus_class_probability(self,x):\n", | ||
" posteriors = []\n", | ||
"\n", | ||
" for c in range(self.no_classes):\n", | ||
" mean = self.mean[c]\n", | ||
" var = self.var[c]\n", | ||
" prior = np.log(self.priors[c])\n", | ||
"\n", | ||
" conditional_prob = np.sum(np.log(self.gaussian_density(x,mean,var)))\n", | ||
" posterior = prior + conditional_prob\n", | ||
"\n", | ||
" posteriors.append(posterior)\n", | ||
" #return the index of the highest class probability\n", | ||
" return np.argmax(posteriors)\n", | ||
" def gaussian_density(self,x,mean,var):\n", | ||
" constant = 1 / np.sqrt(var *2 * np.pi)\n", | ||
"\n", | ||
" probability = np.exp(-0.5 * ((x - mean)**2 / var))\n", | ||
"\n", | ||
" gaus = constant * probability\n", | ||
" \n", | ||
" return gaus\n", | ||
" def gaus_predict(self,X):\n", | ||
" y_pred = [self.gaus_class_probability(x) for x in X]\n", | ||
" return np.array(y_pred)\n", | ||
"\n", | ||
"\n", | ||
"\n", | ||
"\n", | ||
" # def pdf_class_probability(self,x):\n", | ||
" # posteriors = []\n", | ||
"\n", | ||
" # for c in range(self.no_classes):\n", | ||
" # mean = self.mean[c]\n", | ||
" # var = self.var[c]\n", | ||
" # prior = np.log(self.priors[c])\n", | ||
"\n", | ||
" # conditional_prob = np.sum(np.log(self.pdf(c,x)))\n", | ||
" # posterior = prior + conditional_prob\n", | ||
" # posteriors.append(posterior)\n", | ||
" # #return the index of the highest class probability\n", | ||
" # return np.argmax(posteriors)\n", | ||
" # def pdf_predict(self,X):\n", | ||
" # y_pred = [self.gaus_class_probability(x) for x in X]\n", | ||
" # return np.array(y_pred)\n", | ||
" \n", | ||
" \n", | ||
" \n", | ||
" # def pdf(self,class_idx, x):\n", | ||
" # mean = self.mean[class_idx]\n", | ||
" # var = self.var[class_idx]\n", | ||
" # constant = 1 / np.sqrt(2 * np.pi * (var**2))\n", | ||
" # probability = np.exp(-((x-mean)**2)/(2*(var**2)))\n", | ||
" \n", | ||
" # pdf = constant * probability\n", | ||
"\n", | ||
" # return pdf" | ||
], | ||
"metadata": { | ||
"id": "0Q_4TZqssUx0" | ||
}, | ||
"execution_count": 2, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"from sklearn import datasets\n", | ||
"from sklearn.model_selection import train_test_split\n", | ||
"\n", | ||
"# load iris dataset\n", | ||
"iris = datasets.load_iris()\n", | ||
"X = iris.data\n", | ||
"y = iris.target\n", | ||
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)\n", | ||
"\n" | ||
], | ||
"metadata": { | ||
"id": "OQgYd2yNBH9H" | ||
}, | ||
"execution_count": 4, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"nb_gaus = NaiveBayes()\n", | ||
"nb_gaus.fit(X_train, y_train)\n", | ||
"gaus_predictions = nb_gaus.gaus_predict(X_test)\n", | ||
"\n", | ||
"# helper function to calculate accuracy\n", | ||
"def get_accuracy(y_true, y_hat):\n", | ||
" return np.sum(y_true==y_hat) / len(y_true)\n", | ||
" \n", | ||
"# print results\n", | ||
"print('Naive Bayes Accuracy: ', get_accuracy(y_test, gaus_predictions))" | ||
], | ||
"metadata": { | ||
"colab": { | ||
"base_uri": "https://localhost:8080/" | ||
}, | ||
"id": "voX5B2-OCasD", | ||
"outputId": "5efef2bc-670a-4479-9808-74028835bf69" | ||
}, | ||
"execution_count": 8, | ||
"outputs": [ | ||
{ | ||
"output_type": "stream", | ||
"name": "stdout", | ||
"text": [ | ||
"Naive Bayes Accuracy: 0.9666666666666667\n" | ||
] | ||
} | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"# instantiate, train and predict Naive Bayes Classifier\n", | ||
"nb_pdf = NaiveBayes()\n", | ||
"nb_pdf.fit(X_train, y_train)\n", | ||
"pdf_predictions = nb_pdf.pdf_predict(X_test)\n", | ||
"\n", | ||
"# print results\n", | ||
"print('Naive Bayes Accuracy: ', get_accuracy(y_test, pdf_predictions))" | ||
], | ||
"metadata": { | ||
"colab": { | ||
"base_uri": "https://localhost:8080/" | ||
}, | ||
"id": "LSp4eD66Cw8e", | ||
"outputId": "dc03dcf9-f375-492c-e4ca-bb5cba0a2f62" | ||
}, | ||
"execution_count": 10, | ||
"outputs": [ | ||
{ | ||
"output_type": "stream", | ||
"name": "stdout", | ||
"text": [ | ||
"Naive Bayes Accuracy: 0.9666666666666667\n" | ||
] | ||
} | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"" | ||
], | ||
"metadata": { | ||
"id": "w0Mvy4JDG9VF" | ||
}, | ||
"execution_count": null, | ||
"outputs": [] | ||
} | ||
] | ||
} |