diff --git a/ML algorithms from scratch/Naive Bayes from scratch.ipynb b/ML algorithms from scratch/Naive Bayes from scratch.ipynb
new file mode 100644
index 0000000..b6543af
--- /dev/null
+++ b/ML algorithms from scratch/Naive Bayes from scratch.ipynb
@@ -0,0 +1,237 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "Naive Bayes from scratch.ipynb",
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Naive Bayes"
+      ],
+      "metadata": {
+        "id": "ZPIkyYBagucI"
+      }
+    },
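+    {
+      "cell_type": "markdown",
+      "source": [
+        "Gaussian naive Bayes treats the features as conditionally independent given the class and models each feature with a class-specific Gaussian. The class below evaluates, for every class $c$ and feature $x_i$, the likelihood\n",
+        "\n",
+        "$$P(x_i \\mid y = c) = \\frac{1}{\\sqrt{2 \\pi \\sigma_{c,i}^2}} \\exp\\left( -\\frac{(x_i - \\mu_{c,i})^2}{2 \\sigma_{c,i}^2} \\right)$$\n",
+        "\n",
+        "and predicts the class with the largest log-posterior:\n",
+        "\n",
+        "$$\\hat{y} = \\arg\\max_c \\left( \\log P(y = c) + \\sum_i \\log P(x_i \\mid y = c) \\right)$$"
+      ],
+      "metadata": {}
+    },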
+    {
+      "cell_type": "code",
+      "execution_count": 1,
+      "metadata": {
+        "id": "32VI1xFQgtIP"
+      },
+      "outputs": [],
+      "source": [
+        "import numpy as np"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "class NaiveBayes:\n",
+        "    def fit(self, X, y):\n",
+        "        no_samples, no_features = X.shape\n",
+        "        self.no_classes = len(np.unique(y))\n",
+        "        # per-class feature means, feature variances and class priors\n",
+        "        self.mean = np.zeros((self.no_classes, no_features))\n",
+        "        self.var = np.zeros((self.no_classes, no_features))\n",
+        "        self.priors = np.zeros(self.no_classes)\n",
+        "        for c in range(self.no_classes):\n",
+        "            x_class = X[y == c]\n",
+        "            self.mean[c, :] = np.mean(x_class, axis=0)\n",
+        "            self.var[c, :] = np.var(x_class, axis=0)\n",
+        "            # prior = fraction of training samples belonging to class c\n",
+        "            self.priors[c] = x_class.shape[0] / float(no_samples)\n",
+        "\n",
+        "    def gaus_class_probability(self, x):\n",
+        "        posteriors = []\n",
+        "        for c in range(self.no_classes):\n",
+        "            mean = self.mean[c]\n",
+        "            var = self.var[c]\n",
+        "            prior = np.log(self.priors[c])\n",
+        "            # sum of per-feature log-likelihoods (naive independence assumption)\n",
+        "            conditional_prob = np.sum(np.log(self.gaussian_density(x, mean, var)))\n",
+        "            posterior = prior + conditional_prob\n",
+        "            posteriors.append(posterior)\n",
+        "        # return the index of the highest class posterior\n",
+        "        return np.argmax(posteriors)\n",
+        "\n",
+        "    def gaussian_density(self, x, mean, var):\n",
+        "        # N(x; mean, var), evaluated element-wise per feature\n",
+        "        constant = 1 / np.sqrt(2 * np.pi * var)\n",
+        "        probability = np.exp(-0.5 * ((x - mean)**2 / var))\n",
+        "        return constant * probability\n",
+        "\n",
+        "    def gaus_predict(self, X):\n",
+        "        y_pred = [self.gaus_class_probability(x) for x in X]\n",
+        "        return np.array(y_pred)\n",
+        "\n",
+        "    # alternative formulation: the same Gaussian density as an explicit pdf helper\n",
+        "    def pdf_class_probability(self, x):\n",
+        "        posteriors = []\n",
+        "        for c in range(self.no_classes):\n",
+        "            prior = np.log(self.priors[c])\n",
+        "            conditional_prob = np.sum(np.log(self.pdf(c, x)))\n",
+        "            posteriors.append(prior + conditional_prob)\n",
+        "        # return the index of the highest class posterior\n",
+        "        return np.argmax(posteriors)\n",
+        "\n",
+        "    def pdf_predict(self, X):\n",
+        "        y_pred = [self.pdf_class_probability(x) for x in X]\n",
+        "        return np.array(y_pred)\n",
+        "\n",
+        "    def pdf(self, class_idx, x):\n",
+        "        mean = self.mean[class_idx]\n",
+        "        var = self.var[class_idx]\n",
+        "        # self.var already stores the variance, so it is not squared again\n",
+        "        constant = 1 / np.sqrt(2 * np.pi * var)\n",
+        "        probability = np.exp(-((x - mean)**2) / (2 * var))\n",
+        "        return constant * probability"
+      ],
+      "metadata": {
+        "id": "0Q_4TZqssUx0"
+      },
+      "execution_count": 2,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from sklearn import datasets\n",
+        "from sklearn.model_selection import train_test_split\n",
+        "\n",
+        "# load the iris dataset and create a train/test split\n",
+        "iris = datasets.load_iris()\n",
+        "X = iris.data\n",
+        "y = iris.target\n",
+        "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)"
+      ],
+      "metadata": {
+        "id": "OQgYd2yNBH9H"
+      },
+      "execution_count": 4,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# instantiate, train and evaluate the from-scratch classifier\n",
+        "nb_gaus = NaiveBayes()\n",
+        "nb_gaus.fit(X_train, y_train)\n",
+        "gaus_predictions = nb_gaus.gaus_predict(X_test)\n",
+        "\n",
+        "# helper function to calculate accuracy\n",
+        "def get_accuracy(y_true, y_hat):\n",
+        "    return np.sum(y_true == y_hat) / len(y_true)\n",
+        "\n",
+        "# print results\n",
+        "print('Naive Bayes Accuracy: ', get_accuracy(y_test, gaus_predictions))"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "voX5B2-OCasD",
+        "outputId": "5efef2bc-670a-4479-9808-74028835bf69"
+      },
+      "execution_count": 8,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Naive Bayes Accuracy: 0.9666666666666667\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# instantiate, train and predict Naive Bayes Classifier\n",
+        "nb_pdf = NaiveBayes()\n",
+        "nb_pdf.fit(X_train, y_train)\n",
+        "pdf_predictions = nb_pdf.pdf_predict(X_test)\n",
+        "\n",
+        "# print results\n",
+        "print('Naive Bayes Accuracy: ', get_accuracy(y_test, pdf_predictions))"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "LSp4eD66Cw8e",
+        "outputId": "dc03dcf9-f375-492c-e4ca-bb5cba0a2f62"
+      },
+      "execution_count": 10,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Naive Bayes Accuracy: 0.9666666666666667\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "References:\n",
+        "\n",
+        "[Implementing Naive Bayes from scratch (Towards Data Science)](https://towardsdatascience.com/implementing-naive-bayes-from-scratch-df5572e042ac)\n",
+        "\n",
+        "[Implementing the Naive Bayes algorithm from scratch in Python (Towards Data Science)](https://towardsdatascience.com/implementing-naive-bayes-algorithm-from-scratch-python-c6880cfc9c41)"
+      ],
+      "metadata": {
+        "id": "p9mNsBOlM804"
+      }
+    },
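+    {
+      "cell_type": "markdown",
+      "source": [
+        "As a quick sanity check (not part of the original write-up), the from-scratch classifier can be compared with scikit-learn's `GaussianNB`, which fits the same model; both should reach the same accuracy on this split."
+      ],
+      "metadata": {}
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# sanity check: scikit-learn's Gaussian naive Bayes on the same split\n",
+        "from sklearn.naive_bayes import GaussianNB\n",
+        "\n",
+        "sk_nb = GaussianNB()\n",
+        "sk_nb.fit(X_train, y_train)\n",
+        "sk_predictions = sk_nb.predict(X_test)\n",
+        "\n",
+        "print('sklearn GaussianNB Accuracy: ', get_accuracy(y_test, sk_predictions))"
+      ],
+      "metadata": {},
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}
\ No newline at end of file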