-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsimple_linear_regression.py
50 lines (40 loc) · 1.2 KB
/
simple_linear_regression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the housing data set and pick out the two regression variables:
# GrLivArea is used as the predictor (X) and SalePrice as the response (y).
df = pd.read_csv('data/housing.csv')
X = df.GrLivArea
y = df.SalePrice
# Calculate Mean and Variance.
def mean(series):
    """Return the arithmetic mean of ``series``.

    Args:
        series: A sized iterable of numbers, i.e. anything that supports
            both ``len()`` and iteration (a list, a pandas Series, ...).

    Returns:
        The sum of the values divided by how many there are.

    Raises:
        ZeroDivisionError: If ``series`` is empty, because the mean of no
            values is undefined.
    """
    count = len(series)
    if count == 0:
        # Keep the exception type the bare division would have produced, but
        # say what actually went wrong instead of "division by zero".
        raise ZeroDivisionError("mean() of an empty series is undefined")
    return sum(series) / count
def variance(series):
    """Return the sum of squared deviations of ``series`` from its mean.

    The sum is NOT divided by the number of observations, so this is the
    unnormalised variance. Callers that need the population or sample
    variance must divide the result by ``N`` or ``N - 1`` themselves.

    Args:
        series: A sized iterable of numbers (list, pandas Series, ...).

    Returns:
        ``sum((x - mean(series)) ** 2 for x in series)``; ``0`` when
        ``series`` is empty.
    """
    if len(series) == 0:
        # An empty sum is 0; return it before asking for the mean of nothing.
        return 0
    # Compute the mean once rather than once per element.
    center = mean(series)
    # Iterate over the values themselves instead of ``series[i]``: on a pandas
    # Series ``[]`` with an integer is a label lookup, which fails or picks the
    # wrong row whenever the index is not the default 0..N-1.
    return sum((value - center) ** 2 for value in series)
# Calculate Covariance.
def covariance(series_x, series_y):
    """Return the sum of co-deviations of two equally sized series.

    The sum is NOT divided by the number of observations, so this is the
    unnormalised covariance.

    Args:
        series_x: A sized iterable of numbers.
        series_y: A sized iterable of numbers with the same length as
            ``series_x``.

    Returns:
        ``sum((x - mean_x) * (y - mean_y))`` over the positionally paired
        elements; ``0`` when both series are empty.

    Raises:
        IndexError: If the two series differ in length.
    """
    size = len(series_x)
    # Compare the lengths, not the series: ``len(series_x == series_y)`` takes
    # the length of the comparison result and never checks the sizes.
    if size != len(series_y):
        raise IndexError("Sizes must be same!!")
    if size == 0:
        # An empty sum is 0; return it before asking for the mean of nothing.
        return 0
    # Compute each mean once rather than once per element.
    mean_x = mean(series_x)
    mean_y = mean(series_y)
    # ``zip`` pairs the values by position, which also works for a pandas
    # Series whose index is not the default 0..N-1.
    return sum(
        (x - mean_x) * (y - mean_y) for x, y in zip(series_x, series_y)
    )
# Estimate Coefficients.
def coefficients(X, Y):
    """Estimate the intercept and slope of the least-squares line Y ~ X.

    Args:
        X: Predictor values.
        Y: Response values, paired with ``X``.

    Returns:
        A ``(beta_0, beta_1)`` tuple: the intercept followed by the slope.
    """
    # Slope: co-deviation of X and Y relative to the spread of X.
    slope = covariance(X, Y) / variance(X)
    # Intercept: makes the line pass through the point (mean(X), mean(Y)).
    intercept = mean(Y) - slope * mean(X)
    return intercept, slope
# Make Predictions.
def prediction(X, coefficients):
    """Evaluate the fitted line at every value in ``X``.

    Args:
        X: Iterable of predictor values.
        coefficients: A two-item sequence ``(intercept, slope)``.

    Returns:
        A list holding ``intercept + slope * x`` for each ``x`` in ``X``,
        in the same order.
    """
    intercept, slope = coefficients
    return [intercept + slope * value for value in X]
# Estimate the coefficients from (X, y) and compute the fitted value for
# every element of X.
y_hat = prediction(X, coefficients(X, y))
# Evaluate with r^2
def rsqr(Y, y_hat):
    """Return the coefficient of determination (R^2) of a set of predictions.

    Computed as ``1 - ss_res / ss_tot``, where ``ss_tot`` is the sum of
    squared deviations of ``Y`` from its mean and ``ss_res`` is the sum of
    squared differences between ``Y`` and ``y_hat``.

    No special handling is done when ``ss_tot`` is zero (all values of ``Y``
    equal); the final division is then undefined.

    Args:
        Y: Sized iterable of observed values.
        y_hat: Sized iterable of predicted values, paired with ``Y`` by
            position.

    Returns:
        The R^2 score.

    Raises:
        IndexError: If ``Y`` and ``y_hat`` differ in length.
        ZeroDivisionError: If ``Y`` is empty.
    """
    # Reject mismatched inputs instead of silently scoring only a prefix.
    if len(Y) != len(y_hat):
        raise IndexError("Sizes must be same!!")
    # Compute the mean once rather than once per element.
    y_bar = mean(Y)
    # Iterate by position so a pandas Series with a non-default index works.
    ss_tot = sum((actual - y_bar) ** 2 for actual in Y)
    ss_res = sum((actual - fitted) ** 2 for actual, fitted in zip(Y, y_hat))
    return 1 - ss_res / ss_tot
# Plot the observations as a scatter, draw the fitted values over the same
# X positions, then display the figure.
plt.scatter(X, y)
plt.plot(X, y_hat)
plt.show()