-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy path1. regression.py
29 lines (26 loc) · 934 Bytes
/
1. regression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from movies import movie_dataset, movie_ratings
def distance(movie1, movie2):
squared_difference = 0
for i in range(len(movie1)):
squared_difference += (movie1[i] - movie2[i]) ** 2
final_distance = squared_difference ** 0.5
return final_distance
def predict(unknown, dataset, movie_ratings, k):
distances = []
#Looping through all points in the dataset
for title in dataset:
movie = dataset[title]
distance_to_point = distance(movie, unknown)
#Adding the distance and point associated with that distance
distances.append([distance_to_point, title])
distances.sort()
#Taking only the k closest points
neighbors = distances[0:k]
sum = 0
for neighbor in neighbors:
title = neighbor[1]
sum += movie_ratings[title]
return sum/len(neighbors)
print(movie_dataset["Life of Pi"])
print(movie_ratings["Life of Pi"])
print(predict([0.016, 0.300, 1.022], movie_dataset, movie_ratings, 5))