-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrecommendations.py
144 lines (127 loc) · 4.94 KB
/
recommendations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import sys
from pymongo import MongoClient
from getdistance import totaldistance
from random import sample
import random
# Genre labels known to the recommender. genretogenre() creates one document
# per entry in the genrecounts collection, with a zeroed counter for every
# other entry in this list.
genrelist = ['블랙코미디', 'SF', '애니메이션', '느와르', '서스펜스', '모험', '멜로/로맨스', '서사', '코미디', '다큐멘터리', '서부', '무협', '전쟁', '드라마', '스릴러', '에로', '가족', '액션', '공연실황', '공포', '범죄', '판타지', '미스터리', '실험', '뮤지컬']
def genretogenre(dbh):
    """Rebuild the genre co-occurrence table in ``dbh.genrecounts``.

    The collection is emptied, then one document per entry of ``genrelist``
    is inserted: ``{'type': <genre>, <other genre>: 0, ...}``. Afterwards,
    for every document in ``dbh.movielist`` with ``valid`` set to True, each
    ordered pair of distinct genres ``(x, y)`` found in the movie's ``genre``
    field adds 1 to field ``y`` of the document whose ``type`` is ``x``.

    Args:
        dbh: database handle exposing the ``genrecounts`` and ``movielist``
            collections.
    """
    # Start from an empty collection so counts from earlier runs are not
    # added to again.
    dbh.genrecounts.delete_many({})
    for genre in genrelist:
        # Cursor.count() no longer exists in PyMongo 4; count_documents() is
        # the supported way to test for an existing document.
        if dbh.genrecounts.count_documents({'type': genre}, limit=1) < 1:
            counter = {other: 0 for other in genrelist if other != genre}
            counter['type'] = genre
            dbh.genrecounts.insert_one(counter)

    # Tally the pairs in memory first, then apply a single $inc per genre,
    # instead of one database round trip per pair per movie.
    pair_counts = {}
    for movie in dbh.movielist.find({'valid': True}):
        genres = movie.get('genre') or ()
        for x in genres:
            for y in genres:
                if x != y:
                    increments = pair_counts.setdefault(x, {})
                    increments[y] = increments.get(y, 0) + 1

    for genre, increments in pair_counts.items():
        dbh.genrecounts.update_one({'type': genre}, {'$inc': increments})
def choosegenre(genre, dbh, seen, cands=10):
    """Pick up to ``cands`` unseen movies by weighted random genre draws.

    The ``genrecounts`` document for ``genre`` supplies one weight per
    related genre (every field except ``_id`` and ``type``). ``genre`` itself
    is given a weight of two thirds of the sum of those weights. For every
    genre with a non-zero weight, a pool of at most ``cands`` unseen movies
    is read from ``dbh.movielist`` (documents with ``valid`` True whose
    ``genre`` contains that genre or ``genre``, highest ``rating`` first)
    and shuffled. Movies are then drawn one at a time from a pool chosen at
    random in proportion to its weight.

    Args:
        genre: genre to base the selection on.
        dbh: database handle exposing ``genrecounts`` and ``movielist``.
        seen: iterable of movie codes to exclude.
        cands: maximum pool size per genre and maximum number of results.

    Returns:
        A list of movie documents. It may be shorter than ``cands`` (or
        empty) when ``genre`` has no ``genrecounts`` document, all weights
        are zero, or too few unseen movies exist. The same movie can appear
        more than once because pools overlap.
    """
    genre_doc = dbh.genrecounts.find_one({'type': genre})
    if genre_doc is None:
        # Unknown genre: nothing to weight the draw with.
        return []

    weights = {
        name: count
        for name, count in genre_doc.items()
        if name not in ('_id', 'type')
    }
    # The requested genre competes with its related genres at 2/3 of their
    # combined weight.
    weights[genre] = sum(weights.values()) * 2 // 3

    seen_codes = set(seen)  # O(1) membership tests inside the loops below
    pools = {}
    for name, weight in weights.items():
        if weight == 0:
            continue
        # Include the original genre among the candidates.
        cursor = dbh.movielist.find(
            {'valid': True, 'genre': {'$in': [name, genre]}}
        ).sort('rating', -1)
        pool = []
        for movie in cursor:
            if len(pool) >= cands:
                break
            if movie['code'] not in seen_codes:
                pool.append(movie)
        random.shuffle(pool)
        if pool:
            pools[name] = pool

    picked = []
    for _ in range(cands):
        if not pools:
            break
        names = list(pools)
        choice = random.choices(names, weights=[weights[n] for n in names])[0]
        picked.append(pools[choice].pop())
        # Drop exhausted pools so they can never be drawn from again.
        if not pools[choice]:
            del pools[choice]
    return picked
def removemovie(code, checklist):
    """Delete, in place, the first entry of ``checklist`` whose "code" equals ``code``.

    Only the first match is removed; the list is left unchanged when no
    entry matches. Returns None.
    """
    for position, entry in enumerate(checklist):
        if entry["code"] == code:
            # Safe to mutate here: we leave the loop immediately afterwards.
            del checklist[position]
            return
def findmovies(movieseen, dbh):
    """Return the ``dbh.movielist`` documents for the codes in ``movieseen``.

    Each code is looked up with ``find_one({"code": code})``, in order.
    Codes whose lookup yields a falsy result (e.g. None) are skipped.
    """
    lookups = (
        dbh.movielist.find_one({"code": seen_code}) for seen_code in movieseen
    )
    return [document for document in lookups if document]
def addedtotaldistance(movielist, movieb, dbh):
    """Sum ``totaldistance(moviea, movieb, dbh)`` over every ``moviea`` in ``movielist``.

    Returns 0 when ``movielist`` is empty.
    """
    return sum(totaldistance(moviea, movieb, dbh) for moviea in movielist)
def main():
    """Print two lists of movie recommendations for the codes in movieseen.txt.

    Steps:
      1. Connect to the ``moviedb`` database on localhost:27018 and rebuild
         the genre co-occurrence table with ``genretogenre``.
      2. Read whitespace-separated movie codes from ``movieseen.txt``.
      3. For every genre of every seen movie found in ``movielist``, gather
         candidates with ``choosegenre``; print the title and code of the 10
         highest-rated distinct candidates.
      4. Order all valid, unseen movies by ``addedtotaldistance`` to the
         seen movies and print title, code and rating of up to 10 movies
         sampled at random from the first 40.

    Exits with status 1 if the database cannot be reached.
    """
    # Imported locally so the module's top-level import block is unchanged.
    from pymongo.errors import PyMongoError

    try:
        c = MongoClient(host='localhost', port=27018)
        # MongoClient connects lazily, so constructing it does not fail when
        # the server is down; ping to surface the problem here.
        c.admin.command('ping')
        dbh = c['moviedb']
    except PyMongoError:
        print("Unable to connect!")
        sys.exit(1)

    genretogenre(dbh)
    with open('movieseen.txt', 'r') as seen_file:
        movieseen = seen_file.read().split()

    picks = []
    for code in movieseen:
        seen_movie = dbh.movielist.find_one({'code': code})
        if not seen_movie:
            continue
        # genretogenre() tolerates movies without a 'genre' field; do the
        # same here instead of raising KeyError.
        for genre in seen_movie.get('genre') or []:
            picks.extend(choosegenre(genre, dbh, movieseen))

    # De-duplicate by code while keeping the documents, so no further
    # database lookups are needed for sorting or printing.
    unique_picks = {movie['code']: movie for movie in picks}
    # Best-rated first: choosegenre() ranks candidates by descending rating,
    # so the top of this list must be the highest-rated movies too.
    ranked = sorted(
        unique_picks.values(), key=lambda movie: movie['rating'], reverse=True
    )
    print("Recommendations based on ratings and genre preference")
    print()
    for movie in ranked[:10]:
        print(movie['title'], movie['code'])
    print()
    print("Recommendations based on total distance algorithm")
    # All valid movies, minus the ones already seen.
    seen_codes = set(movieseen)
    checklist = [
        movie
        for movie in dbh.movielist.find({"valid": True})
        if movie["code"] not in seen_codes
    ]
    movielist = findmovies(movieseen, dbh)
    checklist.sort(key=lambda x: addedtotaldistance(movielist, x, dbh))
    shortlist = checklist[:40]
    # sample() raises ValueError when asked for more items than exist.
    candidates = sample(shortlist, min(10, len(shortlist)))
    for candidate in candidates:
        print(candidate["title"], candidate["code"], candidate["rating"])
# Run the recommender only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()