-
Notifications
You must be signed in to change notification settings - Fork 2
/
cosine_sim.py
34 lines (29 loc) · 1.18 KB
/
cosine_sim.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import pymysql
import pymysql.cursors
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Batch job: vectorise every movie's keywords with TF-IDF, compute pairwise
# cosine similarity, and store each movie's 10 most similar movies in the
# `recommend` table.

# NOTE(review): credentials are hard-coded in source; consider loading them
# from environment variables or a config file kept out of version control.
connection = pymysql.connect(host="localhost",
                             port=3306,
                             user="gogogoAquarius",
                             password="chenpengjiayou19",
                             db="gogogoAquarius$db_movies",
                             charset='utf8mb4',
                             cursorclass=pymysql.cursors.DictCursor)
sql = "SELECT keywords from movie"
insert_sql = "INSERT INTO recommend (movieId, recId) VALUES (%s, %s) "


def _top_similar(scores, own_index, limit=10):
    """Return 1-based positions of the best-scoring entries in *scores*.

    Args:
        scores: One row of the similarity matrix (score per movie position).
        own_index: 0-based position of the movie the row belongs to; it is
            always left out of the result.
        limit: Maximum number of positions to return.

    Returns:
        Up to *limit* positions (0-based index + 1), highest score first.
    """
    # Drop the movie itself by index instead of assuming it sorts first:
    # with tied scores (e.g. duplicate keywords) or an all-zero vector it may
    # not be at the head of the ranking and would be recommended to itself.
    ranked = sorted(
        ((j, score) for j, score in enumerate(scores) if j != own_index),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [j + 1 for j, _ in ranked[:limit]]


try:
    with connection.cursor() as cursor:
        cursor.execute(sql)
        # NULL keywords are mapped to "" because the vectorizer needs strings.
        keywords = [row['keywords'] or '' for row in cursor.fetchall()]

        res = []
        if keywords:  # an empty table has nothing to vectorise
            sims = cosine_similarity(TfidfVectorizer().fit_transform(keywords))
            res = [_top_similar(row, i) for i, row in enumerate(sims)]

        # movieId / recId are derived from the row position (position + 1).
        # NOTE(review): this assumes movie ids are 1..N in the order returned
        # by the SELECT, which has no ORDER BY -- TODO confirm against schema.
        # NOTE(review): existing rows in `recommend` are not cleared first, so
        # re-running presumably inserts duplicates -- verify intended.
        pairs = []
        for index, row in enumerate(res):
            pairs.extend((index + 1, rec) for rec in row)

        if pairs:
            # Let the driver bind the values rather than formatting the SQL
            # string by hand; executemany also batches the inserts.
            cursor.executemany(insert_sql, pairs)
    connection.commit()
finally:
    # Always release the connection, even if any step above raised; without
    # a commit the pending inserts are discarded.
    connection.close()