-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.py
113 lines (77 loc) · 2.33 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import requests
import config
import sys
from bs4 import BeautifulSoup
def get_artist_id(artist_name):
    """Look up the Genius artist ID for ``artist_name``.

    Sends a search request to ``base_url + '/search'`` and scans the returned
    hits for a song whose primary artist name equals ``artist_name`` exactly
    (the comparison is case-sensitive).

    Relies on the module-level ``base_url`` and ``headers`` that the script
    entry point defines before calling this function.

    Args:
        artist_name: Artist name to search for.

    Returns:
        The ID of the primary artist of the first matching song hit.

    Raises:
        requests.HTTPError: If the search request returns an error status.
        requests.Timeout: If the server does not answer within the timeout.
        SystemExit: With status 1 when no hit matches ``artist_name``.
    """
    response = requests.get(
        base_url + '/search',
        params={'q': artist_name},
        headers=headers,
        timeout=30,
    )
    # Surface HTTP errors directly instead of failing later while indexing
    # into an unexpected payload.
    response.raise_for_status()
    hits = response.json()["response"]["hits"]

    # Check the hit type first so that only song hits are inspected for a
    # primary artist.
    artist_id = next(
        (
            hit["result"]["primary_artist"]["id"]
            for hit in hits
            if hit["type"] == "song"
            and hit["result"]["primary_artist"]["name"] == artist_name
        ),
        None,
    )

    if artist_id is None:
        print("Artist", artist_name, "not found.")
        # Non-zero status so callers and shells can tell the lookup failed.
        sys.exit(1)

    print("Collecting songs for", artist_name, ": Artist ID", artist_id)
    return artist_id
def get_song_urls(artist_id):
    """Collect the page paths of the songs whose primary artist is ``artist_id``.

    Requests ``base_url + '/artists/<artist_id>/songs'`` page by page, 50
    songs per page, until the response reports no ``next_page``. Songs whose
    primary artist ID differs from ``artist_id`` are left out.

    Relies on the module-level ``base_url`` and ``headers`` that the script
    entry point defines before calling this function.

    Args:
        artist_id: Artist ID as returned by ``get_artist_id``.

    Returns:
        A list of song ``path`` values, in the order the API returned them.

    Raises:
        requests.HTTPError: If any page request returns an error status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    urls = []
    songs_url = base_url + '/artists/' + str(artist_id) + '/songs'
    page = 1
    while True:
        # get results of current page
        response = requests.get(
            songs_url,
            params={'per_page': 50, 'page': page},
            headers=headers,
            timeout=30,
        )
        response.raise_for_status()
        payload = response.json()["response"]

        urls.extend(
            song["path"]
            for song in payload["songs"]
            if song["primary_artist"]["id"] == artist_id
        )

        if payload["next_page"] is None:
            break
        page += 1

    print(len(urls), "songs found")
    return urls
def get_lyrics(song_urls):
    """Download each song page and extract its lyrics text.

    For every path in ``song_urls`` the page ``"https://genius.com" + path``
    is fetched, ``<script>`` elements are removed, and the text of the
    ``<div class="lyrics">`` element is taken as the lyrics. Songs are
    skipped when the request fails, when the page has no such element, or
    when the text contains the placeholder "Lyrics will be available".

    Args:
        song_urls: Iterable of song paths as returned by ``get_song_urls``.

    Returns:
        A flat list in which each kept song contributes two entries: its
        lyrics text followed by the marker string ``"<EOS>"``.
    """
    lyrics = []
    total = len(song_urls)
    for url in song_urls:
        try:
            page = requests.get("https://genius.com" + url, timeout=30)
        except requests.RequestException as err:
            # One unreachable page should not discard everything collected so far.
            print("Skipping", url, ":", err)
            continue
        if not page.ok:
            continue

        html = BeautifulSoup(page.text, "html.parser")
        for script in html("script"):
            script.extract()

        container = html.find("div", class_="lyrics")
        if container is None:
            # find() returns None when nothing matches; calling get_text()
            # on it would raise AttributeError.
            print("Skipping", url, ": no lyrics element found")
            continue

        text = container.get_text()
        if "Lyrics will be available" in text:
            continue

        lyrics.append(text)
        lyrics.append("<EOS>")

        # Report progress only right after a song was added, so the same
        # count (including 0) is never printed more than once.
        processed = len(lyrics) // 2
        if processed % 100 == 0:
            print(processed, "/", total, "lyrics processed")
    return lyrics
if __name__ == '__main__':
    # Usage: main.py ARTIST_NAME [OUTPUT_BASENAME]
    # Collects the lyrics of every song by ARTIST_NAME and writes them to
    # OUTPUT_BASENAME.txt (default: lyrics.txt).

    # artist input
    if len(sys.argv) < 2:
        print("You must pass a parameter for artist name e.g. \"Kanye West\"")
        # Non-zero status: a missing argument is a usage error, not success.
        sys.exit(1)
    artist_name = sys.argv[1]

    # api key and authorisation
    # base_url and headers are module-level on purpose: get_artist_id and
    # get_song_urls read them as globals.
    # HTTPS keeps the bearer token from being sent in clear text.
    base_url = "https://api.genius.com"
    api_key = config.access_token
    auth_string = 'Bearer ' + api_key
    headers = {'Authorization': auth_string}

    # get lyrics
    artist_id = get_artist_id(artist_name)
    song_urls = get_song_urls(artist_id)
    lyrics = get_lyrics(song_urls)

    # save to txt file
    if len(sys.argv) > 2:
        filename = sys.argv[2] + ".txt"
    else:
        filename = "lyrics.txt"
    # Explicit UTF-8 so non-ASCII lyrics do not depend on the platform's
    # default encoding; the context manager closes the file even on error.
    with open(filename, "w", encoding="utf-8") as f:
        f.writelines(lyrics)
    print("File saved at", filename)