-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathTask3.py
95 lines (84 loc) · 2.94 KB
/
Task3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
## In this task, I have arranged all the movies according to the decades, means arranged after every 10 years.
import requests
from bs4 import BeautifulSoup
url = "https://www.imdb.com/india/top-rated-indian-movies/?ref_=nv_mv_250_in"
page = requests.get(url)
text = page.text
parse = BeautifulSoup(text, "html.parser")
# h1 = parse.find('h1').text
# print (h1)
# title = parse.find('title').text
# print (title)
lister = parse.find('div', class_ = "lister")
# print (lister)
tbody = lister.find('tbody', class_ = "lister-list")
# print (tbody)
trs = tbody.find_all('tr')
# print (trs)
############# to find the position of movies:
position = []
movies_list = []
years_of_release = []
ratings_list = []
movies_links = []
for tr in trs:
# print (tr)
# print ("-------------------------------------------------------------------------------------------------------------------------------------")
tds = tr.find('td', class_ = "titleColumn").get_text().strip()
# print (tds)
# print ("-------------------------------------------------------------------------------------------------------------------------------------")
string = ""
for i in tds:
if "." != i:
string = string+i
else:
position.append(int(string))
break
# print (position)
all_in_one = []
for tr in trs:
main_link = tr.find('td', class_ = "titleColumn")
# to print the name of every movie
name = main_link.find('a').get_text()
movies_list.append(name)
# to print the year_of_release of every movie
year = main_link.find('span').text
years_of_release.append(year)
# to print the link of every movie
site_links = tr.find('td', class_ = "titleColumn").a['href']
link_to_movie = "http://www.imdb.com" + (site_links)
movies_links.append(link_to_movie)
# to print the rating of every movie
ratingtd = tr.find('td', class_ = "ratingColumn imdbRating")
ratings = ratingtd.find('strong').text
ratings_list.append(ratings)
for i in range(len(movies_list)):
b = {
'position': position[i],
'name': movies_list[i],
'year': years_of_release[i],
'rating': ratings_list[i],
'url': movies_links[i],
}
all_in_one.append(b)
# print (all_in_one)
# print (movies_list)
# print (years_of_release)
# print (ratings_list)
# print (movies_links)
#######################-------------Task3-----------------######################
year = '(1971)'
for i in years_of_release:
if int(i[1:-1]) < int(year[1:-1]):
year = i
# print (year) # to find the minimum year of movies # 1955
main_dic_task3 = {}
for i in range(1950, 2020, 10):
decade_list = []
for j in range(i, i+10):
for k in all_in_one:
if int(k['year'][1:-1]) == j:
dic = {'name': k['name'], 'year_of_release': j, 'position': k['position'], 'rating': k['rating'], 'url': k['url']}
decade_list.append(dic)
main_dic_task3[i] = decade_list
print (main_dic_task3)