-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgetposter.py
58 lines (42 loc) · 1.49 KB
/
getposter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#-*- coding: utf-8 -*-
import requests, codecs, urllib, re
import urllib.request
from bs4 import BeautifulSoup
from pymongo import MongoClient
from multithreading import multi_threading
def getposter(movie, dbh):
    """Return the poster image URL scraped from a movie's Naver Movie page.

    Builds the detail-page URL from ``movie['code']``, downloads the page and
    looks up the first ``<img>`` inside the first ``<div class="poster">``.

    Args:
        movie: Mapping with a ``'code'`` entry identifying the movie; the
            value is converted with ``str`` before being appended to the URL.
        dbh: Not used by this function. It is accepted because
            ``iteratemovie`` dispatches ``[movie, dbh]`` argument lists.

    Returns:
        The ``src`` attribute of the first poster image, or ``None`` when the
        page has no poster block or the block contains no ``<img>``.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
    """
    baseurl = 'http://movie.naver.com/movie/bi/mi/basic.nhn?code='
    movieurl = baseurl + str(movie['code'])

    # Close the HTTP response deterministically; this function is fanned out
    # over many movies, so a leaked socket per call adds up quickly.
    with urllib.request.urlopen(movieurl) as response:
        data = response.read()

    soup = BeautifulSoup(data, 'html.parser')
    poster = soup.find("div", {"class": "poster"})
    if poster is None:
        # No poster block on the page: report "no poster" the same way the
        # no-<img> case does instead of raising IndexError.
        return None

    img = poster.find('img')
    if img is None:
        return None
    return img.get('src')
def test():
    """Manually smoke-test the poster scraping against one fixed movie.

    Downloads the Naver Movie detail page for a hard-coded movie code and
    prints the ``src`` attribute of every ``<img>`` found inside the first
    ``<div class="poster">``. If the page has no such block, a short notice
    is printed instead.
    """
    baseurl = "http://movie.naver.com/movie/bi/mi/basic.nhn?code="
    code = 146469  # fixed movie code: this function only checks one specific movie
    urlsum = baseurl + str(code)

    # Use a context manager so the HTTP response is always closed.
    with urllib.request.urlopen(urlsum) as response:
        data = response.read()

    soup = BeautifulSoup(data, 'html.parser')
    poster = soup.find("div", {"class": "poster"})
    if poster is None:
        # Previously this case surfaced as a bare IndexError.
        print("No poster block found for movie code " + str(code))
        return

    for img in poster.find_all('img'):
        print(img.get('src'))
def iteratemovie(dbh):
    """Dispatch ``getposter`` for every movie flagged as valid.

    Queries ``dbh.movielist`` for documents with ``valid`` set to ``True``,
    pairs each one with ``dbh`` and hands the resulting argument lists to
    ``multi_threading`` together with ``getposter``.

    Args:
        dbh: Database handle exposing a ``movielist`` collection with a
            ``find`` method (presumably a pymongo database - confirm
            against the caller).
    """
    valid_movies = dbh.movielist.find({"valid": True})
    jobs = [[entry, dbh] for entry in valid_movies]
    # NOTE(review): 20 is presumably the worker-thread count; confirm against
    # the multi_threading helper's signature.
    multi_threading(getposter, jobs, 20)
def main():
    """Script entry point; currently runs only the single-movie smoke test.

    The database-backed crawl is disabled and kept below as a comment for
    reference.
    """
    test()

    # Disabled full crawl over the movie database:
    # try:
    #     c = MongoClient(host="localhost", port=27018)
    #     dbh = c['moviedb']
    #     iteratemovie(dbh)
    # except Exception as inst:
    #     print(inst)
    #     print("Error connecting to database!")
    #     return
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()