-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgoogle-finance-scraper.py
98 lines (63 loc) · 2.04 KB
/
google-finance-scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
from bs4 import BeautifulSoup
import requests
import json
def get_price(soup_element):
    """Return the text of the price element of a parsed quote page.

    Looks up the first ``<main>`` tag in *soup_element*, then the first
    ``<div>`` with CSS class ``AHmHk`` inside it, and returns that div's
    text content.

    Raises:
        AttributeError: if either lookup finds nothing (``find`` returns
            ``None`` in that case).
    """
    main_section = soup_element.find('main')
    price_node = main_section.find('div', 'AHmHk')
    return price_node.get_text()
def get_change(soup_element):
    """Return the text of the price-change element of a parsed quote page.

    Looks up the first ``<main>`` tag in *soup_element*, then the first
    ``<div>`` with CSS class ``JwB6zf`` inside it, and returns that div's
    text content.

    Raises:
        AttributeError: if either lookup finds nothing (``find`` returns
            ``None`` in that case).
    """
    main_section = soup_element.find('main')
    change_node = main_section.find('div', 'JwB6zf')
    return change_node.get_text()
def get_name(soup_element):
    """Return the text of the instrument-name element of a parsed quote page.

    Looks up the first ``<main>`` tag in *soup_element*, then the first
    ``<div>`` with CSS class ``zzDege`` inside it, and returns that div's
    text content.

    Raises:
        AttributeError: if either lookup finds nothing (``find`` returns
            ``None`` in that case).
    """
    main_section = soup_element.find('main')
    name_node = main_section.find('div', 'zzDege')
    return name_node.get_text()
def save_results(results, filepath):
    """Write *results* to *filepath* as indented, UTF-8 encoded JSON.

    The file is opened in write mode, so any existing content is replaced.
    Non-ASCII characters are written as-is rather than ``\\u`` escaped.

    Args:
        results: Any JSON-serialisable object.
        filepath: Destination path of the JSON file.

    Returns:
        None.
    """
    dump_options = {'ensure_ascii': False, 'indent': 4}
    with open(filepath, mode='w', encoding='utf-8') as output:
        json.dump(results, output, **dump_options)
def get_finance_html(url, timeout=180):
    """Fetch the rendered HTML of *url* through the Oxylabs realtime API.

    Sends a POST request to ``https://realtime.oxylabs.io/v1/queries`` asking
    the ``google`` source to render *url* as HTML, and returns the ``content``
    field of the first entry in the response's ``results`` list.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the API before giving up. Without a
            timeout ``requests`` may block indefinitely. The default is a
            generous guess for rendered jobs - tune as needed.

    Returns:
        The page HTML as returned by the API.

    Raises:
        requests.HTTPError: if the API answers with a 4xx/5xx status
            (for example bad credentials).
        requests.Timeout: if no response arrives within *timeout* seconds.
        KeyError, IndexError: if the response JSON lacks the expected
            ``results[0].content`` structure.
    """
    payload = {
        'source': 'google',
        'render': 'html',
        'url': url,
    }
    response = requests.post(
        'https://realtime.oxylabs.io/v1/queries',
        # NOTE(review): these look like placeholder credentials - replace
        # them with real API credentials before running.
        auth=('username', 'password'),
        json=payload,
        timeout=timeout,
    )
    # Fail loudly on HTTP errors instead of letting an error body surface
    # later as a confusing KeyError or JSON decoding failure.
    response.raise_for_status()
    response_json = response.json()
    return response_json['results'][0]['content']
def extract_finance_information_from_soup(soup_of_the_whole_page):
    """Collect name, change and price from a parsed quote page.

    Args:
        soup_of_the_whole_page: Parsed page passed on to ``get_price``,
            ``get_change`` and ``get_name``.

    Returns:
        A dict with the keys ``"name"``, ``"change"`` and ``"price"``,
        in that order.
    """
    # The tuple is evaluated left to right: price, then change, then name.
    price, change, name = (
        get_price(soup_of_the_whole_page),
        get_change(soup_of_the_whole_page),
        get_name(soup_of_the_whole_page),
    )
    return {"name": name, "change": change, "price": price}
def extract_finance_data_from_urls(urls):
    """Scrape every URL in *urls* and return the results in input order.

    For each URL the HTML is fetched with ``get_finance_html``, parsed with
    BeautifulSoup's ``html.parser`` and reduced to a listing by
    ``extract_finance_information_from_soup``.

    Args:
        urls: Iterable of page URLs.

    Returns:
        A list of ``{'url': ..., 'data': ...}`` dicts, one per URL; an empty
        list when *urls* is empty.
    """
    def scrape(page_url):
        # Fetch, parse and extract a single page.
        page_soup = BeautifulSoup(get_finance_html(page_url), 'html.parser')
        return {
            'url': page_url,
            'data': extract_finance_information_from_soup(page_soup),
        }

    return [scrape(page_url) for page_url in urls]
def main():
    """Scrape three fixed Google Finance quote pages and save them as JSON.

    The results for the hard-coded URLs are written to ``data.json`` in the
    current working directory.
    """
    quote_urls = [
        'https://www.google.com/finance/quote/BNP:EPA?hl=en',
        'https://www.google.com/finance/quote/.DJI:INDEXDJX?hl=en',
        'https://www.google.com/finance/quote/.INX:INDEXSP?hl=en'
    ]
    save_results(extract_finance_data_from_urls(quote_urls), 'data.json')
# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()