-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpull-tweets.py
72 lines (65 loc) · 2.37 KB
/
pull-tweets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import requests
from requests_oauthlib import OAuth1
import cnfg
import json
from pymongo import MongoClient
def into_mongo(collection, results):
    """Store every tweet from *results* in a MongoDB collection.

    Args:
        collection: object exposing ``insert_one(document)``; called once
            per tweet.
        results: iterable of tweet documents, inserted in iteration order.

    Returns:
        None
    """
    # One insert per document, in the order the iterable yields them.
    for document in results:
        collection.insert_one(document)
def get_tweets(screen_name, since_id, call=1, max_id=None, tweets=None,
               auth=None, max_calls=None, session=None):
    """Collect a user's timeline by paging backwards through the Twitter API.

    Pages of up to 200 tweets are requested until the API returns an empty
    page or the call budget is used up.

    Args:
        screen_name (str): screen name to get tweets for
        since_id (int): get tweets more recent than this specified ID;
            None for no lower bound
        call (int): number assigned to the first request; requests are
            counted upwards from here and stop once the count exceeds
            ``max_calls``
        max_id (int): upper bound (inclusive) on tweet ID for the first
            request; None starts from the most recent tweet
        tweets (list): tweets collected earlier; they are placed in front
            of the newly fetched ones. The list passed in is not modified.
        auth: credentials handed to the HTTP ``get`` call. When None, the
            module-level ``oauth`` is used if it exists.
        max_calls (int): highest call number allowed. When None, the
            module-level ``max_calls`` is used if it exists, else 180.
        session: object with a requests-compatible ``get(url, params=...,
            auth=...)`` method (e.g. a ``requests.Session``). Defaults to
            the ``requests`` module.

    Returns:
        list of tweet dicts: ``tweets`` followed by every fetched page, in
        the order the API returned them

    Raises:
        ValueError: the API answered with something other than a list of
            tweets (for example an error object).
    """
    # Fresh list on every call: never share a default, never mutate input.
    collected = [] if tweets is None else list(tweets)

    # Earlier versions read these as module-level names; keep honouring
    # them so existing callers that set the globals continue to work.
    if auth is None:
        auth = globals().get('oauth')
    if max_calls is None:
        max_calls = globals().get('max_calls', 180)
    http = requests if session is None else session

    while call <= max_calls:
        print(call)  # progress indicator: number of the request being made
        search_params = {'screen_name': screen_name,
                         'count': 200,
                         'max_id': max_id,
                         'since_id': since_id}
        page = http.get(
            'https://api.twitter.com/1.1/statuses/user_timeline.json',
            params=search_params,
            auth=auth).json()

        # Error payloads arrive as a JSON object rather than a list.
        if not isinstance(page, list):
            raise ValueError('unexpected timeline response for %s: %r'
                             % (screen_name, page))
        if not page:  # ran out of tweets
            break

        collected.extend(page)
        # max_id is inclusive, so step just below the oldest tweet received;
        # otherwise that tweet would be returned again on the next page.
        max_id = page[-1]['id'] - 1
        call += 1

    return collected
def main():
    """Download each candidate's timeline and store it in MongoDB.

    Twitter credentials are read from ``.twitter_config``. Tweets for every
    candidate go into the ``tweets`` database, in a collection named after
    the candidate's key (``trump``, ``clinton``, ...).
    """
    # get_tweets() reads `oauth` and `max_calls` as module-level names, so
    # they are published as globals instead of being kept local to main().
    global oauth, max_calls

    # define twitter authentication keys
    config = cnfg.load(".twitter_config")
    oauth = OAuth1(config["consumer_key"],
                   config["consumer_secret"],
                   config["access_token"],
                   config["access_token_secret"])
    max_calls = 180

    # define screen names for candidates
    candidates = {'clinton': 'hillaryclinton',
                  'trump': 'realdonaldtrump',
                  'rubio': 'marcorubio',
                  'sanders': 'berniesanders',
                  'cruz': 'tedcruz'}

    # define mongodb parameters; the client must exist before any
    # collection can be looked up on its database
    client = MongoClient()
    try:
        db = client.tweets

        # get tweets and insert into mongodb
        for cand, screen_name in candidates.items():
            results = get_tweets(screen_name, since_id=None)
            # db[cand] is the same collection as db.<cand>
            into_mongo(db[cand], results)
    finally:
        client.close()
# Run the download only when executed as a script, not when imported.
if __name__ == "__main__":
    main()