Skip to content

Commit

Permalink
added first files
Browse files Browse the repository at this point in the history
  • Loading branch information
lauragreemko committed Oct 8, 2022
0 parents commit 2e32d6a
Show file tree
Hide file tree
Showing 13 changed files with 606 additions and 0 deletions.
1 change: 1 addition & 0 deletions data/data1.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions data/data2.json

Large diffs are not rendered by default.

Binary file added info/Prueba_tecnica_DS.pdf
Binary file not shown.
Binary file added model/sentiment_model
Binary file not shown.
249 changes: 249 additions & 0 deletions notebooks/app.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"# For saving access tokens and for file management when creating and adding to the dataset\n",
"import os\n",
"# For dealing with json responses we receive from the API\n",
"import json\n",
"# For displaying the data after\n",
"import pandas as pd\n",
"# For saving the response data in CSV format\n",
"import csv\n",
"# For parsing the dates received from twitter in readable formats\n",
"import datetime\n",
"import dateutil.parser\n",
"import unicodedata\n",
"#To add wait time between requests\n",
"import time"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# SECURITY: never store the bearer token in the notebook itself. A token that has\n",
"# been committed to version control must be treated as leaked and revoked.\n",
"# Provide the credential from outside instead, e.g. export TOKEN in the shell that\n",
"# launches Jupyter, and fail fast here when it is missing.\n",
"if not os.environ.get('TOKEN'):\n",
"    raise RuntimeError(\n",
"        \"Environment variable TOKEN is not set; export your Twitter API \"\n",
"        \"bearer token before starting the notebook.\"\n",
"    )"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"def auth():\n",
"    \"\"\"Return the bearer token stored in the TOKEN environment variable.\n",
"\n",
"    Returns:\n",
"        str: the non-empty value of TOKEN.\n",
"\n",
"    Raises:\n",
"        RuntimeError: if TOKEN is unset or empty. Raising here avoids handing\n",
"            None to create_headers(), which would format it into the header\n",
"            as the literal text 'None'.\n",
"    \"\"\"\n",
"    token = os.getenv('TOKEN')\n",
"    if not token:\n",
"        raise RuntimeError(\"The TOKEN environment variable is not set.\")\n",
"    return token"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"def create_headers(bearer_token):\n",
"    \"\"\"Build the HTTP headers that authenticate a request with a bearer token.\n",
"\n",
"    Args:\n",
"        bearer_token: token placed after the ``Bearer`` scheme name.\n",
"\n",
"    Returns:\n",
"        dict: a mapping with a single ``Authorization`` entry.\n",
"    \"\"\"\n",
"    return {\"Authorization\": f\"Bearer {bearer_token}\"}"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"def create_url(start_date, max_results, tweet_fields, expansions, end_time=None,\n",
"               user_id='1162694149956603904'):\n",
"    \"\"\"Build the mentions URL and the query parameters for one request.\n",
"\n",
"    Args:\n",
"        start_date: value sent as ``start_time``.\n",
"        max_results: value sent as ``max_results``.\n",
"        tweet_fields: value sent as ``tweet.fields``.\n",
"        expansions: value sent as ``expansions``.\n",
"        end_time: optional value sent as ``end_time`` (None by default).\n",
"        user_id: id of the account whose mentions are requested. The default is\n",
"            the id that used to be hard-coded in the URL.\n",
"\n",
"    Returns:\n",
"        tuple: ``(search_url, query_params)``.\n",
"    \"\"\"\n",
"    # Change the endpoint (and the params below) to collect from another API route\n",
"    search_url = \"https://api.twitter.com/2/users/{}/mentions\".format(user_id)\n",
"\n",
"    query_params = {'tweet.fields' : tweet_fields,\n",
"                    'start_time': start_date,\n",
"                    'end_time' : end_time,\n",
"                    'max_results': max_results,\n",
"                    'expansions': expansions,\n",
"                    # Placeholder: connect_to_endpoint() sets the value actually sent\n",
"                    'next_token': {}}\n",
"    return (search_url, query_params)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"def connect_to_endpoint(url, headers, params, next_token = None, timeout = 30):\n",
"    \"\"\"Send a GET request and return the decoded JSON body.\n",
"\n",
"    Args:\n",
"        url: endpoint URL (first element of the tuple built by create_url).\n",
"        headers: HTTP headers, e.g. the result of create_headers().\n",
"        params: query parameters (second element of the create_url tuple).\n",
"            The dict is copied, so the caller's object is not modified.\n",
"        next_token: value sent under the ``next_token`` query key.\n",
"        timeout: seconds to wait for the server, so that a stalled connection\n",
"            cannot block the notebook indefinitely.\n",
"\n",
"    Returns:\n",
"        The parsed JSON response.\n",
"\n",
"    Raises:\n",
"        Exception: with ``(status_code, text)`` when the status is not 200.\n",
"    \"\"\"\n",
"    # Work on a copy: writing next_token into the caller's dict was a hidden side effect\n",
"    query = dict(params)\n",
"    query['next_token'] = next_token\n",
"    response = requests.request(\"GET\", url, headers = headers, params = query, timeout = timeout)\n",
"    print(\"Endpoint Response Code: \" + str(response.status_code))\n",
"    if response.status_code != 200:\n",
"        raise Exception(response.status_code, response.text)\n",
"    return response.json()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# Request window and page size\n",
"start_date = '2022-06-12T00:00:00.000Z'\n",
"# end_time = \"2022-06-29T00:00:00.000Z\"\n",
"max_results = 100\n",
"\n",
"# Optional tweet attributes and object expansions to request\n",
"tweet_fields = ','.join(['created_at', 'public_metrics'])\n",
"expansions = 'author_id'\n",
"\n",
"# Authentication headers shared by the requests below\n",
"bearer_token = auth()\n",
"headers = create_headers(bearer_token)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# (search_url, query_params) pair built from the settings defined above\n",
"url = create_url(\n",
"    start_date=start_date,\n",
"    max_results=max_results,\n",
"    tweet_fields=tweet_fields,\n",
"    expansions=expansions,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Endpoint Response Code: 200\n"
]
}
],
"source": [
"# url[0] is the endpoint address, url[1] the query parameters\n",
"json_response = connect_to_endpoint(url=url[0], headers=headers, params=url[1])"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"# Engagement counters of the tweet at index 3 of the current response\n",
"retweet_count, reply_count, like_count, quote_count = (\n",
"    json_response['data'][3]['public_metrics'][metric]\n",
"    for metric in ('retweet_count', 'reply_count', 'like_count', 'quote_count')\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"100"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Display the 'result_count' value from the 'meta' section of the current response\n",
"json_response['meta']['result_count']"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def save_json(number, json_response, directory='data'):\n",
"    \"\"\"Write one API response to ``<directory>/data<number>.json``.\n",
"\n",
"    Args:\n",
"        number: suffix used in the file name (``data1.json``, ``data2.json``, ...).\n",
"        json_response: JSON-serialisable object to store.\n",
"        directory: target folder, relative to the current working directory\n",
"            unless absolute. Defaults to ``data``, the folder used so far.\n",
"    \"\"\"\n",
"    # Create the folder on first use instead of failing with FileNotFoundError\n",
"    os.makedirs(directory, exist_ok=True)\n",
"    path = os.path.join(directory, 'data' + str(number) + '.json')\n",
"    # Explicit encoding keeps the file independent of the platform default\n",
"    with open(path, 'w', encoding='utf-8') as f:\n",
"        json.dump(json_response, f)"
]
},
{
"cell_type": "code",
"execution_count": 331,
"metadata": {},
"outputs": [],
"source": [
"def get_all_tweets(start_date, max_results, tweet_fields, expansions, end_time=None, max_pages=2):\n",
"    \"\"\"Download consecutive result pages and save each one with save_json().\n",
"\n",
"    Page ``n`` is written as ``data<n>.json``. Paging follows the ``next_token``\n",
"    reported in each response's ``meta`` section and stops when it is absent.\n",
"\n",
"    Args:\n",
"        start_date, max_results, tweet_fields, expansions: forwarded to create_url().\n",
"        end_time: optional upper bound, forwarded to create_url() for every page.\n",
"        max_pages: maximum number of requests. The default of 2 matches the two\n",
"            requests this function has always issued; pass None to keep going\n",
"            until the API reports no further page.\n",
"\n",
"    Returns:\n",
"        The last response received, or None when ``max_pages`` is 0.\n",
"    \"\"\"\n",
"    search_url, query_params = create_url(start_date, max_results, tweet_fields, expansions, end_time)\n",
"    json_response = None\n",
"    page = 0\n",
"    while max_pages is None or page < max_pages:\n",
"        json_response = connect_to_endpoint(search_url, headers, query_params)\n",
"        page += 1\n",
"        save_json(page, json_response)\n",
"        # An empty page has no 'data' key, so rely on the paging token rather\n",
"        # than indexing into the tweets themselves\n",
"        token = json_response.get('meta', {}).get('next_token')\n",
"        if not token:\n",
"            break\n",
"        # NOTE(review): the mentions endpoint is understood to take the token as\n",
"        # 'pagination_token' - confirm against the API reference\n",
"        query_params['pagination_token'] = token\n",
"    return json_response"
]
},
{
"cell_type": "code",
"execution_count": 332,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Endpoint Response Code: 200\n",
"2022-06-28T07:50:34.000Z\n"
]
}
],
"source": [
"# Fetch the pages (each one is saved by get_all_tweets) and keep the returned response\n",
"json_response = get_all_tweets(\n",
"    start_date=start_date,\n",
"    max_results=max_results,\n",
"    tweet_fields=tweet_fields,\n",
"    expansions=expansions,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 333,
"metadata": {},
"outputs": [],
"source": [
"# print(json.dumps(json_response, indent=4, sort_keys=True))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.7.4 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit 2e32d6a

Please sign in to comment.