added first files

laurabarredaagusti · Oct 8, 2022 · 2e32d6a · 2e32d6a
commit 2e32d6a
Show file tree

Hide file tree

Showing 13 changed files with 606 additions and 0 deletions.
diff --git a/data/data1.json b/data/data1.json
diff --git a/data/data2.json b/data/data2.json
diff --git a/info/Prueba_tecnica_DS.pdf b/info/Prueba_tecnica_DS.pdf
diff --git a/model/sentiment_model b/model/sentiment_model
diff --git a/notebooks/app.ipynb b/notebooks/app.ipynb
@@ -0,0 +1,249 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import requests\n",
+    "# For saving access tokens and for file management when creating and adding to the dataset\n",
+    "import os\n",
+    "# For dealing with json responses we receive from the API\n",
+    "import json\n",
+    "# For displaying the data after\n",
+    "import pandas as pd\n",
+    "# For saving the response data in CSV format\n",
+    "import csv\n",
+    "# For parsing the dates received from twitter in readable formats\n",
+    "import datetime\n",
+    "import dateutil.parser\n",
+    "import unicodedata\n",
+    "#To add wait time between requests\n",
+    "import time"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# SECURITY: the bearer token must never be committed with the notebook.\n",
+    "# The credential that used to be hard-coded in this cell is exposed in the\n",
+    "# repository history and should be revoked and regenerated.\n",
+    "import getpass  # local import: only this cell needs it\n",
+    "\n",
+    "# Reuse TOKEN when the environment already provides it; otherwise prompt once.\n",
+    "# getpass does not echo the typed value, so it does not end up in the cell output.\n",
+    "if not os.environ.get('TOKEN'):\n",
+    "    os.environ['TOKEN'] = getpass.getpass('Twitter API bearer token: ')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def auth():\n",
+    "    \"\"\"Return the bearer token stored in the TOKEN environment variable.\n",
+    "\n",
+    "    The result is None when TOKEN is not set.\n",
+    "    \"\"\"\n",
+    "    return os.environ.get('TOKEN')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def create_headers(bearer_token):\n",
+    "    \"\"\"Build the request headers that carry the bearer token.\n",
+    "\n",
+    "    Args:\n",
+    "        bearer_token: value placed after the 'Bearer ' prefix.\n",
+    "\n",
+    "    Returns:\n",
+    "        A dict with a single 'Authorization' entry.\n",
+    "    \"\"\"\n",
+    "    return {\"Authorization\": f\"Bearer {bearer_token}\"}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def create_url(start_date, max_results, tweet_fields, expansions, end_time=None,\n",
+    "               user_id='1162694149956603904'):\n",
+    "    \"\"\"Build the mentions URL and the query parameters for one request.\n",
+    "\n",
+    "    Args:\n",
+    "        start_date: value sent as 'start_time'.\n",
+    "        max_results: value sent as 'max_results'.\n",
+    "        tweet_fields: value sent as 'tweet.fields'.\n",
+    "        expansions: value sent as 'expansions'.\n",
+    "        end_time: value sent as 'end_time'; None by default.\n",
+    "        user_id: ID of the account whose mentions are requested. The default\n",
+    "            is the ID this notebook was originally hard-coded to.\n",
+    "\n",
+    "    Returns:\n",
+    "        A (search_url, query_params) tuple.\n",
+    "    \"\"\"\n",
+    "    # Change to the endpoint you want to collect data from\n",
+    "    search_url = \"https://api.twitter.com/2/users/{}/mentions\".format(user_id)\n",
+    "\n",
+    "    # Change params based on the endpoint you are using\n",
+    "    query_params = {'tweet.fields': tweet_fields,\n",
+    "                    'start_time': start_date,\n",
+    "                    'end_time': end_time,\n",
+    "                    'max_results': max_results,\n",
+    "                    'expansions': expansions,\n",
+    "                    # Placeholder only: connect_to_endpoint sets the real value.\n",
+    "                    # None matches that function's default for next_token.\n",
+    "                    'next_token': None}\n",
+    "    return (search_url, query_params)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def connect_to_endpoint(url, headers, params, next_token=None, timeout=30):\n",
+    "    \"\"\"Send a GET request to the endpoint and return the decoded JSON body.\n",
+    "\n",
+    "    Args:\n",
+    "        url: endpoint URL.\n",
+    "        headers: HTTP headers, e.g. the dict built by create_headers.\n",
+    "        params: query parameters, e.g. the dict built by create_url.\n",
+    "        next_token: value sent as the 'next_token' query parameter.\n",
+    "        timeout: seconds to wait for the server, so a stalled connection\n",
+    "            cannot block the notebook indefinitely.\n",
+    "\n",
+    "    Returns:\n",
+    "        The parsed JSON response.\n",
+    "\n",
+    "    Raises:\n",
+    "        Exception: with (status_code, response text) when the status is not 200.\n",
+    "    \"\"\"\n",
+    "    # Work on a copy so the caller's params dict is left untouched.\n",
+    "    request_params = dict(params)\n",
+    "    # NOTE(review): the mentions endpoint may expect 'pagination_token' rather\n",
+    "    # than 'next_token' for paging - confirm against the API reference.\n",
+    "    request_params['next_token'] = next_token\n",
+    "    response = requests.get(url, headers=headers, params=request_params, timeout=timeout)\n",
+    "    print(\"Endpoint Response Code: \" + str(response.status_code))\n",
+    "    if response.status_code != 200:\n",
+    "        raise Exception(response.status_code, response.text)\n",
+    "    return response.json()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Authentication: read the token and build the request headers\n",
+    "bearer_token = auth()\n",
+    "headers = create_headers(bearer_token)\n",
+    "\n",
+    "# Query options handed to create_url\n",
+    "start_date = '2022-06-12T00:00:00.000Z'\n",
+    "# end_time = \"2022-06-29T00:00:00.000Z\"\n",
+    "max_results = 100\n",
+    "tweet_fields = 'created_at,public_metrics'\n",
+    "expansions = 'author_id'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Build the (search_url, query_params) pair for the first request\n",
+    "url = create_url(\n",
+    "    start_date=start_date,\n",
+    "    max_results=max_results,\n",
+    "    tweet_fields=tweet_fields,\n",
+    "    expansions=expansions,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Endpoint Response Code: 200\n"
+     ]
+    }
+   ],
+   "source": [
+    "# url holds the (search_url, query_params) tuple returned by create_url\n",
+    "json_response = connect_to_endpoint(url=url[0], headers=headers, params=url[1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Pull the four public metrics of the tweet at index 3 of the response\n",
+    "sample_metrics = json_response['data'][3]['public_metrics']\n",
+    "retweet_count, reply_count, like_count, quote_count = [\n",
+    "    sample_metrics[metric_name]\n",
+    "    for metric_name in ('retweet_count', 'reply_count', 'like_count', 'quote_count')\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "100"
+      ]
+     },
+     "execution_count": 30,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Display the result_count value from the response's 'meta' section\n",
+    "json_response['meta']['result_count']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def save_json(number, json_response):\n",
+    "    \"\"\"Write a response to data/data<number>.json, relative to the working directory.\n",
+    "\n",
+    "    Args:\n",
+    "        number: suffix used in the file name (converted with str()).\n",
+    "        json_response: JSON-serializable object to store.\n",
+    "    \"\"\"\n",
+    "    output_dir = 'data'\n",
+    "    # Create the directory on first use instead of failing with FileNotFoundError.\n",
+    "    os.makedirs(output_dir, exist_ok=True)\n",
+    "    path = os.path.join(output_dir, 'data' + str(number) + '.json')\n",
+    "    with open(path, 'w') as f:\n",
+    "        json.dump(json_response, f)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 331,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_all_tweets(start_date, max_results, tweet_fields, expansions, end_time=None):\n",
+    "    \"\"\"Fetch two consecutive pages of mentions and save each one as JSON.\n",
+    "\n",
+    "    The first response is saved as data1.json. The 'created_at' of its last\n",
+    "    tweet is then used as end_time for a second request, saved as data2.json.\n",
+    "    Uses the module-level `headers` for both requests.\n",
+    "\n",
+    "    Args:\n",
+    "        start_date, max_results, tweet_fields, expansions: passed to create_url.\n",
+    "        end_time: upper bound for the first request; None by default.\n",
+    "\n",
+    "    Returns:\n",
+    "        The JSON of the second request, or of the first one when it\n",
+    "        contains no tweets.\n",
+    "    \"\"\"\n",
+    "    # Forward the caller's end_time; it used to be dropped on the first request.\n",
+    "    url = create_url(start_date, max_results, tweet_fields, expansions, end_time)\n",
+    "    json_response = connect_to_endpoint(url[0], headers, url[1])\n",
+    "    save_json(1, json_response)\n",
+    "\n",
+    "    tweets = json_response.get('data')\n",
+    "    if not tweets:\n",
+    "        # No tweets came back, so there is no timestamp to page from.\n",
+    "        return json_response\n",
+    "\n",
+    "    # NOTE(review): assumes the last tweet of a page is the oldest one - confirm.\n",
+    "    end_time = tweets[-1]['created_at']\n",
+    "    url = create_url(start_date, max_results, tweet_fields, expansions, end_time)\n",
+    "    json_response = connect_to_endpoint(url[0], headers, url[1])\n",
+    "    save_json(2, json_response)\n",
+    "    return json_response"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 332,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Endpoint Response Code: 200\n",
+      "2022-06-28T07:50:34.000Z\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Run the two-page download with the options defined above\n",
+    "json_response = get_all_tweets(\n",
+    "    start_date=start_date,\n",
+    "    max_results=max_results,\n",
+    "    tweet_fields=tweet_fields,\n",
+    "    expansions=expansions,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 333,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# print(json.dumps(json_response, indent=4, sort_keys=True))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.7.4 64-bit",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.4"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}