changed docker image and perfect file

laurabarredaagusti · Oct 9, 2022 · 52ae784 · 52ae784
1 parent 0edf0e0
commit 52ae784
Show file tree

Hide file tree

Showing 7 changed files with 74 additions and 18 deletions.
diff --git a/app/app.py b/app/app.py
@@ -2,6 +2,7 @@
 import pandas as pd
 from os import environ
 from functions import *
+import nltk
 
 app = Flask(__name__)
 
@@ -12,15 +13,21 @@ def hello():
 # 1. Devolver la predicción de los nuevos datos enviados mediante argumentos en la llamada
 @app.route('/predict', methods=['GET'])
 def predict():
+    nltk.download('stopwords')
     model = load_models('sentiment_model')
     text = get_arguments('text')
+    text = clean_text(text)
+
     df = pd.DataFrame()
     df['text'] = [text]
+
     prediction = model.predict(df['text'])
     prediction = prediction[0]
+    if prediction == 0:
+        prediction = 'The sentiment of this tweet is positive'
+    else:
+        prediction = 'The sentiment of this tweet is negative'
     return render_template('predict.html', predict=prediction)
 
-# if __name__ == '__main__':
-#   app.run(debug = True, host = '0.0.0.0', port=environ.get("PORT", 5000))
-
-# app.run()
+if __name__ == '__main__':
+  app.run(debug = True, host = '0.0.0.0', port=environ.get("PORT", 5000))
diff --git a/app/functions.py b/app/functions.py
@@ -1,9 +1,53 @@
 import pickle
 from flask import request
+from nltk.corpus import stopwords
+import re
+from nltk.stem.snowball import SnowballStemmer
 
 def load_models(model_name):
-    path = '/home/yesimtrinity/technical_test_laura/app/static/models/' + model_name
+    path = 'static/models/' + model_name
     return pickle.load(open(path,'rb'))
 
 def get_arguments(arg):
-    return request.args.get(arg, None)
+    return request.args.get(arg, None)
+
+def remove_links(text):
+    return " ".join([' ' if ('http') in word else word for word in text.split()])
+
+def remove_stopwords(text):
+    stop = stopwords.words('spanish')
+    return ' '.join([word for word in text.split() if word not in (stop)])
+
+def remove_mentions(text):
+    return re.sub(r"\@\w+[,]|\@\w+|[,]\@\w+", " ", text)
+
+def signs_tweets(text):
+    signs = re.compile(r"(\.)|(\;)|(\:)|(\!)|(\?)|(\¿)|(\@)|(\,)|(\")|(\()|(\))|(\[)|(\])|(\d+)|(\¡)")
+    return signs.sub(' ', text.lower())
+
+def remove_hash(text):
+    return re.sub(r"\#\w+[,]|\#\w+|[,]\#\w+", "", text)
+
+def spanish_stemmer(x):
+    stemmer = SnowballStemmer('spanish')
+    return " ".join([stemmer.stem(word) for word in x.split()])
+
+def clean_emoji(x):
+    emoji_text = re.compile("["
+                           u"\U0001F600-\U0001F64F"  # emoticons
+                           u"\U0001F300-\U0001F5FF"  # symbols & pictographs
+                           u"\U0001F680-\U0001F6FF"  # transport & map symbols
+                           u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
+                           u"\U00002702-\U000027B0"
+                           u"\U000024C2-\U0001F251"
+                           "]+", flags=re.UNICODE)
+    return emoji_text.sub(r'', x)
+
+def clean_text(x):
+    text = remove_links(x)
+    text = remove_stopwords(text)
+    text = remove_mentions(text)
+    text = signs_tweets(text)
+    text = remove_hash(text)
+    text = spanish_stemmer(text)
+    return clean_emoji(text)
diff --git a/app/requirements.txt b/app/requirements.txt
@@ -1,3 +1,4 @@
 Flask
 pandas
-sklearn
+scikit-learn
+nltk
diff --git a/app/templates/index.html b/app/templates/index.html
@@ -10,10 +10,14 @@
             display: flex;
             flex-direction: column;
             align-items: center;
-            margin: 150px;
             padding: 0px;
             background-image: url('static/images/background.jpg');
             background-size: cover;
+            background-size: cover;
+            justify-content: center;
+            height: 100%;
+            width: 100%;
+            position: absolute;
         } 
         .form {
             display: flex;
@@ -25,14 +29,14 @@
 
 <body>
 </body>
-    <h1 style="color:white;font-family:verdana;">Sentiment analysis</h1>
+    <h1 style="color:white;font-family:verdana;font-weight: 400;letter-spacing: 1px;">Sentiment analysis</h1>
 
     <div class="forms">
         <div class="form">
-            <h2 style="color:white;font-family:verdana;">Enter your text</h2>
+            <h2 style="color:white;font-family:verdana;font-size: 18px;font-weight: 300;">Enter your text</h2>
             <form method="get" action="/predict" autocomplete="on" >
-                <input type="text" name="text" /><br/>
-                <button type="submit">Submit</bsutton>
+                <textarea type="text" name="text" class="longInput" cols="30" rows="10" style="padding-left:10px;padding-right:10px;border-width:0px;margin-bottom:10px;width:300px;height:150px"></textarea><br/>
+                <button type="submit" style="font-family:verdana;background-color:white;border-width:0;padding-top:5px;padding-bottom:5px;padding-left:20px;padding-right:20px;">Submit</button>
             </form>
         </div>
     </div>

diff --git a/app/templates/predict.html b/app/templates/predict.html
@@ -25,6 +25,6 @@
 
 <body>
 </body>
-    <div>{{predict}}</div>
-    <a href="/">Go Back</a>
+    <div style="color:white;font-family:verdana;padding:20px;">{{predict}}</div>
+    <a href="/" style="color:white;font-family:verdana;">Go Back</a>
 </html>
diff --git a/notebooks/eda.ipynb b/notebooks/eda.ipynb
@@ -1377,10 +1377,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "def remove_metions(text):\n",
+    "def remove_mentions(text):\n",
     "    return re.sub(r\"\\@\\w+[,]|\\@\\w+|[,]\\@\\w+\", \" \", text)\n",
     "\n",
-    "df_tweets['text_mod'] = df_tweets['text_mod'].apply(lambda x: remove_metions(x))"
+    "df_tweets['text_mod'] = df_tweets['text_mod'].apply(lambda x: remove_mentions(x))"
    ]
   },
   {

diff --git a/notebooks/predictions.ipynb b/notebooks/predictions.ipynb
@@ -77,10 +77,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "def remove_metions(text):\n",
+    "def remove_mentions(text):\n",
     "    return re.sub(r\"\\@\\w+[,]|\\@\\w+|[,]\\@\\w+\", \" \", text)\n",
     "\n",
-    "df_tweets['text'] = df_tweets['text'].apply(lambda x: remove_metions(x))"
+    "df_tweets['text'] = df_tweets['text'].apply(lambda x: remove_mentions(x))"
    ]
   },
   {
-Original file line number
+Diff line change
@@ -1,3 +1,4 @@
     Flask
     pandas
-    sklearn
+    scikit-learn
+    nltk