From 52ae784fca87803c7f7f78403f289cad3f960b4f Mon Sep 17 00:00:00 2001
From: lauragreemko
Date: Sun, 9 Oct 2022 20:20:01 +0200
Subject: [PATCH] changed docker image and perfect file

---
 app/app.py                  | 15 ++++++++----
 app/functions.py            | 48 +++++++++++++++++++++++++++++++++++--
 app/requirements.txt        |  3 ++-
 app/templates/index.html    | 14 +++++++----
 app/templates/predict.html  |  4 ++--
 notebooks/eda.ipynb         |  4 ++--
 notebooks/predictions.ipynb |  4 ++--
 7 files changed, 74 insertions(+), 18 deletions(-)

diff --git a/app/app.py b/app/app.py
index f03860f..c3add91 100644
--- a/app/app.py
+++ b/app/app.py
@@ -2,6 +2,7 @@
 import pandas as pd
 from os import environ
 from functions import *
+import nltk
 
 app = Flask(__name__)
 
@@ -12,15 +13,21 @@ def hello():
 # 1. Devolver la predicción de los nuevos datos enviados mediante argumentos en la llamada
 @app.route('/predict', methods=['GET'])
 def predict():
+    nltk.download('stopwords')
     model = load_models('sentiment_model')
     text = get_arguments('text')
+    text = clean_text(text)
+
     df = pd.DataFrame()
     df['text'] = [text]
+
     prediction = model.predict(df['text'])
     prediction = prediction[0]
+    if prediction == 0:
+        prediction = 'The sentiment of this tweet is positive'
+    else:
+        prediction = 'The sentiment of this tweet is negative'  # NOTE(review): assumes label 0 = positive; confirm against the training notebook
     return render_template('predict.html', predict=prediction)
 
-# if __name__ == '__main__':
-#     app.run(debug = True, host = '0.0.0.0', port=environ.get("PORT", 5000))
-
-# app.run()
\ No newline at end of file
+if __name__ == '__main__':
+    app.run(debug = True, host = '0.0.0.0', port=environ.get("PORT", 5000))
\ No newline at end of file
diff --git a/app/functions.py b/app/functions.py
index 1d34f57..3087191 100644
--- a/app/functions.py
+++ b/app/functions.py
@@ -1,9 +1,53 @@
 import pickle
 from flask import request
+from nltk.corpus import stopwords
+import re
+from nltk.stem.snowball import SnowballStemmer
 
 def load_models(model_name):
-    path = '/home/yesimtrinity/technical_test_laura/app/static/models/' + model_name
+    path = 'static/models/' + model_name
     return pickle.load(open(path,'rb'))
 
 def get_arguments(arg):
-    return request.args.get(arg, None)
\ No newline at end of file
+    return request.args.get(arg, None)
+
+def remove_links(text):
+    return " ".join([' ' if ('http') in word else word for word in text.split()])
+
+def remove_stopwords(text):
+    stop = stopwords.words('spanish')
+    return ' '.join([word for word in text.split() if word not in (stop)])
+
+def remove_mentions(text):
+    return re.sub(r"\@\w+[,]|\@\w+|[,]\@\w+", " ", text)
+
+def signs_tweets(text):
+    signs = re.compile(r"(\.)|(\;)|(\:)|(\!)|(\?)|(\¿)|(\@)|(\,)|(\")|(\()|(\))|(\[)|(\])|(\d+)|(\¡)")
+    return signs.sub(' ', text.lower())
+
+def remove_hash(text):
+    return re.sub(r"\#\w+[,]|\#\w+|[,]\#\w+", "", text)
+
+def spanish_stemmer(x):
+    stemmer = SnowballStemmer('spanish')
+    return " ".join([stemmer.stem(word) for word in x.split()])
+
+def clean_emoji(x):
+    emoji_text = re.compile("["
+        u"\U0001F600-\U0001F64F"  # emoticons
+        u"\U0001F300-\U0001F5FF"  # symbols & pictographs
+        u"\U0001F680-\U0001F6FF"  # transport & map symbols
+        u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
+        u"\U00002702-\U000027B0"
+        u"\U000024C2-\U0001F251"
+        "]+", flags=re.UNICODE)
+    return emoji_text.sub(r'', x)
+
+def clean_text(x):  # full cleaning pipeline: every step consumes the previous step's output
+    text = remove_links(x)
+    text = remove_stopwords(text)
+    text = remove_mentions(text)
+    text = signs_tweets(text)
+    text = remove_hash(text)
+    text = spanish_stemmer(text)
+    return clean_emoji(text)
\ No newline at end of file
diff --git a/app/requirements.txt b/app/requirements.txt
index 17641fd..ebc6a6f 100644
--- a/app/requirements.txt
+++ b/app/requirements.txt
@@ -1,3 +1,4 @@
 Flask
 pandas
-sklearn
\ No newline at end of file
+scikit-learn
+nltk
\ No newline at end of file
diff --git a/app/templates/index.html b/app/templates/index.html
index 76ec2e2..a0f4ede 100644
--- a/app/templates/index.html
+++ b/app/templates/index.html
@@ -10,10 +10,14 @@
             display: flex;
             flex-direction: column;
             align-items: center;
-            margin: 150px;
padding: 0px; background-image: url('static/images/background.jpg'); background-size: cover; + background-size: cover; + justify-content: center; + height: 100%; + width: 100%; + position: absolute; } .form { display: flex; @@ -25,14 +29,14 @@ -

Sentiment analysis

+

Sentiment analysis

-

Enter your text

+

Enter your text

-
-
diff --git a/app/templates/predict.html b/app/templates/predict.html index 6ac3480..4cdb161 100644 --- a/app/templates/predict.html +++ b/app/templates/predict.html @@ -25,6 +25,6 @@ -
{{predict}}
- Go Back +
{{predict}}
+ Go Back \ No newline at end of file diff --git a/notebooks/eda.ipynb b/notebooks/eda.ipynb index e73c164..a781e79 100644 --- a/notebooks/eda.ipynb +++ b/notebooks/eda.ipynb @@ -1377,10 +1377,10 @@ "metadata": {}, "outputs": [], "source": [ - "def remove_metions(text):\n", + "def remove_mentions(text):\n", " return re.sub(r\"\\@\\w+[,]|\\@\\w+|[,]\\@\\w+\", \" \", text)\n", "\n", - "df_tweets['text_mod'] = df_tweets['text_mod'].apply(lambda x: remove_metions(x))" + "df_tweets['text_mod'] = df_tweets['text_mod'].apply(lambda x: remove_mentions(x))" ] }, { diff --git a/notebooks/predictions.ipynb b/notebooks/predictions.ipynb index 92ed0e0..c650e33 100644 --- a/notebooks/predictions.ipynb +++ b/notebooks/predictions.ipynb @@ -77,10 +77,10 @@ "metadata": {}, "outputs": [], "source": [ - "def remove_metions(text):\n", + "def remove_mentions(text):\n", " return re.sub(r\"\\@\\w+[,]|\\@\\w+|[,]\\@\\w+\", \" \", text)\n", "\n", - "df_tweets['text'] = df_tweets['text'].apply(lambda x: remove_metions(x))" + "df_tweets['text'] = df_tweets['text'].apply(lambda x: remove_mentions(x))" ] }, {