Skip to content

Commit

Permalink
Feat: Added depresion.model
Browse files Browse the repository at this point in the history
  • Loading branch information
SebastianCB-dev committed Nov 4, 2022
1 parent 86e75d0 commit 0394d6d
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 24 deletions.
24 changes: 0 additions & 24 deletions BECK/app.py
Original file line number Diff line number Diff line change
@@ -1,24 +0,0 @@
import pandas as pd
import nltk
nltk.download('punkt')
from preprocessing_service import Preprocesamiento
from gensim.models import Word2Vec

# Load the two CSV corpora of Spanish comments (depressive / non-depressive,
# judging by the file names). Paths are relative to the working directory.
df_positivo = pd.read_csv('./comentarios_español_depresivos.csv', encoding='utf-8')
df_negativo = pd.read_csv('./comentarios_español_no_depresivos.csv', encoding='utf-8')
# Preprocessing helper from preprocessing_service.
# NOTE(review): `pp` is instantiated but never called in this script.
pp = Preprocesamiento()
# Sentence list handed to Word2Vec below.
# NOTE(review): nothing in this script appends to `tokens`, so it is still
# empty when the model is built -- confirm whether a preprocessing loop is missing.
tokens = []

# Copy the 'text' and 'class' columns of the depressive corpus into plain lists.
# NOTE(review): neither list, nor `df_negativo`, is used later in this script.
comentarios_depresivos = list(df_positivo['text'])
clases_depresivos = list(df_positivo['class'])



# Training
# vector_size = 200 dimensions per word vector
# window = context window: how many neighbouring words influence a target word
# Example: in "stackoverflow great website for programmers" (5 words, assuming
# the stop words "great" and "for" are kept), a window of 2 means the vector of
# "stackoverflow" is directly affected by "great" and "website"; a window of 5
# also lets "for" and "programmers" affect it. "Affected" here means training
# pulls the vectors of the two words closer together.


# sg=1 selects the skip-gram training algorithm (per the gensim documentation).
model = Word2Vec(sentences=tokens, vector_size=200,
window=7, workers=4, sg=1, epochs=20)
Binary file added BECK/depresion.model
Binary file not shown.
44 changes: 44 additions & 0 deletions BECK/helpers/model_creator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import pandas as pd
import nltk
nltk.download('punkt')
from preprocessing_service import Preprocesamiento
from gensim.models import Word2Vec

# Build a Word2Vec embedding model from both comment corpora and save it to
# 'depresion.model' in the current working directory.

# Load the two CSV corpora of Spanish comments (depressive / non-depressive,
# judging by the file names). Only the 'text' column of each is used below.
df_positivo = pd.read_csv('./comentarios_español_depresivos.csv', encoding='utf-8')
df_negativo = pd.read_csv('./comentarios_español_no_depresivos.csv', encoding='utf-8')
pp = Preprocesamiento()
tokens = []

comentarios_depresivos = list(df_positivo['text'])
comentarios_no_depresivos = list(df_negativo['text'])

# Both corpora go through exactly the same preprocessing, so they are handled
# in one pass (depressive comments first, preserving the original order).
todos_los_comentarios = comentarios_depresivos + comentarios_no_depresivos
total_comentarios = len(todos_los_comentarios)

# `enumerate` advances the progress counter on every iteration, whether the
# comment was preprocessed successfully or not, so the reported position
# always matches the comment actually being handled.
for count, comentario in enumerate(todos_los_comentarios, start=1):
    print(f'Preprocesando comentario: {count}/{total_comentarios}')
    try:
        comentario_preprocesado = pp.preprocesamiento_sin_ortografia(comentario)
    except Exception as e:
        # Best effort: a comment that cannot be preprocessed is skipped rather
        # than aborting the whole run, but the cause is reported so failures
        # can be diagnosed.
        print(
            f'Error preprocesando el comentario {count}/{total_comentarios}: {e!r}')
        continue
    tokens.append(comentario_preprocesado)

# Training parameters (see the gensim Word2Vec documentation):
#   vector_size=200 -> dimensionality of each word vector
#   window=7        -> maximum distance between a target word and its context
#   workers=4       -> number of worker threads
#   sg=1            -> skip-gram training algorithm
#   epochs=20       -> number of passes over the corpus
model = Word2Vec(sentences=tokens, vector_size=200,
                 window=7, workers=4, sg=1, epochs=20)

model.save('depresion.model')

0 comments on commit 0394d6d

Please sign in to comment.