Skip to content

Commit

Permalink
Feat: Added Model construction
Browse files Browse the repository at this point in the history
  • Loading branch information
SebastianCB-dev committed Nov 3, 2022
1 parent 7aba0bc commit 86e75d0
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 10 deletions.
24 changes: 16 additions & 8 deletions BECK/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,23 @@
import nltk
nltk.download('punkt')
from preprocessing_service import Preprocesamiento
from gensim.models import Word2Vec

df = pd.read_csv('./comentarios_español_depresivos.csv', encoding='utf-8')
df_positivo = pd.read_csv('./comentarios_español_depresivos.csv', encoding='utf-8')
df_negativo = pd.read_csv('./comentarios_español_no_depresivos.csv', encoding='utf-8')
pp = Preprocesamiento()
tokens = []

comentarios = list(df['text'])
clases = list(df['class'])
comentarios_depresivos = list(df_positivo['text'])
clases_depresivos = list(df_positivo['class'])

print('-- Comentario antes --')
print(comentarios[0])
print('-- Comentario despúes --')
comentario_preprocesado = pp.preprocesamiento_sin_ortografia(comentarios[0])
print(comentario_preprocesado)


# Training
# vector_size = 200: each word is embedded as a 200-dimensional vector
# window = context window: how many neighbouring words around the target word can influence its vector
# Example: take the 5-word sentence "stackoverflow great website for programmers" (assume the stop words "great" and "for" are kept). With a window size of 2, the vector of "stackoverflow" is directly affected by "great" and "website"; with a window size of 5, it can also be directly affected by two more words, "for" and "programmers". "Affected" here means that training pulls the vectors of the two words closer together.


model = Word2Vec(sentences=tokens, vector_size=200,
window=7, workers=4, sg=1, epochs=20)
4 changes: 2 additions & 2 deletions BECK/preprocessing_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def preprocesamiento_con_ortografia(self, texto):
texto = self.correccion_ortografica(texto)
texto = self.stop_words(texto)
texto = self.lematizacion(texto)
texto = self.eliminar_duplicados(texto)
#texto = self.eliminar_duplicados(texto)
return texto


Expand All @@ -67,7 +67,7 @@ def preprocesamiento_sin_ortografia(self, texto):
texto = self.eliminacion_data_inutil(texto)
texto = self.stop_words(texto)
texto = self.lematizacion(texto)
texto = self.eliminar_duplicados(texto)
#texto = self.eliminar_duplicados(texto)
return texto

def eliminar_etiquetados(self, texto):
Expand Down

0 comments on commit 86e75d0

Please sign in to comment.