Skip to content

Commit

Permalink
Feat: Updated depresion.model
Browse files Browse the repository at this point in the history
  • Loading branch information
SebastianCB-dev committed Nov 7, 2022
1 parent 0394d6d commit 66e24f3
Show file tree
Hide file tree
Showing 8 changed files with 60 additions and 44 deletions.
55 changes: 55 additions & 0 deletions BECK/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import pandas as pd
from preprocessing_service import Preprocesamiento
import json
import pprint
from model_word2vec_service import ModelWord2Vec
import nltk
nltk.download('punkt')
# Script: extend ./datasets/coseno.csv with one row per comment, holding the
# cosine similarity between the preprocessed comment and every entry found in
# ./JSON/items_preprocessing.json, plus the comment's class label.
df = pd.read_csv("./datasets/comentarios_full.csv", encoding='utf-8')
comments = list(df["text"])
classes = list(df["class"])

preprocesamiento = Preprocesamiento()
w2v = ModelWord2Vec()

df_cve = pd.read_csv('./datasets/coseno.csv', encoding='utf-8')
# Every column from the third onward receives one similarity score, in order.
columns = list(df_cve.columns)[2:]

# Half-open slice [inicio, fin) of the comment list handled by this run.
inicio = 6000
fin = 8000

# Load the preprocessed items (presumably the Beck inventory items, judging by
# the original "Lectura beck" comment -- confirm against the JSON file).
# On failure the dict stays empty and the rows are written without scores.
beck_data_preprocessing = {}
try:
    # A context manager guarantees the handle is closed; the original opened
    # the file twice and never closed either handle.
    with open('./JSON/items_preprocessing.json', 'r', encoding='utf-8') as items_file:
        beck_data_preprocessing = json.load(items_file)
except (OSError, ValueError) as e:
    # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
    print(f'Error: {e}')

# Rows are collected here and concatenated once at the end: DataFrame.append
# was removed in pandas 2.0 and copied the whole frame on every call.
new_rows = []

# class_comment is the ABSOLUTE index into comments/classes. The original
# counted from 0 while iterating comments[inicio:fin], which paired every
# comment with the class label of a different row.
for class_comment, comment in enumerate(comments[inicio:fin], start=inicio):
    try:
        print(f'Comentario: {class_comment + 1}/{len(comments)}')
        comment_preprocesado = preprocesamiento.preprocesamiento_con_ortografia(
            comment)
        new_comment = {
            "Comentario": comment,
            "Comentario Preprocesado": comment_preprocesado,
        }
        w2v.add_corpus(comment_preprocesado)

        # One similarity score per (item, result) pair, written to the score
        # columns in iteration order.
        contador = 0
        for results in beck_data_preprocessing.values():
            for result in results.values():
                new_comment[columns[contador]] = w2v.get_cosine_similarity(
                    comment_preprocesado, result["data"])
                contador += 1

        new_comment["Clase"] = classes[class_comment]
        new_rows.append(new_comment)
    except Exception as e:
        # Best effort by design: a comment that fails is reported and skipped
        # so one bad row does not abort the whole batch.
        print(f'Error en el comentario {class_comment} omitiendo...')
        print(e)

if new_rows:
    df_cve = pd.concat([df_cve, pd.DataFrame(new_rows)], ignore_index=True)

df_cve.to_csv('./datasets/coseno.csv', index=False, encoding="utf-8")
7 changes: 0 additions & 7 deletions BECK/comentarios_test.txt

This file was deleted.

Binary file removed BECK/datasets/DATASET_ENTRENAMIENTO.xlsx
Binary file not shown.
Binary file modified BECK/depresion.model
Binary file not shown.
2 changes: 1 addition & 1 deletion BECK/helpers/coseno_vs_euclidian.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
new_comment = {}
contador = 0
new_comment["Comentario"] = comment
comment_preprocesado = preprocesamiento.preprocesamiento_sin_ortografia(
comment_preprocesado = preprocesamiento.preprocesamiento_con_ortografia(
comment)
new_comment["Comentario Preprocesado"] = comment_preprocesado
w2v.add_corpus(comment_preprocesado)
Expand Down
36 changes: 2 additions & 34 deletions BECK/model_word2vec_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ def __init__(self):
"""
La función toma una lista de palabras y devuelve una lista de vectores.
"""
self.model = Word2Vec.load('word2vec.model')
self.model = Word2Vec.load('./depresion.model')

def get_model(self):
"""
Expand All @@ -32,39 +32,7 @@ def save_model(self):
"""
Esta funcion guarda el modelo
"""
self.model.save('word2vec.model')

def get_euclidian_distance(self, corpus_a, corpus_b):
    """
    Return the Euclidean distance between the word vectors of two corpora.

    Both corpora are converted with ``self.get_word_vector``. When the two
    resulting lists differ in length, the shorter one is padded with the
    value returned by ``self.getVector250()`` (one call per missing entry)
    until the lengths match.

    :param corpus_a: The first corpus to compare
    :param corpus_b: The second corpus to compare
    :return: ``np.linalg.norm`` of the element-wise difference of the two
        (padded) vector lists.
    """
    vectors_a = np.array(self.get_word_vector(corpus_a)).tolist()
    vectors_b = np.array(self.get_word_vector(corpus_b)).tolist()

    # Positive: corpus_b is shorter; negative: corpus_a is shorter.
    shortfall = len(vectors_a) - len(vectors_b)
    if shortfall > 0:
        vectors_b.extend(self.getVector250() for _ in range(shortfall))
    elif shortfall < 0:
        vectors_a.extend(self.getVector250() for _ in range(-shortfall))

    return np.linalg.norm(np.array(vectors_a) - np.array(vectors_b))

self.model.save('depresion.model')

def get_cosine_similarity(self, corpus_a, corpus_b):
"""
Expand Down
4 changes: 2 additions & 2 deletions BECK/preprocessing_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,14 @@ def preprocesamiento_con_ortografia(self, texto):
"""

# Eliminar etiquetas y hashtags
texto = self.normalizar(texto)
texto = self.eliminar_etiquetados(texto)
texto = self.eliminar_emojis(texto)
texto = self.eliminacion_data_inutil(texto)
texto = self.correccion_ortografica(texto)
texto = self.normalizar(texto)
texto = self.stop_words(texto)
texto = self.lematizacion(texto)
#texto = self.eliminar_duplicados(texto)
texto = self.eliminar_duplicados(texto)
return texto


Expand Down
Binary file removed BECK/word2vec.model
Binary file not shown.

0 comments on commit 66e24f3

Please sign in to comment.