Skip to content

Commit

Permalink
Feat: Add functions to fill in the BECK (BDI-II) depression inventory
Browse files Browse the repository at this point in the history
  • Loading branch information
SebastianCB-dev committed Dec 4, 2022
1 parent ba7b103 commit 10fdb7e
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 69 deletions.
62 changes: 12 additions & 50 deletions BECK/app.py
Original file line number Diff line number Diff line change
@@ -1,55 +1,17 @@
"""Fill the cosine-similarity dataset and run the BECK (BDI-II) pipeline.

NOTE(review): this script appears to contain merged old/new diff residue —
two `Preprocesamiento` instances are created (`preprocesamiento` and `pp`)
and `comentario` is assigned twice; confirm which paths are still live.

Flow:
  1. read raw comments and class labels from CSV,
  2. preprocess each comment and append its cosine similarity against every
     BECK answer as a new row of ./datasets/coseno.csv,
  3. run one sample comment through the full BECK pipeline and print the
     chosen inventory answers.
"""
import pandas as pd
from preprocessing_service import Preprocesamiento
import json
import pprint
from model_word2vec_service import ModelWord2Vec
import nltk
nltk.download('punkt')  # tokenizer data required by the preprocessing step

# Raw comments plus their class labels.
df = pd.read_csv("./datasets/comentarios_full.csv", encoding='utf-8')
comments = list(df["text"])
classes = list(df["class"])

# df_prueba_chelsea = df_prueba_chelsea.append({'Nombre': 'Vaughn', 'Numero de camiseta':'33'}, ignore_index=True)
preprocesamiento = Preprocesamiento()

pp = Preprocesamiento()
w2v = ModelWord2Vec()
# Existing similarity dataset; new rows are appended below and written back.
df_cve = pd.read_csv('./datasets/coseno.csv', encoding='utf-8')
columns = list(df_cve.columns)[2:]  # skip the two comment-text columns
inicio = 6000  # slice of `comments` processed in this run
fin = 8000
class_comment = 0  # index into `classes`, advanced once per comment
# Load the preprocessed BECK inventory items.
beck_data_preprocessing = {}
try:
    # NOTE(review): the file is opened twice and neither handle is closed;
    # a single `with open(...)` would be safer.
    if open('./JSON/items_preprocessing.json', 'r'):
        beck_data_preprocessing = json.loads(
            open('./JSON/items_preprocessing.json', 'r', encoding='utf-8').read())
except Exception as e:
    print(f'Error: {e}')

for comment in comments[inicio:fin]:
    try:
        print(f'Comentario: {class_comment + 1}/{len(comments)}')
        new_comment = {}
        contador = 0  # walks `columns` in step with the BECK answers
        new_comment["Comentario"] = comment
        comment_preprocesado = preprocesamiento.preprocesamiento_con_ortografia(
            comment)
        new_comment["Comentario Preprocesado"] = comment_preprocesado
        w2v.add_corpus(comment_preprocesado)
        # One similarity column per BECK answer, in JSON iteration order.
        for item in beck_data_preprocessing.keys():
            for result in beck_data_preprocessing[item].keys():
                new_comment[columns[contador]] = w2v.get_cosine_similarity(comment_preprocesado, beck_data_preprocessing[item][result]["data"])
                contador += 1

        # Add to dataframe
        new_comment["Clase"] = classes[class_comment]
        df_cve = df_cve.append(new_comment, ignore_index=True)
        class_comment += 1
    except Exception as e:
        # Best-effort: skip a failing comment but keep the class index in sync.
        print(f'Error en el comentario {class_comment} omitiendo...')
        print(e)
        class_comment += 1
        continue

df_cve.to_csv('./datasets/coseno.csv', index=False, encoding="utf-8")
comentario = "Estoy muy triste y no se que hacer"
# A longer sad sample comment (overwrites the assignment above).
comentario = "Estoy muy triste y no se que hacer es un día nublado y esta lloviendo"
# Preprocess the sample comment.
comentario_procesado = pp.preprocesamiento_con_ortografia(comentario)
# Cosine similarity between the comment and every answer of the
# Beck Depression Inventory (BDI-II).
cosine_similarity_beck = w2v.get_cosine_similarity_BECK(comentario_procesado)
# Pick the best-matching answer per inventory item from those similarities.
results_beck = w2v.get_result_beck(cosine_similarity_beck)
print("El comentario lleno el inventario BECK de esta manera:", results_beck)
88 changes: 70 additions & 18 deletions BECK/model_word2vec_service.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from pprint import pprint
from gensim.models import Word2Vec
import numpy as np

import json
class ModelWord2Vec:

def __init__(self):
Expand All @@ -10,6 +10,15 @@ def __init__(self):
"""
self.model = Word2Vec.load('./depresion.model')

def get_beck(self):
    """Load the preprocessed BECK (BDI-II) inventory items from JSON.

    :return: dict of items -> answers as stored in
        ./JSON/items_preprocessing.json; an empty dict if the file is
        missing or unreadable (errors are printed, never raised, so
        callers always receive a dict).
    """
    beck_data_preprocessing = {}
    try:
        # `with` closes the handle; the original opened the file twice
        # and never closed either handle.
        with open('./JSON/items_preprocessing.json', 'r', encoding='utf-8') as fh:
            beck_data_preprocessing = json.load(fh)
    except Exception as e:  # best-effort by design: report and fall through
        print(f'Error: {e}')
    return beck_data_preprocessing
def get_model(self):
"""
me devuelve el modelo
Expand Down Expand Up @@ -44,30 +53,73 @@ def get_cosine_similarity(self, corpus_a, corpus_b):
"""
return self.model.wv.n_similarity(corpus_a, corpus_b)

def get_word_vectors(self, corpus):
    """Map each word in *corpus* to its embedding vector.

    :param corpus: list of tokens (strings)
    :return: list of vectors, one per token; words missing from the
        model's vocabulary fall back to a 250-dimensional zero vector
    """
    array_result = []
    # TODO: on a vocabulary miss, add the word to the vocabulary and
    # retry instead of silently substituting a zero vector.
    for word in corpus:
        try:
            array_result.append(self.model.wv[word])
        except KeyError:  # out-of-vocabulary word
            array_result.append(self.getVector250())
    return array_result

def get_cosine_similarity_BECK(self, corpus):
    """Cosine similarity of *corpus* against every BECK (BDI-II) answer.

    :param corpus: preprocessed token list for one comment
    :return: flat list of similarities, ordered item-by-item and then
        answer-by-answer exactly as stored in the BECK JSON
    """
    beck = self.get_beck()
    data = []
    for item in beck.keys():
        for result in beck[item].keys():
            similarity = self.get_cosine_similarity(corpus, beck[item][result]["data"])
            data.append(similarity)
    return data

def get_result_beck(self, cosine_similarities):
    """Pick one answer per BECK (BDI-II) item from the flat similarity list.

    The 90 similarities split into five sections: items 1-15 (4 answers
    each), item 16 (7 answers), item 17 (4), item 18 (7) and items 19-21
    (4 each).  Each section is resolved by the matching picker and the
    per-item choices are returned as one flat list.
    """
    sections = [
        (cosine_similarities[:60], self.getMaxBeck4Items),    # items 1-15
        (cosine_similarities[60:67], self.getMaxBeck7Items),  # item 16
        (cosine_similarities[67:71], self.getMaxBeck4Items),  # item 17
        (cosine_similarities[71:78], self.getMaxBeck7Items),  # item 18
        (cosine_similarities[78:], self.getMaxBeck4Items),    # items 19-21
    ]
    flat_results = []
    for chunk, picker in sections:
        flat_results.extend(picker(chunk))
    return flat_results

def getMaxBeck4Items(self, array):
    """Choose the best answer index for each 4-answer BECK item.

    :param array: flat similarity scores in consecutive groups of 4
    :return: one answer index (0-3) per group; strict '>' against an
        initial 0 means ties keep the earliest answer and a group of
        all non-positive scores yields 0
    """
    results = []
    # The original shadowed the outer loop variable `index` inside the
    # inner loop; distinct names make the intent explicit.
    for start in range(0, len(array), 4):
        group = array[start:start + 4]
        best_score = 0
        best_answer = 0
        for answer, score in enumerate(group):
            if score > best_score:
                best_score = score
                best_answer = answer
        results.append(best_answer)
    return results

def getMaxBeck7Items(self, array):
    """Choose the best answer for each 7-answer BECK item.

    Seven candidate answers collapse onto the BDI-II score scale via
    the fixed mapping [0, 1, 1, 2, 2, 3, 3] (answer position -> score).

    :param array: flat similarity scores in consecutive groups of 7
    :return: one mapped score (0-3) per group; strict '>' against an
        initial 0 keeps the earliest answer on ties, and a group of all
        non-positive scores yields 0
    """
    score_map = [0, 1, 1, 2, 2, 3, 3]
    results = []
    # The original shadowed the outer loop variable `index` inside the
    # inner loop; distinct names make the intent explicit.
    for start in range(0, len(array), 7):
        group = array[start:start + 7]
        best_similarity = 0
        best_score = 0
        for answer, similarity in enumerate(group):
            if similarity > best_similarity:
                best_similarity = similarity
                best_score = score_map[answer]
        results.append(best_score)
    return results

def getVector250(self, size=250):
    """Return a zero vector used as the out-of-vocabulary fallback.

    :param size: vector dimensionality; defaults to 250, the model's
        embedding size, so existing callers are unaffected
    :return: list of `size` zeros
    """
    return list(np.zeros(size))




6 changes: 5 additions & 1 deletion BECK/preprocessing_service.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import emoji
import re
import nltk
nltk.download('punkt')
from nltk.tokenize import word_tokenize
import spacy
import stanza
Expand Down Expand Up @@ -47,8 +49,10 @@ def preprocesamiento_con_ortografia(self, texto):
texto = self.eliminacion_data_inutil(texto)
texto = self.correccion_ortografica(texto)
texto = self.normalizar(texto)
texto = self.stop_words(texto)
texto = texto.split(" ")
texto = self.lematizacion(texto)
texto = " ".join(texto)
texto = self.stop_words(texto)
texto = self.eliminar_duplicados(texto)
return texto

Expand Down
8 changes: 8 additions & 0 deletions BECK/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import numpy as np

# Scratch check of slicing a 21-element sequence (one entry per BECK item).
# NOTE: the slice bounds here differ from the 90-similarity split used by
# get_result_beck; this file only exercises the slicing syntax itself.
array = list(np.arange(1, 22))  # values 1..21
primera_parte = array[:15]      # elements 1-15
segunda_parte = array[15:16]    # element 16
tercera_parte = array[16:17]    # element 17
cuarta_parte = array[17:18]     # element 18
quinta_parte = array[18:]       # elements 19-21

0 comments on commit 10fdb7e

Please sign in to comment.