-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Feat: Added more functions to preprocessing service
- Loading branch information
1 parent
d46ede9
commit 7aba0bc
Showing 2 changed files with 40 additions and 49 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,51 +1,16 @@ | ||
import json | ||
from pprint import pprint | ||
import numpy as np | ||
from sklearn.metrics import euclidean_distances | ||
|
||
from preprocessing_service import Preprocesamiento | ||
from model_word2vec_service import ModelWord2Vec | ||
import pandas as pd | ||
import nltk | ||
nltk.download('punkt') | ||
from preprocessing_service import Preprocesamiento | ||
|
||
# Leer información del archivo items.json y de comentarios test | ||
comments_array = list(open('./comentarios_test.txt', | ||
'r', encoding='utf-8').readlines()) | ||
|
||
# Leer información del archivo item_preprocess.json | ||
beck_data_preprocessing = {} | ||
try: | ||
if open('./JSON/items_preprocessing.json', 'r'): | ||
beck_data_preprocessing = json.loads( | ||
open('./JSON/items_preprocessing.json', 'r', encoding='utf-8').read()) | ||
except Exception as e: | ||
print(f'Error: {e}') | ||
|
||
|
||
preprocesamiento = Preprocesamiento() | ||
# Vector Space Embedding | ||
w2v = ModelWord2Vec() | ||
word1 = ["hoy", "querer", "morir"] | ||
word2 = ["ser", "dia", "suicidar"] | ||
w2v.add_corpus(word1) | ||
w2v.add_corpus(word2) | ||
# comment_test = preprocesamiento.preprocesamiento_con_ortografia( | ||
# comments_array[0]) | ||
# array_item = [] | ||
# # Get Vector Beck | ||
# item_string = 'Pensamiento o deseos suicidas' | ||
# for key in beck_data_preprocessing[item_string].keys(): | ||
# array_item.append(beck_data_preprocessing[item_string][key]["data"]) | ||
|
||
# print(comment_test) | ||
# i = 0 | ||
# for item in array_item: | ||
# coseno = w2v.get_cosine_distance(item, comment_test) | ||
# print(f'{item_string} - Item BECK {i} distancia coseno: ${coseno}') | ||
# euclidian = w2v.get_euclidian_distance(["hoy", "me", "quiero", "morir"], ["no", "morir"]) | ||
# print(f'{item_string} - Item BECK {i} distancia euclidiana: ${euclidian}') | ||
# i += 1 | ||
df = pd.read_csv('./comentarios_español_depresivos.csv', encoding='utf-8') | ||
pp = Preprocesamiento() | ||
|
||
comentarios = list(df['text']) | ||
clases = list(df['class']) | ||
|
||
# result = w2v.getVectorBeck(comment_test, beck_data_preprocessing) | ||
# print(result) | ||
print('-- Comentario antes --') | ||
print(comentarios[0]) | ||
print('-- Comentario despúes --') | ||
comentario_preprocesado = pp.preprocesamiento_sin_ortografia(comentarios[0]) | ||
print(comentario_preprocesado) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters