Skip to content

Commit

Permalink
Feat: Added Beck with first comment test
Browse files Browse the repository at this point in the history
  • Loading branch information
SebastianCB-dev committed Oct 9, 2022
1 parent cabfb07 commit 694b666
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 29 deletions.
42 changes: 14 additions & 28 deletions BECK/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,44 +5,30 @@

from preprocessing_service import Preprocesamiento
from model_word2vec_service import ModelWord2Vec
#import nltk
#nltk.download('punkt')
import nltk
nltk.download('punkt')

# Read the test comments; every line of the file is kept as one entry.
with open('./comentarios_test.txt', 'r', encoding='utf-8') as comments_file:
    comments_array = comments_file.readlines()

if not comments_array:
    # Fail with a readable message instead of an IndexError further down.
    raise SystemExit('Error: ./comentarios_test.txt has no comments to evaluate')

# Load the preprocessed items from items_preprocessing.json. A missing or
# malformed file is reported and the script continues with an empty mapping
# (best effort, same as before); the handle is always closed.
beck_data_preprocessing = {}
try:
    with open('./JSON/items_preprocessing.json', 'r',
              encoding='utf-8') as items_file:
        beck_data_preprocessing = json.load(items_file)
except (OSError, ValueError) as e:
    # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
    print(f'Error: {e}')

# Vector Space Embedding
w2v = ModelWord2Vec()
preprocesamiento = Preprocesamiento()

# Only the first comment in the file is evaluated.
comment_test = preprocesamiento.preprocesamiento_con_ortografia(
    comments_array[0])

print(comment_test)
# Get Vector Beck
result = w2v.getVectorBeck(comment_test, beck_data_preprocessing)
print(result)
16 changes: 15 additions & 1 deletion BECK/model_word2vec_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,18 @@ def get_word_vector(self, word):
:param word: La palabra cuya representación vectorial desea obtener
:return: La palabra vector para la palabra.
"""
return self.model.wv[word]
return self.model.wv[word]

def getVectorBeck(self, commentVector, beck):
    """
    Build the list of selected option values, one per item in ``beck``.

    For each item, every option's ``"data"`` is scored against the comment
    with ``self.get_cosine_distance`` and the option with the highest score
    is selected; its ``"value"`` is appended to the result. When several
    options share the highest score, the first one in iteration order wins.

    NOTE(review): the option with the *largest* ``get_cosine_distance``
    result is chosen. That is correct if the helper returns a similarity;
    if it returns a true distance (smaller means closer) the selection
    should be inverted -- confirm against the helper.

    :param commentVector: Preprocessed comment, passed unchanged as the
        first argument of ``get_cosine_distance``.
    :param beck: Mapping ``item -> {option -> {"data": ..., "value": ...}}``.
    :return: List with one ``"value"`` per item, in the iteration order
        of ``beck``.
    :raises ValueError: If an item has no options to choose from.
    """
    vector = []
    for item, options in beck.items():
        if not options:
            # Without this check an empty item would reuse the previous
            # item's choice (or hit an unbound name on the first item).
            raise ValueError(f'BECK item {item!r} has no options to compare')
        # Each option is scored exactly once; max() returns the first of
        # several equal maxima, matching the original strict "<" comparison.
        best_option = max(
            options.values(),
            key=lambda option: self.get_cosine_distance(
                commentVector, option["data"]),
        )
        vector.append(best_option['value'])
    return vector

Binary file modified BECK/word2vec.model
Binary file not shown.

0 comments on commit 694b666

Please sign in to comment.