Skip to content

Commit

Permalink
Made train dataset
Browse files — browse the repository at this point in the history
  • Loading branch information
SebastianCB-dev committed Oct 29, 2022
1 parent e15e4b4 commit d284fc9
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 30 deletions.
45 changes: 24 additions & 21 deletions BECK/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,24 +25,27 @@
preprocesamiento = Preprocesamiento()
# Vector Space Embedding
w2v = ModelWord2Vec()

comment_test = preprocesamiento.preprocesamiento_con_ortografia(
comments_array[0])
array_item = []
# Get Vector Beck
item_string = 'Pensamiento o deseos suicidas'
for key in beck_data_preprocessing[item_string].keys():
array_item.append(beck_data_preprocessing[item_string][key]["data"])

print(comment_test)
i = 0
for item in array_item:
coseno = w2v.get_cosine_distance(item, comment_test)
print(f'{item_string} - Item BECK {i} distancia coseno: ${coseno}')
euclidian = w2v.get_euclidian_distance(["hoy", "me", "quiero", "morir"], ["no", "morir"])
print(f'{item_string} - Item BECK {i} distancia euclidiana: ${euclidian}')
i += 1


result = w2v.getVectorBeck(comment_test, beck_data_preprocessing)
print(result)
word1 = ["hoy", "querer", "morir"]
word2 = ["ser", "dia", "suicidar"]
w2v.add_corpus(word1)
w2v.add_corpus(word2)
# comment_test = preprocesamiento.preprocesamiento_con_ortografia(
# comments_array[0])
# array_item = []
# # Get Vector Beck
# item_string = 'Pensamiento o deseos suicidas'
# for key in beck_data_preprocessing[item_string].keys():
# array_item.append(beck_data_preprocessing[item_string][key]["data"])

# print(comment_test)
# i = 0
# for item in array_item:
# coseno = w2v.get_cosine_distance(item, comment_test)
# print(f'{item_string} - Item BECK {i} distancia coseno: ${coseno}')
# euclidian = w2v.get_euclidian_distance(["hoy", "me", "quiero", "morir"], ["no", "morir"])
# print(f'{item_string} - Item BECK {i} distancia euclidiana: ${euclidian}')
# i += 1


# result = w2v.getVectorBeck(comment_test, beck_data_preprocessing)
# print(result)
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pprint
from model_word2vec_service import ModelWord2Vec

df = pd.read_excel("./datasets/DATASET_ENTRENAMIENTO.xlsx",index_col=[1,2]).reset_index()
df = pd.read_csv("../DATASET_ENTRENAMIENTO.csv", encoding='latin-1')
comments = list(df["text"])
classes = list(df["class"])

Expand All @@ -14,7 +14,7 @@
preprocesamiento = Preprocesamiento()

w2v = ModelWord2Vec()
df_cve = pd.read_csv('./coseno_vs_euclidian.csv')
df_cve = pd.read_csv('../coseno.csv')
columns = list(df_cve.columns)[2:]

# Lectura beck
Expand All @@ -39,11 +39,8 @@
w2v.add_corpus(comment_preprocesado)
for item in beck_data_preprocessing.keys():
for result in beck_data_preprocessing[item].keys():
new_comment[columns[contador]] = w2v.get_cosine_distance(comment_preprocesado, beck_data_preprocessing[item][result]["data"])
contador += 1
new_comment[columns[contador]] = w2v.get_euclidian_distance(comment_preprocesado, beck_data_preprocessing[item][result]["data"])
# new_comment[columns[contador]] = 0
contador += 1
new_comment[columns[contador]] = w2v.get_cosine_similarity(comment_preprocesado, beck_data_preprocessing[item][result]["data"])
contador += 1

# Add to dataframe
new_comment["Clase"] = classes[class_comment]
Expand Down
File renamed without changes.
4 changes: 2 additions & 2 deletions BECK/model_word2vec_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,15 +66,15 @@ def get_euclidian_distance(self, corpus_a, corpus_b):
return np.linalg.norm(np.array(vector_corpus_a) - np.array(vector_corpus_b))


def get_cosine_distance(self, corpus_a, corpus_b):
def get_cosine_similarity(self, corpus_a, corpus_b):
"""
Return the cosine similarity between two corpora, as computed by the Word2Vec model's `wv.n_similarity`.
:param corpus_a: The first corpus to compare
:param corpus_b: The corpus to compare against corpus_a
:return: The cosine similarity between the two corpora.
"""
return self.model.wv.wmdistance(corpus_a, corpus_b)
return self.model.wv.n_similarity(corpus_a, corpus_b)

def get_word_vector(self, word):
"""
Expand Down
Binary file modified BECK/word2vec.model
Binary file not shown.

0 comments on commit d284fc9

Please sign in to comment.