Skip to content

Commit

Permalink
Feat: Added data
Browse files Browse the repository at this point in the history
  • Loading branch information
SebastianCB-dev committed Oct 13, 2022
1 parent 58187c0 commit 0e4d876
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 3 deletions.
Binary file added .DS_Store
Binary file not shown.
Binary file added BECK/.DS_Store
Binary file not shown.
1 change: 0 additions & 1 deletion BECK/coseno_vs_euclidian.csv

This file was deleted.

4 changes: 3 additions & 1 deletion BECK/coseno_vs_euclidian.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import json
import pprint
from model_word2vec_service import ModelWord2Vec

df = pd.read_excel("./datasets/DATASET_ENTRENAMIENTO.xlsx",index_col=[1,2]).reset_index()
comments = list(df["text"])
classes = list(df["class"])
Expand Down Expand Up @@ -48,8 +49,9 @@
new_comment["Clase"] = classes[class_comment]
df_cve = df_cve.append(new_comment, ignore_index=True)
class_comment += 1
except Exception:
except Exception as e:
print(f'Error en el comentario {class_comment} omitiendo...')
print(e)
class_comment += 1
continue

Expand Down
70 changes: 70 additions & 0 deletions BECK/get_item_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from cmath import inf
import pandas as pd
from pprint import pprint

def _argmin_strided(values, start):
    """Return the position of the smallest entry of ``values[start::2]``.

    The position is counted within the strided sub-sequence (0, 1, 2, ...),
    not within ``values`` itself. Ties keep the first occurrence, and entries
    that do not compare strictly below the running minimum (e.g. NaN) are
    skipped. Returns 0 when no entry is below positive infinity.
    """
    best_idx = 0
    best = float('inf')
    for idx, value in enumerate(values[start::2]):
        if value < best:
            best = value
            best_idx = idx
    return best_idx


df = pd.read_csv('./datasets/coseno_vs_euclidian.csv')
# Maps the position of the winning option of a 7-option item to its score.
values_items_largos = [0, 1, 1, 2, 2, 3, 3]
df_results = pd.read_csv('./results.csv')
classes = list(df['Clase'])
# Output column names for the per-item answers; the first two columns of
# results.csv are skipped because they are filled in by name below.
columns = list(df_results.columns)[2:]

# Rows are accumulated in a list and concatenated once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the frame on each call.
rows = []
# Iterate over every row of the input instead of a hard-coded row count, so
# the script neither crashes on a shorter dataset nor truncates a longer one.
for row_idx, clase in enumerate(classes):
    comment = list(df.iloc[row_idx])
    result = {
        'Comentario': comment[0],
        'Comentario Preprocesado': comment[1],
    }
    # The 28 values before the last column belong to the long items
    # (chunks of 14); everything between column 2 and those belongs to the
    # regular items (chunks of 8).
    rest_comment = comment[-29:-1]
    comment = comment[2:-29]

    column = 0
    # Regular items: within each chunk, even positions are cosine distances
    # and odd positions are Euclidean distances. Store the index of the
    # closest option under each metric.
    for start in range(0, len(comment), 8):
        data = comment[start:start + 8]
        result[columns[column]] = _argmin_strided(data, 0)
        column += 1
        result[columns[column]] = _argmin_strided(data, 1)
        column += 1

    # Long items: same interleaved layout, but the winning option index is
    # translated to a score through values_items_largos.
    for start in range(0, len(rest_comment), 14):
        data = rest_comment[start:start + 14]
        result[columns[column]] = values_items_largos[_argmin_strided(data, 0)]
        column += 1
        result[columns[column]] = values_items_largos[_argmin_strided(data, 1)]
        column += 1

    result['Clase'] = clase
    rows.append(result)

df_results = pd.concat([df_results, pd.DataFrame(rows)], ignore_index=True)

df_results.to_csv('results_coseno_euclidian.csv', index=False)
9 changes: 8 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,11 @@ model.save('word2vec.model')
Para distancia de coseno entre dos textos convertidos a arreglos.
```
coseno = model.wv.wmdistance(first_array, second_array)
```
```

## Citations
Ofir Pele and Michael Werman, "A linear time histogram metric for improved SIFT matching". <http://www.cs.huji.ac.il/~werman/Papers/ECCV2008.pdf>

Ofir Pele and Michael Werman, "Fast and robust earth mover's distances". <https://ieeexplore.ieee.org/document/5459199/>

Matt Kusner et al., "From Word Embeddings To Document Distances". <http://proceedings.mlr.press/v37/kusnerb15.pdf>

0 comments on commit 0e4d876

Please sign in to comment.