-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathfunctions.py
145 lines (113 loc) · 6.03 KB
/
functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import pandas as pd
import numpy as np
import zipfile
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel
from nltk.corpus import stopwords
import nltk
from wordcloud import WordCloud
# Fetch the NLTK stop-word corpus when this module is imported; the
# recommender below reads it through stopwords.words('english').
nltk.download('stopwords')
# Load data from ZIP archives
def load_hotels_data(zip_file_path):
    """Read a CSV file stored inside a ZIP archive into a DataFrame.

    The first archive member whose name ends in ``.csv`` (case-insensitive)
    is loaded. When no member has that extension, the first non-directory
    member is used instead, which matches the previous behaviour for
    archives that hold a single data file.

    Args:
        zip_file_path: Path of the ZIP archive, or any object accepted by
            ``zipfile.ZipFile`` (for example a binary file-like object).

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv`` for the
        selected member.

    Raises:
        IndexError: If the archive contains no files. The exception type is
            the one the original implementation raised in this situation.
    """
    with zipfile.ZipFile(zip_file_path, 'r') as zip_file:
        # Directory entries end with "/" and can never be parsed as CSV.
        members = [name for name in zip_file.namelist() if not name.endswith('/')]
        if not members:
            raise IndexError(f"ZIP archive {zip_file_path!r} contains no files")
        csv_file_name = next(
            (name for name in members if name.lower().endswith('.csv')),
            members[0],
        )
        with zip_file.open(csv_file_name) as csv_file:
            return pd.read_csv(csv_file)
# Load the hotels dataset
def load_hotel_data():
    """Return the hotels table read from ``df_hotels.zip``.

    The archive location is a fixed path relative to the working directory;
    reading is delegated to ``load_hotels_data``.
    """
    return load_hotels_data('2_Datasets/beta/hotelbeds/df_hotels.zip')
# Load the recommendations dataset
def load_recommendations_data():
    """Return the recommendations table read from ``df_hotels_.zip``.

    The archive location is a fixed path relative to the working directory;
    reading is delegated to ``load_hotels_data``.
    """
    return load_hotels_data('2_Datasets/beta/hotelbeds/df_hotels_.zip')
# Module-level hotel table, loaded once at import time; the listing and
# filtering functions below all read from it.
df_hotels_filter = load_hotel_data()
# Build the lists of hotels/states/cities/POIs requested through AJAX
def hotels_list_to_html():
    """Collect the option lists used by the front end.

    Returns:
        A 4-tuple of lists, in this order:
        hotel names read from ``hotels_name_to_frontend.csv`` (missing values
        removed, duplicates kept), then the distinct non-null values of the
        ``state_name``, ``city_name`` and ``poi_name`` columns of the
        module-level ``df_hotels_filter`` table, each in first-seen order.
    """
    hotel_names = pd.read_csv("2_Datasets/launch/hotels_name_to_frontend.csv")["name"].dropna()

    def distinct_values(column):
        # Keep the first occurrence of every value, then discard missing ones.
        return df_hotels_filter[column].drop_duplicates().dropna().tolist()

    return (
        hotel_names.tolist(),
        distinct_values("state_name"),
        distinct_values("city_name"),
        distinct_values("poi_name"),
    )
# Filters for finding the best-rated hotels
def filters(state: str = '0', city: str = '0', name_point: str = '0'):
    """Return the three best-rated hotels matching the given filters as HTML.

    Each argument different from the sentinel ``'0'`` narrows the search on
    its column of the module-level ``df_hotels_filter`` table. All supplied
    filters are combined, so a ``city`` + ``name_point`` search without a
    ``state`` honours both values (the point of interest used to be ignored
    in that case).

    Args:
        state: Value to match in ``state_name``; ``'0'`` means "not set".
        city: Value to match in ``city_name``; ``'0'`` means "not set".
        name_point: Value to match in ``poi_name``; ``'0'`` means "not set".

    Returns:
        An HTML table (``str``) with at most three rows ordered by ``stars``
        descending, or the message ``'INGRESAR UNA BÚSQUEDA'`` when no filter
        was supplied or the lookup failed.
    """
    criteria = (
        ('state_name', state),
        ('city_name', city),
        ('poi_name', name_point),
    )
    active = [(column, value) for column, value in criteria if value != '0']
    if not active:
        # Without this guard the function fell through and returned None.
        return 'INGRESAR UNA BÚSQUEDA'

    try:
        matches = df_hotels_filter
        for column, value in active:
            matches = matches[matches[column] == value]
        ranked = matches.sort_values(by=['stars'], ascending=False)
        # Columns are selected by position; the order defines the table layout.
        top_three = ranked.iloc[:3, [11, 12, 1, 15, 2, 4, 5, 8, 9, 13, 14]]
        return top_three.to_html(index=False, justify="center")
    except Exception:
        # Best-effort contract kept from the original: any lookup failure
        # (missing column, too few columns, ...) yields the prompt message.
        # Unlike a bare ``except``, this lets KeyboardInterrupt/SystemExit through.
        return 'INGRESAR UNA BÚSQUEDA'
# Lists narrowed down by state
def list_filter_state(estado: str):
    """Return the cities and points of interest recorded for one state.

    Args:
        estado: Value compared for equality against ``state_name`` in the
            module-level ``df_hotels_filter`` table.

    Returns:
        A ``(cities, pois)`` tuple of lists holding the distinct non-null
        ``city_name`` and ``poi_name`` values of the matching rows.
    """
    in_state = df_hotels_filter[df_hotels_filter["state_name"] == estado]
    cities = in_state["city_name"].drop_duplicates().dropna().tolist()
    pois = in_state["poi_name"].drop_duplicates().dropna().tolist()
    return cities, pois
# List narrowed down by city
def list_filter_city(ciudad: str):
    """Return the distinct non-null ``poi_name`` values recorded for one city.

    Args:
        ciudad: Value compared for equality against ``city_name`` in the
            module-level ``df_hotels_filter`` table.

    Returns:
        A list of ``poi_name`` values in first-seen order.
    """
    city_mask = df_hotels_filter["city_name"] == ciudad
    pois = df_hotels_filter.loc[city_mask, "poi_name"]
    return pois.drop_duplicates().dropna().tolist()
# Sentiment counting
def get_sentiments_count(title, preview_recommendations):
    """Count how many rows carry each sentiment label for one hotel.

    Args:
        title: Hotel name, matched for equality against the ``name`` column.
        preview_recommendations: DataFrame with a ``name`` column and, for
            the matching rows to be counted, a ``sentiment_text`` column.

    Returns:
        A ``dict`` mapping each ``sentiment_text`` value to its number of
        occurrences (keys in first-seen order), or the single-item list
        ``["Hotel no encontrado en la Base de Datos."]`` when no row matches
        ``title``.
    """
    # Imported here so the function does not depend on a file-level import.
    from collections import Counter

    recommendations = preview_recommendations[preview_recommendations['name'] == title]
    if recommendations.empty:
        return ["Hotel no encontrado en la Base de Datos."]

    # Counter tallies in one pass and keeps first-seen key order; convert
    # back to a plain dict so callers receive the same type as before.
    return dict(Counter(recommendations['sentiment_text'].tolist()))
# Recommend similar hotels
def load_data_and_generate_recommendations():
    """Build a review-text-based hotel recommender.

    Loads the recommendations table, vectorises the ``review`` column with
    TF-IDF (English stop words removed), computes the pairwise similarity of
    all reviews and pre-computes, for every hotel name, the rows of its most
    similar hotels.

    Returns:
        A function ``recommend(hotel)`` that returns an HTML table (``str``)
        with the stored recommendations for ``hotel``. It raises ``KeyError``
        when ``hotel`` is not a name present in the loaded table. If several
        rows share a name, the recommendations of the last such row are kept.
    """
    df_hotels = load_recommendations_data()

    # Drop common, uninformative English words before weighting terms.
    stop = list(stopwords.words('english'))
    tfidf = TfidfVectorizer(stop_words=stop)
    # Missing reviews are treated as empty text; the vectoriser rejects NaN.
    tfidf_matrix = tfidf.fit_transform(df_hotels['review'].fillna(''))
    # TfidfVectorizer L2-normalises rows by default, so the linear kernel
    # (dot product) of the matrix with itself is the cosine similarity.
    cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)

    # Number of hotels to recommend.
    n = 6
    results = {}
    # enumerate() yields row positions, which is what the similarity matrix
    # and .iloc expect regardless of the DataFrame's index labels.
    for position, name in enumerate(df_hotels['name']):
        # The n + 1 highest-scoring rows, best first.
        ranked = cosine_similarities[position].argsort()[:-n - 2:-1]
        # Remove the hotel itself explicitly rather than assuming it ranks
        # first (ties or an empty review can put another row ahead of it).
        similar_items = [i for i in ranked if i != position][:n]
        results[f"{name}"] = df_hotels.iloc[similar_items, [1, 9, 2, 8, 7, 3, 4, 5, 6]]

    def recommend(hotel):
        """Return the pre-computed recommendations for ``hotel`` as HTML."""
        df_recommend = results[hotel][
            ['name', 'stars', 'address', 'city_name', 'state_name', 'phones', 'web']
        ]
        return df_recommend.to_html(index=False, justify="center")

    return recommend
# Load the data and build the recommendation function once, at import time;
# the returned callable is kept at module level.
recommendation_function = load_data_and_generate_recommendations()