-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathEvaluateFunction.py
58 lines (47 loc) · 2.18 KB
/
EvaluateFunction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import random
import numpy as np
def all_position_values(user_interaction_df):
    """
    Return the positions of every cell that holds a value (is not NaN/None).

    The result is a list of two-element lists ``[[row, column], ...]``:
    element 0 of each pair is the row label taken from the frame's index,
    element 1 is the column label. Pairs are ordered column by column and,
    inside a column, in index order.

    The scan is positional, so frames with repeated index or column labels
    are handled (each matching cell yields its own pair).
    """
    has_value = user_interaction_df.notna().to_numpy()
    row_labels = user_interaction_df.index.tolist()
    column_labels = user_interaction_df.columns.tolist()
    # nonzero walks its input in row-major order; feeding it the transpose
    # therefore yields hits grouped by column first, then by row.
    column_hits, row_hits = np.nonzero(has_value.T)
    return [
        [row_labels[row_pos], column_labels[column_pos]]
        for column_pos, row_pos in zip(column_hits.tolist(), row_hits.tolist())
    ]
def random_train_set(user_interaction_df, size_of_train_set):
    """
    Create a random train set by hiding a share of the known values.

    A fraction ``1 - size_of_train_set`` of the cells that currently hold a
    value is chosen at random (the count is truncated toward zero by ``int``)
    and overwritten with NaN in a copy of ``user_interaction_df``. The frame
    passed in is not modified.

    Returns a tuple ``(train_set, sample)``: the modified copy and the list of
    ``[row, column]`` positions whose values were replaced.
    """
    values_position = all_position_values(user_interaction_df)
    holdout_count = int((1 - size_of_train_set) * len(values_position))
    # Keep the count inside [0, number of known cells] so that a
    # size_of_train_set outside 0..1 does not make random.sample raise.
    holdout_count = max(0, min(holdout_count, len(values_position)))
    # Draw WITHOUT replacement: every hidden position is distinct, so exactly
    # holdout_count cells are blanked and none is listed twice in the sample.
    sample = random.sample(values_position, holdout_count)
    train_set = user_interaction_df.copy()
    for position in sample:
        train_set.loc[position[0], position[1]] = np.nan
    return train_set, sample
def predicted_true_value(train_set, original_set, samples):
    """
    Collect, for every position in ``samples``, the value found in
    ``train_set`` and the value found in ``original_set``.

    Each position is indexed as ``position[0]`` (row label) and
    ``position[1]`` (column label) and looked up with ``.loc``.

    Returns a tuple ``(predicted_values, true_values)`` of two lists that
    follow the order of ``samples``.
    """

    def _values_at(frame):
        # One label-based lookup per sampled position, in sample order.
        return [frame.loc[spot[0], spot[1]] for spot in samples]

    from_train = _values_at(train_set)
    from_original = _values_at(original_set)
    return from_train, from_original
def count_close_predictions(true_value, predicted_value, range):
    """
    Count the predictions whose absolute distance to the matching true value
    is strictly smaller than ``range``.

    ``true_value`` and ``predicted_value`` are converted to NumPy arrays and
    compared element by element.

    Returns a tuple ``(count, share)``: the number of close predictions and
    that number divided by the total number of compared values.
    """
    truths = np.array(true_value)
    guesses = np.array(predicted_value)
    gaps = np.absolute(truths - guesses)
    # The boolean mask already marks the hits; summing it counts them.
    is_close = gaps < range
    hits = is_close.sum()
    return hits, hits / len(gaps)