-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnaive_Bayes.py
58 lines (48 loc) · 1.72 KB
/
naive_Bayes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
from itertools import groupby
def naive_bayes(dataset, row):
    """Score the given instance against every class present in the dataset.

    Args:
        dataset (Dataset): dataset used to classify; its target column
            supplies the set of candidate classes
        row (list): instance to classify

    Returns:
        list ((float, class)): one pair per distinct class, where
            float - value computed by ``naive_bayes_helper`` for that class
            class - the class label itself
        The pairs follow the iteration order of a ``set`` of labels, so
        callers should not rely on their ordering.
    """
    distinct_labels = set(dataset.get_target_column())
    scored = []
    for label in distinct_labels:
        score = naive_bayes_helper(dataset, row, label)
        scored.append((score, label))
    return scored
def naive_bayes_helper(dataset, row, cl):
    """Calculate the naive Bayes score of the given instance for one class.

    The score is the class prior ``count(cl) / total`` multiplied, for each
    attribute ``row[i]``, by the add-one smoothed likelihood
    ``(n + 1) / (count(cl) + m)`` where ``n`` is the number of rows of class
    ``cl`` whose column ``i`` equals ``row[i]`` and ``m`` is the number of
    distinct values in column ``i``. The product is returned as is; it is
    not normalized across classes here.

    Args:
        dataset (Dataset): dataset used to classify; must provide
            ``get_target_column()`` and ``get_column(index)``
        row (list): instance to classify; ``row[i]`` is matched against
            column ``i`` of the dataset
        cl: class for which to calculate the score

    Returns:
        float: score of the given class; ``0.0`` when ``cl`` has no rows in
            the dataset (which includes the empty dataset)
    """
    # Fetch the target column once; it does not change while scoring.
    targets = dataset.get_target_column()
    total_class = targets.count(cl)
    # A class without any training rows has a zero prior, so the whole
    # product is zero. Returning early also avoids dividing by an empty
    # dataset's length.
    if total_class == 0:
        return 0.0
    # Prior probability of cl
    probability = total_class / len(targets)
    for index, value in enumerate(row):
        column = dataset.get_column(index)
        # Number of distinct values the attribute takes (smoothing term)
        m = len(set(column))
        # Rows of class cl carrying this attribute value. Counting directly
        # needs no sorting, so values and labels only have to support
        # equality, not ordering.
        n = sum(
            1
            for item, target in zip(column, targets)
            if target == cl and item == value
        )
        # Add-one smoothing: a value never seen with cl (n == 0) still
        # contributes 1 / (total_class + m) instead of zeroing the product.
        probability *= (n + 1) / (total_class + m)
    return probability