-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathClassification.py
77 lines (67 loc) · 2.49 KB
/
Classification.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
from Weighting import Weighting
from Preprocessing import Preprocessing
import numpy as np
class Classification:
    """Multinomial Naive Bayes text classifier with add-one smoothing.

    Documents are tokenised with ``Preprocessing.all_in_one_without_type`` and
    counted by a ``Weighting`` instance; both are project helpers imported at
    the top of this file.

    Attributes:
        kelas: Training labels as passed to :meth:`train`, one per document.
        weight: ``Weighting`` instance that supplies term frequencies
            (``getTf``) and the vocabulary (``getFeature``).
        prior: Despite the name, the smoothed per-class term likelihoods
            ``(count + 1) / (class_total + vocabulary_size)``; one row per
            entry of ``kelas_final``, one column per vocabulary term.
        kelas_final: Distinct labels in order of first appearance in ``kelas``.
        training_data: The documents most recently passed to :meth:`testing`.
        training_kelas: Unused by this class; kept for compatibility.
    """

    def __init__(self):
        self.kelas = []
        self.weight = Weighting()
        self.prior = []
        self.kelas_final = []
        self.training_data = []
        self.training_kelas = []

    def train(self, documents, kelas):
        """Fit the classifier on ``documents`` labelled by ``kelas``.

        Args:
            documents: Iterable of raw documents, each handed to
                ``Preprocessing.all_in_one_without_type``.
            kelas: Indexable sequence of labels; ``kelas[i]`` labels the
                i-th document.

        Raises:
            IndexError: If there are more labels than term-frequency rows.
        """
        self.kelas = kelas
        self.weight.setDocument(
            [Preprocessing.all_in_one_without_type(document) for document in documents]
        )
        # getTf() is used here as a term-major matrix (tf[term][document]);
        # transposing yields one term-count row per document.
        # NOTE(review): confirm the orientation against Weighting.getTf.
        doc_term_rows = list(map(list, zip(*self.weight.getTf())))
        vocab_size = len(self.weight.getFeature())
        # Assign fresh lists instead of appending, so calling train() again
        # does not leave likelihood rows from an earlier fit behind.
        self.kelas_final, self.prior = self._class_likelihoods(
            doc_term_rows, self.kelas, vocab_size
        )

    @staticmethod
    def _class_likelihoods(doc_term_rows, labels, vocab_size):
        """Return ``(distinct_labels, likelihood_rows)`` for the training set.

        Term counts of documents sharing a label are summed, then smoothed as
        ``(count + 1) / (total_count_in_class + vocab_size)``.
        """
        counts_by_label = {}
        for doc_idx, label in enumerate(labels):
            row = doc_term_rows[doc_idx]
            if label in counts_by_label:
                counts_by_label[label] = [
                    acc + value for acc, value in zip(counts_by_label[label], row)
                ]
            else:
                counts_by_label[label] = list(row)

        likelihood_rows = []
        for counts in counts_by_label.values():
            denominator = sum(counts) + vocab_size
            likelihood_rows.append([(count + 1) / denominator for count in counts])
        return list(counts_by_label), likelihood_rows

    def testing(self, document):
        """Classify every document in ``document``.

        Args:
            document: Iterable of raw documents.

        Returns:
            A NumPy array with one predicted label per input document.
        """
        self.training_data = document
        return np.asarray([self.do_testing(data) for data in self.training_data])

    def do_testing(self, document):
        """Classify a single raw document and return its predicted label.

        Raises:
            ValueError: If the classifier has not been trained.
        """
        print('testing document')
        print(document)
        tokens = Preprocessing.all_in_one_without_type(document)
        return self._predict_tokens(tokens, self.weight.getFeature())

    def _predict_tokens(self, tokens, features):
        """Return the label with the highest posterior for ``tokens``.

        Args:
            tokens: Preprocessed tokens of the document.
            features: Vocabulary terms, positionally aligned with the columns
                of ``self.prior``. Assumed hashable (e.g. strings).

        Raises:
            ValueError: If the classifier has not been trained.
        """
        if not self.kelas_final or not self.prior:
            raise ValueError("classifier has not been trained; call train() first")

        class_counts = {}
        for label in self.kelas:
            class_counts[label] = class_counts.get(label, 0) + 1
        total_docs = sum(class_counts.values())

        # Exact-match vocabulary lookup; setdefault keeps the first position
        # of a repeated term, matching list.index semantics.
        feature_position = {}
        for position, feature in enumerate(features):
            feature_position.setdefault(feature, position)
        positions = [feature_position[tok] for tok in tokens if tok in feature_position]

        # Sum logs rather than multiply probabilities so long documents do
        # not underflow to 0. Every matched token contributes, not just the
        # last one; with no matched tokens the class prior alone decides.
        scores = []
        for label, likelihoods in zip(self.kelas_final, self.prior):
            score = np.log(class_counts[label] / total_docs)
            for position in positions:
                score += np.log(likelihoods[position])
            scores.append(score)
        return self.kelas_final[scores.index(max(scores))]

    def hitung_akurasi(self, hasil, real):
        """Print and return the accuracy of ``hasil`` against ``real`` in percent.

        Predictions are compared with the true labels position by position.

        Args:
            hasil: Sequence of predicted labels.
            real: Sequence of true labels, aligned with ``hasil``.

        Returns:
            Percentage of positions where prediction and truth agree, or
            ``0.0`` when ``real`` is empty.
        """
        total = len(real)
        if total == 0:
            accuracy = 0.0
        else:
            correct = sum(
                1 for predicted, actual in zip(hasil, real) if predicted == actual
            )
            accuracy = correct / total * 100
        print(accuracy)
        return accuracy