-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathexperiment_ht.py
85 lines (77 loc) · 2.9 KB
/
experiment_ht.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import numpy as np
from strlearn.evaluators import TestThenTrain
from strlearn.streams import ARFFParser
from strlearn.metrics import balanced_accuracy_score, geometric_mean_score_1, f1_score, precision, recall, specificity
from strlearn.ensembles import LearnppCDS, LearnppNIE, OOB, UOB, WAE, OUSE, OnlineBagging, SEA, KMC
from skmultiflow.trees import HoeffdingTree
import multiprocessing
# Dataset stems; each "name" maps to an input file arff/<name>.arff.
# Commented entries are runs that are currently disabled.
names = [
# "CTU-IoT-Malware-Capture-1-1_0",
"CTU-IoT-Malware-Capture-33-1-p_2",
"CTU-IoT-Malware-Capture-43-1-p_0",
# "CTU-IoT-Malware-Capture-43-1-p_3",
# "33-1-2-43-1-3"
]
# Chunk counts, parallel to `names` (position i belongs to names[i]).
# NOTE(review): keep the two lists' commented/uncommented entries in sync,
# or a name will silently pick up the wrong chunk count.
n_chunks = [
# 4000,
850,
2500,
# 1450,
# 2300
]
# Metrics evaluated on every chunk by TestThenTrain (imbalance-oriented set).
metrics = (balanced_accuracy_score, geometric_mean_score_1, f1_score, precision, recall, specificity)
# Shared base learner for every ensemble: a Hoeffding tree with the
# Hellinger split criterion (skew-insensitive splits for imbalanced streams).
base = HoeffdingTree(split_criterion='hellinger')
def worker(n, name):
    """Evaluate all stream ensembles on one ARFF stream and persist scores.

    Parameters
    ----------
    n : int
        Index into the module-level ``n_chunks`` list for this dataset.
    name : str
        Dataset stem; the stream is read from ``arff/<name>.arff`` and the
        score array is written to ``scores/<name>-ht.npy``.
    """
    # Local import keeps the file's top-level import block untouched.
    import os

    filepath = "arff/" + name + ".arff"
    # Test-then-train protocol over 250-sample chunks; chunk count is
    # dataset-specific (parallel `names`/`n_chunks` lists).
    stream = ARFFParser(filepath, chunk_size=250, n_chunks=n_chunks[n])
    # Renamed from `eval` — the original shadowed the builtin.
    evaluator = TestThenTrain(metrics=metrics, verbose=False)
    # Every compared ensemble, each wrapping the shared Hoeffding tree base.
    clfs = [
        SEA(base_estimator=base),
        OnlineBagging(base_estimator=base, n_estimators=10),
        OOB(base_estimator=base, n_estimators=10),
        UOB(base_estimator=base, n_estimators=10),
        LearnppCDS(base_estimator=base, n_estimators=10),
        LearnppNIE(base_estimator=base, n_estimators=10),
        OUSE(base_estimator=base, n_estimators=10),
        KMC(base_estimator=base, n_estimators=10),
        WAE(base_estimator=base, n_estimators=10),
    ]
    print("Started: %s" % name)
    evaluator.process(stream, clfs)
    scores = evaluator.scores
    # Create the output directory up front so hours of evaluation are not
    # lost to a missing-folder IOError at save time.
    os.makedirs("scores", exist_ok=True)
    np.save("scores/%s-ht" % name, scores)
    print("Finished: %s" % name)
# The __main__ guard is required for multiprocessing on spawn-start
# platforms (Windows, macOS default since Python 3.8): without it each
# child re-imports this module and tries to respawn the workers.
if __name__ == "__main__":
    jobs = []
    # One process per dataset; `n` indexes the matching n_chunks entry.
    for n, name in enumerate(names):
        p = multiprocessing.Process(target=worker, args=(n, name))
        jobs.append(p)
        p.start()
    # Wait for every evaluation to finish (and its scores to be saved)
    # before the parent process exits.
    for p in jobs:
        p.join()
# NOTE(review): dead code below — a serial variant of the run above. It also
# saves to "scores/%s-gnb" rather than the live "-ht" path; remove it or
# re-enable it deliberately rather than keeping both in sync by hand.
# for n, name in enumerate(names):
# print(name)
# filepath = ("arff/" + name + ".arff")
# print(filepath)
# # stream
# stream = ARFFParser(filepath, chunk_size=250, n_chunks=n_chunks[n])
# # evaluator
# eval = TestThenTrain(metrics=metrics, verbose=True)
# # classifiers
# clfs = [
# SEA(base_estimator=base),
# OnlineBagging(base_estimator=base, n_estimators=10),
# OOB(base_estimator=base, n_estimators=10),
# UOB(base_estimator=base, n_estimators=10),
# LearnppCDS(base_estimator=base, n_estimators=10),
# LearnppNIE(base_estimator=base, n_estimators=10),
# OUSE(base_estimator=base, n_estimators=10),
# KMC(base_estimator=base, n_estimators=10),
# WAE(base_estimator=base, n_estimators=10),
# ]
#
# eval.process(stream, clfs)
# scores = eval.scores
# np.save("scores/%s-gnb" % name, scores)