StatsJob.py
"""
File: StatsJob.py
Authors: Ryan J. Urbanowicz, Robert Zhang
Institution: University of Pennsylvania, Philadelphia, PA
Creation Date: 6/1/2021
License: GPL 3.0
Description: Phase 6 of AutoMLPipe-BC - This 'Job' script is called by StatsMain.py and creates summaries of ML classification evaluation statistics
(means and standard deviations), ROC and PRC plots (comparing CV performance in the same ML algorithm and comparing average performance
between ML algorithms), model feature importance averages over CV runs, boxplots comparing ML algorithms for each metric, Kruskal Wallis
and Mann Whitney statistical comparisons between ML algorithms, model feature importance boxplots for each algorithm, and composite feature
importance plots summarizing model feature importance across all ML algorithms. It is run for a single dataset from the original target
dataset folder (data_path) in Phase 1 (i.e. the stats summary is completed over all CV datasets).
"""
#Import required packages ---------------------------------------------------------------------------------------------------------------------------
import sys
import time
import pandas as pd
import glob
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
from matplotlib import rc
import os
from sklearn.metrics import auc
import csv
from statistics import mean,stdev
import pickle
import copy
def job(full_path,encoded_algos,plot_ROC,plot_PRC,plot_FI_box,class_label,instance_label,cv_partitions,plot_metric_boxplots,primary_metric,top_results,sig_cutoff,jupyterRun):
""" Run all elements of stats summary and analysis for one one the original phase 1 datasets: summaries of average and standard deviations for all metrics and modeling algorithms,
ROC and PRC plots (comparing CV performance in the same ML algorithm and comparing average performance between ML algorithms), model feature importance averages over CV runs,
boxplots comparing ML algorithms for each metric, Kruskal Wallis and Mann Whitney statistical comparisons between ML algorithms, model feature importance boxplots for each
algorithm, and composite feature importance plots summarizing model feature importance across all ML algorithms"""
job_start_time = time.time() #for tracking phase runtime
data_name = full_path.split('/')[-1]
if eval(jupyterRun):
print('Running Statistics Summary for '+str(data_name))
#Translate metric name from the scikit-learn standard (currently balanced accuracy is hardcoded for use in generating FI plots due to no-skill normalization)
metric_term_dict = {'balanced_accuracy': 'Balanced Accuracy','accuracy': 'Accuracy','f1': 'F1_Score','recall': 'Sensitivity (Recall)','precision': 'Precision (PPV)','roc_auc': 'ROC_AUC'}
primary_metric = metric_term_dict[primary_metric]
#Get algorithms run, specify algorithm abbreviations, colors to use for algorithms in plots, and original ordered feature name list
algorithms,abbrev,colors,original_headers = preparation(full_path,encoded_algos)
#Gather and summarize all evaluation metrics for each algorithm across all CVs. Returns result_table used to plot average ROC and PRC plots and metric_dict organizing all metrics over all algorithms and CVs.
result_table,metric_dict = primaryStats(algorithms,original_headers,cv_partitions,full_path,data_name,instance_label,class_label,abbrev,colors,plot_ROC,plot_PRC,jupyterRun)
#Plot ROC and PRC curves comparing average ML algorithm performance (averaged over all CVs)
if eval(jupyterRun):
print('Generating ROC and PRC plots...')
doPlotROC(result_table,colors,full_path,jupyterRun)
doPlotPRC(result_table,colors,full_path,data_name,instance_label,class_label,jupyterRun)
#Make list of metric names
if eval(jupyterRun):
print('Saving Metric Summaries...')
metrics = list(metric_dict[algorithms[0]].keys())
#Save metric means and standard deviations
saveMetricMeans(full_path,metrics,metric_dict)
saveMetricStd(full_path,metrics,metric_dict)
#Generate boxplots comparing algorithm performance for each standard metric, if specified by user
if eval(plot_metric_boxplots):
if eval(jupyterRun):
print('Generating Metric Boxplots...')
metricBoxplots(full_path,metrics,algorithms,metric_dict,jupyterRun)
#Calculate and export Kruskal Wallis, Mann Whitney U, and Wilcoxon signed-rank stats if more than one ML algorithm has been run (for the comparison) - note: stats are based on comparing the multiple CV models for each algorithm.
if len(algorithms) > 1:
if eval(jupyterRun):
print('Running Non-Parametric Statistical Significance Analysis...')
kruskal_summary = kruskalWallis(full_path,metrics,algorithms,metric_dict,sig_cutoff)
wilcoxonRank(full_path,metrics,algorithms,metric_dict,kruskal_summary,sig_cutoff)
mannWhitneyU(full_path,metrics,algorithms,metric_dict,kruskal_summary,sig_cutoff)
#Prepare for feature importance visualizations
if eval(jupyterRun):
print('Preparing for Model Feature Importance Plotting...')
fi_df_list,fi_ave_list,fi_ave_norm_list,ave_metric_list,all_feature_list,non_zero_union_features,non_zero_union_indexes = prepFI(algorithms,full_path,abbrev,metric_dict,'Balanced Accuracy')
#Select 'top' features for composite visualization
featuresToViz = selectForCompositeViz(top_results,non_zero_union_features,non_zero_union_indexes,algorithms,ave_metric_list,fi_ave_norm_list)
#Generate FI boxplots for each modeling algorithm if specified by user
if eval(plot_FI_box):
if eval(jupyterRun):
print('Generating Feature Importance Boxplots and Histograms...')
doFIBoxplots(full_path,fi_df_list,fi_ave_list,algorithms,original_headers,top_results,jupyterRun)
doFI_Histogram(full_path, fi_ave_list, algorithms, jupyterRun)
#Visualize composite FI - Currently set up to only use Balanced Accuracy for composite FI plot visualization
if eval(jupyterRun):
print('Generating Composite Feature Importance Plots...')
#Take top feature names to visualize and get associated feature importance values for each algorithm, and original data ordered feature names list
top_fi_ave_norm_list,all_feature_listToViz = getFI_To_Viz_Sorted(featuresToViz,all_feature_list,algorithms,fi_ave_norm_list) #If we want composite FI plots to be displayed in descending total bar height order.
#Generate Normalized composite FI plot
composite_FI_plot(top_fi_ave_norm_list, algorithms, list(colors.values()), all_feature_listToViz, 'Norm',full_path,jupyterRun, 'Normalized Feature Importance')
#Fractionate FI scores for normalized and fractionated composite FI plot
fracLists = fracFI(top_fi_ave_norm_list)
#Generate Normalized and Fractioned composite FI plot
composite_FI_plot(fracLists, algorithms, list(colors.values()), all_feature_listToViz, 'Norm_Frac',full_path,jupyterRun, 'Normalized and Fractioned Feature Importance')
#Weight FI scores for normalized and (model performance) weighted composite FI plot
weightedLists,weights = weightFI(ave_metric_list,top_fi_ave_norm_list)
#Generate Normalized and Weighted Composite FI plot
composite_FI_plot(weightedLists, algorithms, list(colors.values()), all_feature_listToViz, 'Norm_Weight',full_path,jupyterRun, 'Normalized and Weighted Feature Importance')
#Weight the fractionated FI scores for the normalized, fractionated, and weighted composite FI plot
weightedFracLists = weightFracFI(fracLists,weights)
#Generate Normalized, Fractionated, and Weighted Composite FI plot
composite_FI_plot(weightedFracLists, algorithms, list(colors.values()), all_feature_listToViz, 'Norm_Frac_Weight',full_path,jupyterRun, 'Normalized, Fractioned, and Weighted Feature Importance')
#Export phase runtime
saveRuntime(full_path,job_start_time)
#Parse all pipeline runtime files into a single runtime report
parseRuntime(full_path,abbrev)
# Print phase completion
print(data_name + " phase 6 complete")
experiment_path = '/'.join(full_path.split('/')[:-1])
job_file = open(experiment_path + '/jobsCompleted/job_stats_' + data_name + '.txt', 'w')
job_file.write('complete')
job_file.close()
def preparation(full_path,encoded_algos):
""" Creates directory for all results files, decodes included ML modeling algorithms that were run, specifies figure abbreviations for algorithms
and color to use for each algorithm in plots, and loads original ordered feature name list to use as a reference to facilitate combining feature
importance results across cv runs where different features may have been dropped during the feature selection phase."""
#Create Directory
if not os.path.exists(full_path+'/model_evaluation'):
os.mkdir(full_path+'/model_evaluation')
#Decode algos
algorithms = []
possible_algos = ['Naive Bayes','Logistic Regression','Decision Tree','Random Forest','Gradient Boosting','XGB','LGB','SVM','ANN','K Neighbors','eLCS','XCS','ExSTraCS']
#Decode each of the possible algorithm flags in order
for algo_index in range(len(possible_algos)):
    algorithms = decode(algorithms, encoded_algos, possible_algos, algo_index)
abbrev = {'Naive Bayes':'NB','Logistic Regression':'LR','Decision Tree':'DT','Random Forest':'RF','Gradient Boosting':'GB','XGB':'XGB','LGB':'LGB','SVM':'SVM','ANN':'ANN','K Neighbors':'KN','eLCS':'eLCS','XCS':'XCS','ExSTraCS':'ExSTraCS'}
colors = {'Naive Bayes':'grey','Logistic Regression':'black','Decision Tree':'yellow','Random Forest':'orange','Gradient Boosting':'bisque','XGB':'purple','LGB':'aqua','SVM':'blue','ANN':'red','eLCS':'firebrick','XCS':'deepskyblue','K Neighbors':'seagreen','ExSTraCS':'lightcoral'}
original_headers = pd.read_csv(full_path+"/exploratory/OriginalFeatureNames.csv",sep=',').columns.values.tolist() #Get Original Headers
return algorithms,abbrev,colors,original_headers
def decode(algorithms,encoded_algos,possible_algos,index):
""" Decodes which algorithms were applied in this pipeline. """
if encoded_algos[index] == "1":
algorithms.append(possible_algos[index])
return algorithms
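# Illustrative example (hypothetical input): if encoded_algos were the string "1100000000000", the
# decoding in preparation() would yield algorithms == ['Naive Bayes', 'Logistic Regression'], since
# only the first two positions of possible_algos are flagged with "1".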
def primaryStats(algorithms,original_headers,cv_partitions,full_path,data_name,instance_label,class_label,abbrev,colors,plot_ROC,plot_PRC,jupyterRun):
""" Combine classification metrics and model feature importance scores as well as ROC and PRC plot data across all CV datasets.
Generate ROC and PRC plots comparing separate CV models for each individual modeling algorithm."""
result_table = []
metric_dict = {}
for algorithm in algorithms: #completed for each individual ML modeling algorithm
alg_result_table = [] #stores values used in ROC and PRC plots
# Define evaluation stats variable lists
s_bac = [] # balanced accuracies
s_ac = [] # standard accuracies
s_f1 = [] # F1 scores
s_re = [] # recall values
s_sp = [] # specificities
s_pr = [] # precision values
s_tp = [] # true positives
s_tn = [] # true negatives
s_fp = [] # false positives
s_fn = [] # false negatives
s_npv = [] # negative predictive values
s_lrp = [] # likelihood ratio positive values
s_lrm = [] # likelihood ratio negative values
# Define feature importance lists
FI_all = [] # used to save model feature importances individually for each cv within single summary file (all original features in dataset prior to feature selection included)
# Define ROC plot variable lists
tprs = [] # stores interpolated true positive rates for average CV line in ROC
aucs = [] #stores individual CV areas under ROC curve to calculate average
mean_fpr = np.linspace(0, 1, 100) #used to plot average of CV line in ROC plot
mean_recall = np.linspace(0, 1, 100) #used to plot average of CV line in PRC plot
# Define PRC plot variable lists
precs = [] #stores interpolated precision values for average CV line in PRC
praucs = [] #stores individual CV areas under PRC curve to calculate average
aveprecs = [] #stores individual CV average precisions for PRC to calculate CV average
#Gather statistics over all CV partitions
for cvCount in range(0,cv_partitions):
#Unpickle saved metrics from previous phase
result_file = full_path+'/model_evaluation/pickled_metrics/'+abbrev[algorithm]+"_CV_"+str(cvCount)+"_metrics"
file = open(result_file, 'rb')
results = pickle.load(file) #[metricList, fpr, tpr, roc_auc, prec, recall, prec_rec_auc, ave_prec, fi, probas_]
file.close()
#Separate pickled results
metricList = results[0]
fpr = results[1]
tpr = results[2]
roc_auc = results[3]
prec = results[4]
recall = results[5]
prec_rec_auc = results[6]
ave_prec = results[7]
fi = results[8]
#Separate metrics from metricList
s_bac.append(metricList[0])
s_ac.append(metricList[1])
s_f1.append(metricList[2])
s_re.append(metricList[3])
s_sp.append(metricList[4])
s_pr.append(metricList[5])
s_tp.append(metricList[6])
s_tn.append(metricList[7])
s_fp.append(metricList[8])
s_fn.append(metricList[9])
s_npv.append(metricList[10])
s_lrp.append(metricList[11])
s_lrm.append(metricList[12])
#update list that stores values used in ROC and PRC plots
alg_result_table.append([fpr, tpr, roc_auc, prec, recall, prec_rec_auc, ave_prec]) # alg_result_table.append([fpr, tpr, roc_auc, recall, prec, prec_rec_auc, ave_prec])
# Update ROC plot variable lists needed to plot all CVs in one ROC plot
tprs.append(np.interp(mean_fpr, fpr, tpr))
tprs[-1][0] = 0.0
aucs.append(roc_auc)
# Update PRC plot variable lists needed to plot all CVs in one PRC plot
precs.append(np.interp(mean_recall, recall, prec)) #old way
#precs.append(interp(mean_recall, prec, recall))
praucs.append(prec_rec_auc)
aveprecs.append(ave_prec)
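# Note on the interpolation above: np.interp(x_grid, x, y) resamples this CV's ROC/PRC curve onto the
# shared 100-point grid so that curves from all CV partitions can be averaged point-wise. Minimal
# illustration (made-up values): np.interp([0.0, 0.5, 1.0], [0.0, 1.0], [0.0, 1.0]) evaluates to
# [0.0, 0.5, 1.0].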
# Format feature importance scores as list (takes into account that all features are not in each CV partition)
tempList = []
j = 0
headers = pd.read_csv(full_path+'/CVDatasets/'+data_name+'_CV_'+str(cvCount)+'_Test.csv').columns.values.tolist()
if instance_label != 'None':
headers.remove(instance_label)
headers.remove(class_label)
for each in original_headers:
if each in headers: # Check if current feature from original dataset was in the partition
# Deal with features not being in original order (find index of current feature with list.index())
f_index = headers.index(each)
tempList.append(fi[f_index])
else:
tempList.append(0)
j += 1
FI_all.append(tempList)
if eval(jupyterRun):
print(algorithm)
#Define values for the mean ROC line (mean of individual CVs)
mean_tpr = np.mean(tprs, axis=0)
mean_tpr[-1] = 1.0
mean_auc = np.mean(aucs)
#Generate ROC Plot (including individual CV's lines, average line, and no skill line) - based on https://scikit-learn.org/stable/auto_examples/model_selection/plot_roc_crossval.html-----------------------
if eval(plot_ROC):
# Set figure dimensions
plt.rcParams["figure.figsize"] = (6,6)
# Plot individual CV ROC lines
for i in range(cv_partitions):
plt.plot(alg_result_table[i][0], alg_result_table[i][1], lw=1, alpha=0.3,label='ROC fold %d (AUC = %0.3f)' % (i, alg_result_table[i][2]))
# Plot no-skill line
plt.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r',label='No-Skill', alpha=.8)
# Plot average line for all CVs
std_auc = np.std(aucs) # AUC standard deviations across CVs
plt.plot(mean_fpr, mean_tpr, color=colors[algorithm],label=r'Mean ROC (AUC = %0.3f $\pm$ %0.3f)' % (mean_auc, std_auc),lw=2, alpha=.8)
# Plot standard deviation grey zone of curves
std_tpr = np.std(tprs, axis=0)
tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
plt.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2,label=r'$\pm$ 1 std. dev.')
#Specify plot axes,labels, and legend
plt.xlim([-0.05, 1.05])
plt.ylim([-0.05, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc="upper left", bbox_to_anchor=(1.01,1))
#Export and/or show plot
plt.savefig(full_path+'/model_evaluation/'+abbrev[algorithm]+"_ROC.png", bbox_inches="tight")
if eval(jupyterRun):
plt.show()
else:
plt.close('all')
#Define values for the mean PRC line (mean of individual CVs)
mean_prec = np.mean(precs, axis=0)
mean_pr_auc = np.mean(praucs)
#Generate PRC Plot (including individual CV's lines, average line, and no skill line)------------------------------------------------------------------------------------------------------------------
if eval(plot_PRC):
# Set figure dimensions
plt.rcParams["figure.figsize"] = (6,6)
# Plot individual CV PRC lines
for i in range(cv_partitions):
plt.plot(alg_result_table[i][4], alg_result_table[i][3], lw=1, alpha=0.3, label='PRC fold %d (AUC = %0.3f)' % (i, alg_result_table[i][5]))
#Estimate no skill line based on the fraction of cases found in the first test dataset
test = pd.read_csv(full_path + '/CVDatasets/' + data_name + '_CV_0_Test.csv') #Technically there could be a unique no-skill line for each CV dataset based on final class balance (however only one is needed, and stratified CV attempts to keep partitions with similar/same class balance)
testY = test[class_label].values
noskill = len(testY[testY == 1]) / len(testY) # Fraction of cases
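# Worked example (hypothetical class balance): if 30 of 100 test instances belong to class 1, then
# noskill = 0.3, i.e. the precision a no-skill classifier would achieve at any recall.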
# Plot no-skill line
plt.plot([0, 1], [noskill, noskill], color='orange', linestyle='--', label='No-Skill', alpha=.8)
# Plot average line for all CVs
std_pr_auc = np.std(praucs)
plt.plot(mean_recall, mean_prec, color=colors[algorithm],label=r'Mean PRC (AUC = %0.3f $\pm$ %0.3f)' % (mean_pr_auc, std_pr_auc),lw=2, alpha=.8)
# Plot standard deviation grey zone of curves
std_prec = np.std(precs, axis=0)
precs_upper = np.minimum(mean_prec + std_prec, 1)
precs_lower = np.maximum(mean_prec - std_prec, 0)
plt.fill_between(mean_recall, precs_lower, precs_upper, color='grey', alpha=.2,label=r'$\pm$ 1 std. dev.')
#Specify plot axes,labels, and legend
plt.xlim([-0.05, 1.05])
plt.ylim([-0.05, 1.05])
plt.xlabel('Recall (Sensitivity)')
plt.ylabel('Precision (PPV)')
plt.legend(loc="upper left", bbox_to_anchor=(1.01,1))
#Export and/or show plot
plt.savefig(full_path+'/model_evaluation/'+abbrev[algorithm]+"_PRC.png", bbox_inches="tight")
if eval(jupyterRun):
plt.show()
else:
plt.close('all')
#Export and save all CV metric stats for each individual algorithm -----------------------------------------------------------------------------
results = {'Balanced Accuracy': s_bac, 'Accuracy': s_ac, 'F1_Score': s_f1, 'Sensitivity (Recall)': s_re, 'Specificity': s_sp,'Precision (PPV)': s_pr, 'TP': s_tp, 'TN': s_tn, 'FP': s_fp, 'FN': s_fn, 'NPV': s_npv, 'LR+': s_lrp, 'LR-': s_lrm, 'ROC_AUC': aucs,'PRC_AUC': praucs, 'PRC_APS': aveprecs}
dr = pd.DataFrame(results)
filepath = full_path+'/model_evaluation/'+abbrev[algorithm]+"_performance.csv"
dr.to_csv(filepath, header=True, index=False)
metric_dict[algorithm] = results
#Save Average FI Stats
save_FI(FI_all, abbrev[algorithm], original_headers, full_path)
#Store ave metrics for creating global ROC and PRC plots later
mean_ave_prec = np.mean(aveprecs)
#result_dict = {'algorithm':algorithm,'fpr':mean_fpr, 'tpr':mean_tpr, 'auc':mean_auc, 'prec':mean_prec, 'pr_auc':mean_pr_auc, 'ave_prec':mean_ave_prec}
result_dict = {'algorithm':algorithm,'fpr':mean_fpr, 'tpr':mean_tpr, 'auc':mean_auc, 'prec':mean_prec, 'recall':mean_recall, 'pr_auc':mean_pr_auc, 'ave_prec':mean_ave_prec}
result_table.append(result_dict)
#Result table later used to create global ROC and PRC plots comparing average ML algorithm performance.
result_table = pd.DataFrame.from_dict(result_table)
result_table.set_index('algorithm',inplace=True)
return result_table,metric_dict
def save_FI(FI_all,algorithm,globalFeatureList,full_path):
""" Creates directory to store model feature importance results and, for each algorithm, exports a file of feature importance scores from each CV. """
dr = pd.DataFrame(FI_all)
if not os.path.exists(full_path+'/model_evaluation/feature_importance/'):
os.mkdir(full_path+'/model_evaluation/feature_importance/')
filepath = full_path+'/model_evaluation/feature_importance/'+algorithm+"_FI.csv"
dr.to_csv(filepath, header=globalFeatureList, index=False)
def doPlotROC(result_table,colors,full_path,jupyterRun):
""" Generate ROC plot comparing average ML algorithm performance (over all CV training/testing sets)"""
count = 0
#Plot curves for each individual ML algorithm
for i in result_table.index:
plt.plot(result_table.loc[i]['fpr'],result_table.loc[i]['tpr'], color=colors[i],label="{}, AUC={:.3f}".format(i, result_table.loc[i]['auc']))
count += 1
# Set figure dimensions
plt.rcParams["figure.figsize"] = (6,6)
# Plot no-skill line
plt.plot([0, 1], [0, 1], color='orange', linestyle='--', label='No-Skill', alpha=.8)
#Specify plot axes,labels, and legend
plt.xticks(np.arange(0.0, 1.1, step=0.1))
plt.xlabel("False Positive Rate", fontsize=15)
plt.yticks(np.arange(0.0, 1.1, step=0.1))
plt.ylabel("True Positive Rate", fontsize=15)
plt.legend(loc="upper left", bbox_to_anchor=(1.01,1))
#Export and/or show plot
plt.savefig(full_path+'/model_evaluation/Summary_ROC.png', bbox_inches="tight")
if eval(jupyterRun):
plt.show()
else:
plt.close('all')
def doPlotPRC(result_table,colors,full_path,data_name,instance_label,class_label,jupyterRun):
""" Generate PRC plot comparing average ML algorithm performance (over all CV training/testing sets)"""
count = 0
#Plot curves for each individual ML algorithm
for i in result_table.index:
plt.plot(result_table.loc[i]['recall'],result_table.loc[i]['prec'], color=colors[i],label="{}, AUC={:.3f}, APS={:.3f}".format(i, result_table.loc[i]['pr_auc'],result_table.loc[i]['ave_prec']))
count += 1
#Estimate no skill line based on the fraction of cases found in the first test dataset
test = pd.read_csv(full_path+'/CVDatasets/'+data_name+'_CV_0_Test.csv')
if instance_label != 'None':
test = test.drop(instance_label, axis=1)
testY = test[class_label].values
noskill = len(testY[testY == 1]) / len(testY) # Fraction of cases
# Plot no-skill line
plt.plot([0, 1], [noskill, noskill], color='orange', linestyle='--',label='No-Skill', alpha=.8)
#Specify plot axes,labels, and legend
plt.xticks(np.arange(0.0, 1.1, step=0.1))
plt.xlabel("Recall (Sensitivity)", fontsize=15)
plt.yticks(np.arange(0.0, 1.1, step=0.1))
plt.ylabel("Precision (PPV)", fontsize=15)
plt.legend(loc="upper left", bbox_to_anchor=(1.01,1))
#Export and/or show plot
plt.savefig(full_path+'/model_evaluation/Summary_PRC.png', bbox_inches="tight")
if eval(jupyterRun):
plt.show()
else:
plt.close('all')
def saveMetricMeans(full_path,metrics,metric_dict):
""" Exports csv file with average metric values (over all CVs) for each ML modeling algorithm"""
with open(full_path+'/model_evaluation/Summary_performance_mean.csv',mode='w', newline="") as file:
writer = csv.writer(file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
e = ['']
e.extend(metrics)
writer.writerow(e) #Write headers (balanced accuracy, etc.)
for algorithm in metric_dict:
astats = []
for l in list(metric_dict[algorithm].values()):
l = [float(i) for i in l]
meani = mean(l)
std = stdev(l)
astats.append(str(meani))
toAdd = [algorithm]
toAdd.extend(astats)
writer.writerow(toAdd)
file.close()
def saveMetricStd(full_path,metrics,metric_dict):
""" Exports csv file with metric value standard deviations (over all CVs) for each ML modeling algorithm"""
with open(full_path + '/model_evaluation/Summary_performance_std.csv', mode='w', newline="") as file:
writer = csv.writer(file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
e = ['']
e.extend(metrics)
writer.writerow(e) # Write headers (balanced accuracy, etc.)
for algorithm in metric_dict:
astats = []
for l in list(metric_dict[algorithm].values()):
l = [float(i) for i in l]
std = stdev(l)
astats.append(str(std))
toAdd = [algorithm]
toAdd.extend(astats)
writer.writerow(toAdd)
file.close()
def metricBoxplots(full_path,metrics,algorithms,metric_dict,jupyterRun):
""" Export boxplots comparing algorithm performance for each standard metric"""
if not os.path.exists(full_path + '/model_evaluation/metricBoxplots'):
os.mkdir(full_path + '/model_evaluation/metricBoxplots')
for metric in metrics:
tempList = []
for algorithm in algorithms:
tempList.append(metric_dict[algorithm][metric])
td = pd.DataFrame(tempList)
td = td.transpose()
td.columns = algorithms
#Generate boxplot
boxplot = td.boxplot(column=algorithms,rot=90)
#Specify plot labels
plt.ylabel(str(metric))
plt.xlabel('ML Algorithm')
#Export and/or show plot
plt.savefig(full_path + '/model_evaluation/metricBoxplots/Compare_'+metric+'.png', bbox_inches="tight")
if eval(jupyterRun):
plt.show()
else:
plt.close('all')
def kruskalWallis(full_path,metrics,algorithms,metric_dict,sig_cutoff):
""" Apply non-parametric Kruskal Wallis one-way ANOVA on ranks. Determines if there is a statistically significant difference in algorithm performance across CV runs.
Completed for each standard metric separately."""
# Create directory to store significance testing results (used for the Kruskal Wallis, Mann Whitney U, and Wilcoxon tests)
if not os.path.exists(full_path + '/model_evaluation/statistical_comparisons'):
os.mkdir(full_path + '/model_evaluation/statistical_comparisons')
#Create dataframe to store analysis results for each metric
label = ['Statistic', 'P-Value', 'Sig(*)']
kruskal_summary = pd.DataFrame(index=metrics, columns=label)
#Apply Kruskal Wallis test for each metric
for metric in metrics:
tempArray = []
for algorithm in algorithms:
tempArray.append(metric_dict[algorithm][metric])
try:
result = stats.kruskal(*tempArray)
except:
result = [tempArray[0],1]
kruskal_summary.at[metric, 'Statistic'] = str(round(result[0], 6))
kruskal_summary.at[metric, 'P-Value'] = str(round(result[1], 6))
if result[1] < sig_cutoff:
kruskal_summary.at[metric, 'Sig(*)'] = str('*')
else:
kruskal_summary.at[metric, 'Sig(*)'] = str('')
#Export analysis summary to .csv file
kruskal_summary.to_csv(full_path + '/model_evaluation/statistical_comparisons/KruskalWallis.csv')
return kruskal_summary
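# For reference: stats.kruskal receives one list of CV scores per algorithm and returns a
# (statistic, p-value) pair; the try/except above guards against cases where stats.kruskal raises an
# error (e.g. when every score is identical). Hedged example with made-up scores:
# stats.kruskal([0.70, 0.72, 0.71], [0.80, 0.82, 0.81]) gives statistic ~ 3.86 with p just under 0.05.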
def wilcoxonRank(full_path,metrics,algorithms,metric_dict,kruskal_summary,sig_cutoff):
""" Apply non-parametric Wilcoxon signed-rank test (pairwise comparisons). If a significant Kruskal Wallis algorithm difference was found for a given metric, Wilcoxon tests individual algorithm pairs
to determine if there is a statistically significant difference in algorithm performance across CV runs. Test statistic will be zero if all scores from one set are
larger than the other."""
for metric in metrics:
if kruskal_summary['Sig(*)'][metric] == '*':
wilcoxon_stats = []
done = []
for algorithm1 in algorithms:
for algorithm2 in algorithms:
if not [algorithm1,algorithm2] in done and not [algorithm2,algorithm1] in done and algorithm1 != algorithm2:
set1 = metric_dict[algorithm1][metric]
set2 = metric_dict[algorithm2][metric]
#handle error when metric values are equal for both algorithms
if set1 == set2: # Check if all nums are equal in sets
report = ['NA',1]
else: # Apply Wilcoxon Rank Sum test
report = stats.wilcoxon(set1,set2)
#Summarize test information in list
tempstats = [algorithm1,algorithm2,report[0],report[1],'']
if report[1] < sig_cutoff:
tempstats[4] = '*'
wilcoxon_stats.append(tempstats)
done.append([algorithm1,algorithm2])
#Export test results
wilcoxon_stats_df = pd.DataFrame(wilcoxon_stats)
wilcoxon_stats_df.columns = ['Algorithm 1', 'Algorithm 2', 'Statistic', 'P-Value', 'Sig(*)']
wilcoxon_stats_df.to_csv(full_path + '/model_evaluation/statistical_comparisons/WilcoxonRank_'+metric+'.csv', index=False)
def mannWhitneyU(full_path,metrics,algorithms,metric_dict,kruskal_summary,sig_cutoff):
""" Apply non-parametric Mann Whitney U-test (pairwise comparisons). If a significant Kruskal Wallis algorithm difference was found for a given metric, Mann Whitney tests individual algorithm pairs
to determine if there is a statistically significant difference in algorithm performance across CV runs. Test statistic will be zero if all scores from one set are
larger than the other."""
for metric in metrics:
if kruskal_summary['Sig(*)'][metric] == '*':
mann_stats = []
done = []
for algorithm1 in algorithms:
for algorithm2 in algorithms:
if not [algorithm1,algorithm2] in done and not [algorithm2,algorithm1] in done and algorithm1 != algorithm2:
set1 = metric_dict[algorithm1][metric]
set2 = metric_dict[algorithm2][metric]
if set1 == set2: # Check if all nums are equal in sets
report = ['NA',1]
else: #Apply Mann Whitney U test
report = stats.mannwhitneyu(set1,set2)
#Summarize test information in list
tempstats = [algorithm1,algorithm2,report[0],report[1],'']
if report[1] < sig_cutoff:
tempstats[4] = '*'
mann_stats.append(tempstats)
done.append([algorithm1,algorithm2])
#Export test results
mann_stats_df = pd.DataFrame(mann_stats)
mann_stats_df.columns = ['Algorithm 1', 'Algorithm 2', 'Statistic', 'P-Value', 'Sig(*)']
mann_stats_df.to_csv(full_path + '/model_evaluation/statistical_comparisons/MannWhitneyU_'+metric+'.csv', index=False)
def prepFI(algorithms,full_path,abbrev,metric_dict,primary_metric):
""" Organizes and prepares model feature importance data for boxplot and composite feature importance figure generation."""
#Initialize required lists
fi_df_list = [] # algorithm feature importance dataframe list (used to generate FI boxplots for each algorithm)
fi_ave_list = [] # algorithm feature importance averages list (used to generate composite FI barplots)
ave_metric_list = [] # algorithm focus metric averages list (used in weighted FI viz)
all_feature_list = [] # list of pre-feature selection feature names as they appear in FI reports for each algorithm
#Get necessary feature importance data and primary metric data (currently only 'balanced accuracy' can be used for this)
for algorithm in algorithms:
# Get relevant feature importance info
temp_df = pd.read_csv(full_path+'/model_evaluation/feature_importance/'+abbrev[algorithm]+"_FI.csv") #CV FI scores for all original features in dataset.
if algorithm == algorithms[0]: # Should be same for all algorithm files (i.e. all original features in standard CV dataset order)
all_feature_list = temp_df.columns.tolist()
fi_df_list.append(temp_df)
fi_ave_list.append(temp_df.mean().tolist()) #Saves average FI scores over CV runs
# Get relevant metric info
avgBA = mean(metric_dict[algorithm][primary_metric])
ave_metric_list.append(avgBA)
#Normalize Average Feature importance scores so they fall between (0 - 1)
fi_ave_norm_list = []
for each in fi_ave_list: # each algorithm
normList = []
for i in range(len(each)): #each feature (score) in original data order
if each[i] <= 0: #Feature importance scores assumed to be uninformative if at or below 0
normList.append(0)
else:
normList.append((each[i]) / (max(each)))
fi_ave_norm_list.append(normList)
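# Normalization example (hypothetical averages): average FI scores of [0.4, -0.1, 0.2] become
# [1.0, 0, 0.5] -- scores at or below zero are treated as uninformative, and the rest are divided by
# the algorithm's maximum average score (0.4 here).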
#Identify features with non-zero averages (step towards excluding features that had zero feature importance for all algorithms)
alg_non_zero_FI_list = [] #stores list of feature name lists that are non-zero for each algorithm
for each in fi_ave_list: # each algorithm
temp_non_zero_list = []
for i in range(len(each)): # each feature
if each[i] > 0.0:
temp_non_zero_list.append(all_feature_list[i]) #add feature names with positive values (doesn't need to be normalized for this)
alg_non_zero_FI_list.append(temp_non_zero_list)
non_zero_union_features = alg_non_zero_FI_list[0] # grab first algorithm's list
#Identify union of features with non-zero averages over all algorithms (i.e. if any algorithm found a non-zero score it will be considered for inclusion in top feature visualizations)
for j in range(1, len(algorithms)):
non_zero_union_features = list(set(non_zero_union_features) | set(alg_non_zero_FI_list[j]))
non_zero_union_indexes = []
for i in non_zero_union_features:
non_zero_union_indexes.append(all_feature_list.index(i))
return fi_df_list,fi_ave_list,fi_ave_norm_list,ave_metric_list,all_feature_list,non_zero_union_features,non_zero_union_indexes
def selectForCompositeViz(top_results,non_zero_union_features,non_zero_union_indexes,algorithms,ave_metric_list,fi_ave_norm_list):
""" Identify list of top features over all algorithms to visualize (note that best features to vizualize are chosen using algorithm performance weighting and normalization:
frac plays no useful role here only for viz). All features included if there are fewer than 'top_results'. Top features are determined by the sum of performance
(i.e. balanced accuracy) weighted feature importances over all algorithms."""
featuresToViz = None
#Create performance weighted score sum dictionary for all features
scoreSumDict = {}
i = 0
for each in non_zero_union_features: # for each non-zero feature
for j in range(len(algorithms)): # for each algorithm
# grab target score from each algorithm
score = fi_ave_norm_list[j][non_zero_union_indexes[i]]
# multiply score by algorithm performance weight
weight = ave_metric_list[j]
if weight <= .5:
weight = 0
if not weight == 0:
weight = (weight - 0.5) / 0.5
score = score * weight
#score = score * ave_metric_list[j]
if not each in scoreSumDict:
scoreSumDict[each] = score
else:
scoreSumDict[each] += score
i += 1
# Sort features by decreasing score
scoreSumDict_features = sorted(scoreSumDict, key=lambda x: scoreSumDict[x], reverse=True)
if len(non_zero_union_features) > top_results: #Keep all features if there are fewer than specified top results
featuresToViz = scoreSumDict_features[0:top_results]
else:
featuresToViz = scoreSumDict_features
return featuresToViz #list of feature names to visualize in composite FI plots.
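# Selection example (hypothetical feature names and weighted score sums): with top_results = 3 and
# scoreSumDict == {'F1': 2.4, 'F7': 1.9, 'F2': 0.8, 'F5': 0.1}, featuresToViz would be
# ['F1', 'F7', 'F2']; if there were top_results or fewer non-zero-union features, all would be kept.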
def doFIBoxplots(full_path,fi_df_list,fi_ave_list,algorithms,original_headers,top_results, jupyterRun):
""" Generate individual feature importance boxplots for each algorithm """
algorithmCounter = 0
for algorithm in algorithms: #each algorithm
#Make average feature importance score dictionary
scoreDict = {}
counter = 0
for ave_score in fi_ave_list[algorithmCounter]: #each feature
scoreDict[original_headers[counter]] = ave_score
counter += 1
# Sort features by decreasing score
scoreDict_features = sorted(scoreDict, key=lambda x: scoreDict[x], reverse=True)
#Make list of feature names to visualize
if len(original_headers) > top_results:
featuresToViz = scoreDict_features[0:top_results]
else:
featuresToViz = scoreDict_features
# FI score dataframe for current algorithm
df = fi_df_list[algorithmCounter]
# Subset of dataframe (in ranked order) to visualize
viz_df = df[featuresToViz]
#Generate Boxplot
fig = plt.figure(figsize=(15, 4))
boxplot = viz_df.boxplot(rot=90)
plt.title(algorithm)
plt.ylabel('Feature Importance Score')
plt.xlabel('Features')
plt.xticks(np.arange(1, len(featuresToViz) + 1), featuresToViz, rotation='vertical')
plt.savefig(full_path+'/model_evaluation/feature_importance/' + algorithm + '_boxplot',bbox_inches="tight")
if eval(jupyterRun):
plt.show()
else:
plt.close('all')
algorithmCounter += 1
def doFI_Histogram(full_path, fi_ave_list, algorithms, jupyterRun):
""" Generate histogram showing distribution of average feature importances scores for each algorithm. """
algorithmCounter = 0
for algorithm in algorithms: #each algorithm
aveScores = fi_ave_list[algorithmCounter]
#Plot a histogram of average feature importance
plt.hist(aveScores,bins=100)
plt.xlabel("Average Feature Importance")
plt.ylabel("Frequency")
plt.title("Histogram of Average Feature Importance for "+str(algorithm))
plt.xticks(rotation = 'vertical')
plt.savefig(full_path+'/model_evaluation/feature_importance/' + algorithm + '_histogram',bbox_inches="tight")
if eval(jupyterRun):
plt.show()
else:
plt.close('all')
def getFI_To_Viz_Sorted(featuresToViz,all_feature_list,algorithms,fi_ave_norm_list):
""" Takes a list of top features names for vizualization, gets their indexes. In every composite FI plot features are ordered the same way
they are selected for vizualization (i.e. normalized and performance weighted). Because of this feature bars are only perfectly ordered in
descending order for the normalized + performance weighted composite plot. """
#Get original feature indexes for selected feature names
feature_indexToViz = [] #indexes of top features
for i in featuresToViz:
feature_indexToViz.append(all_feature_list.index(i))
# Create list of top feature importance values in original dataset feature order
top_fi_ave_norm_list = [] #feature importance values of top features for each algorithm (list of lists)
for i in range(len(algorithms)):
tempList = []
for j in feature_indexToViz: #each top feature index
tempList.append(fi_ave_norm_list[i][j]) #add corresponding FI value
top_fi_ave_norm_list.append(tempList)
all_feature_listToViz = featuresToViz
return top_fi_ave_norm_list,all_feature_listToViz
def composite_FI_plot(fi_list, algorithms, algColors, all_feature_listToViz, figName,full_path,jupyterRun,yLabelText):
""" Generate composite feature importance plot given list of feature names and associated feature importance scores for each algorithm.
This is run for different transformations of the normalized feature importance scores. """
# Set basic plot properties
rc('font', weight='bold', size=16)
# The position of the bars on the x-axis
r = all_feature_listToViz #feature names
#Set width of bars
barWidth = 0.75
#Set figure dimensions
plt.figure(figsize=(24, 12))
#Plot first algorithm FI scores (lowest) bar
p1 = plt.bar(r, fi_list[0], color=algColors[0], edgecolor='white', width=barWidth)
#Automatically calculate space needed to plot next bar on top of the one before it
bottoms = [] #list of space used by previous algorithms for each feature (so next bar can be placed directly above it)
for i in range(len(algorithms) - 1):
for j in range(i + 1):
if j == 0:
bottom = np.array(fi_list[0])
else:
bottom += np.array(fi_list[j])
bottoms.append(bottom)
if not isinstance(bottoms, list):
bottoms = bottoms.tolist()
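# Stacking example (hypothetical scores for 3 algorithms over 2 features): fi_list == [[1, 2], [3, 4],
# [5, 6]] yields bottoms equivalent to [[1, 2], [4, 6]], i.e. each algorithm's bars start at the
# cumulative height of all algorithm bars plotted beneath them.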
#Plot subsequent feature bars for each subsequent algorithm
ps = [p1[0]]
for i in range(len(algorithms) - 1):
p = plt.bar(r, fi_list[i + 1], bottom=bottoms[i], color=algColors[i + 1], edgecolor='white', width=barWidth)
ps.append(p[0])
lines = tuple(ps)
# Specify axes info and legend
plt.xticks(np.arange(len(all_feature_listToViz)), all_feature_listToViz, rotation='vertical')
plt.xlabel("Feature", fontsize=20)
plt.ylabel(yLabelText, fontsize=20)
#plt.legend(lines[::-1], algorithms[::-1],loc="upper left", bbox_to_anchor=(1.01,1)) #legend outside plot
plt.legend(lines[::-1], algorithms[::-1],loc="upper right")
#Export and/or show plot
plt.savefig(full_path+'/model_evaluation/feature_importance/Compare_FI_' + figName + '.png', bbox_inches='tight')
if eval(jupyterRun):
plt.show()
else:
plt.close('all')
def fracFI(top_fi_ave_norm_list):
""" Transforms feature scores so that they sum to 1 over all features for a given algorithm. This way the normalized and fracionated composit bar plot
offers equal total bar area for every algorithm. The intuition here is that if an algorithm gives the same FI scores for all top features it won't be
overly represented in the resulting plot (i.e. all features can have the same maximum feature importance which might lead to the impression that an
algorithm is working better than it is.) Instead, that maximum 'bar-real-estate' has to be divided by the total number of features. Notably, this
transformation has the potential to alter total algorithm FI bar height ranking of features. """
fracLists = []
for each in top_fi_ave_norm_list: #each algorithm
fracList = []
for i in range(len(each)): #each feature
if sum(each) == 0: #check that all feature scores are not zero to avoid zero division error
fracList.append(0)
else:
fracList.append((each[i] / (sum(each))))
fracLists.append(fracList)
return fracLists
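# Fractionation example (hypothetical normalized scores): [1.0, 0.5, 0.5] becomes [0.5, 0.25, 0.25]
# (each score divided by the sum, 2.0), so every algorithm contributes the same total bar area to the
# composite plot.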
def weightFI(ave_metric_list,top_fi_ave_norm_list):
""" Weights the feature importance scores by algorithm performance (intuitive because when interpreting feature importances we want to place more weight on better performing algorithms) """
# Prepare weights
weights = []
# replace all balanced accuracies <=.5 with 0 (i.e. these are no better than random chance)
for i in range(len(ave_metric_list)):
if ave_metric_list[i] <= .5:
ave_metric_list[i] = 0
# normalize balanced accuracies
for i in range(len(ave_metric_list)):
if ave_metric_list[i] == 0:
weights.append(0)
else:
weights.append((ave_metric_list[i] - 0.5) / 0.5)
# Weight normalized feature importances
weightedLists = []
for i in range(len(top_fi_ave_norm_list)): #each algorithm
weightList = np.multiply(weights[i], top_fi_ave_norm_list[i]).tolist()
weightedLists.append(weightList)
return weightedLists,weights
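# Weighting example (hypothetical balanced accuracies): ave_metric_list == [0.75, 0.45] yields
# weights == [0.5, 0] -- a balanced accuracy of 0.75 rescales to (0.75 - 0.5) / 0.5 = 0.5, while
# anything at or below 0.5 (no better than chance) is weighted to 0.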
def weightFracFI(fracLists,weights):
""" Weight normalized and fractionated feature importances. """
weightedFracLists = []
for i in range(len(fracLists)):
weightList = np.multiply(weights[i], fracLists[i]).tolist()
weightedFracLists.append(weightList)
return weightedFracLists
def saveRuntime(full_path,job_start_time):
""" Save phase runtime """
runtime_file = open(full_path + '/runtime/runtime_Stats.txt', 'w')
runtime_file.write(str(time.time() - job_start_time))
runtime_file.close()
def parseRuntime(full_path,abbrev):
""" Loads runtime summaries from entire pipeline and parses them into a single summary file."""
dict = {}
for file_path in glob.glob(full_path+'/runtime/*.txt'):
f = open(file_path,'r')
val = float(f.readline())
ref = file_path.split('/')[-1].split('_')[1].split('.')[0]
if ref in abbrev:
ref = abbrev[ref]
if not ref in dict:
dict[ref] = val
else:
dict[ref] += val
with open(full_path+'/runtimes.csv',mode='w', newline="") as file:
writer = csv.writer(file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
writer.writerow(["Pipeline Component","Time (sec)"])
writer.writerow(["Exploratory Analysis",dict['exploratory']])
writer.writerow(["Preprocessing",dict['preprocessing']])
try:
writer.writerow(["Mutual Information",dict['mutualinformation']])
except:
pass
try:
writer.writerow(["MultiSURF",dict['multisurf']])
except:
pass
writer.writerow(["Feature Selection",dict['featureselection']])
#Write a runtime row for each ML algorithm that has a recorded runtime (algorithms that were not run are skipped)
algorithm_rows = [("Naive Bayes",'NB'),("Logistic Regression",'LR'),("Decision Tree",'DT'),("Random Forest",'RF'),
                  ("Gradient Boosting",'GB'),("XGB",'XGB'),("LGB",'LGB'),("Support Vector Machine",'SVM'),
                  ("Artificial Neural Network",'ANN'),("K Nearest Neighbors",'KN'),("eLCS",'eLCS'),
                  ("XCS",'XCS'),("ExSTraCS",'ExSTraCS')]
for row_label,abbrev_key in algorithm_rows:
    try:
        writer.writerow([row_label,dict[abbrev_key]])
    except KeyError:
        pass
writer.writerow(["Stats Summary",dict['Stats']])
if __name__ == '__main__':
job(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5],sys.argv[6],sys.argv[7],int(sys.argv[8]),sys.argv[9],sys.argv[10],int(sys.argv[11]),float(sys.argv[12]),sys.argv[13])
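# Example command-line invocation (all paths and values below are hypothetical; this script is
# normally launched for each dataset by StatsMain.py):
# python StatsJob.py /output/experiment1/mydata 1111111111111 True True True Class None 10 True balanced_accuracy 40 0.05 False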