# deep1.R
library(RCurl)
library(jsonlite)
library(caret)
library(e1071)
library(dplyr)
library(readr)
library(skimr)
library(statmod)
library(MASS)
# install.packages(c("nnet","neuralnet", "RSNNS", "h2o", "deepnet", "darch"))
library(nnet) # only one hidden layer
library(neuralnet) # only one hidden layer, advanced training possibilities
library(RSNNS) # variety of NN models
library(deepnet) # DBN and RBM capabilities
library(darch) # same, but pure R code (slow)
library(h2o) # java-based, fast
# ------ INITIALIZING H2O
c1 <- h2o.init(max_mem_size = "3G", nthreads = 2)
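# side note: h2o is initialized above but not used further in this script;
# a minimal, self-contained sketch of its deep learning API (using iris
# instead of the digit data so it runs on its own; layer size and epochs
# are arbitrary choices):
hf <- as.h2o(iris)                                # push a data frame to the h2o cluster
m.h2o <- h2o.deeplearning(x = 1:4, y = "Species", # predictors by index, outcome by name
                          training_frame = hf,
                          hidden = c(5),          # one hidden layer with 5 neurons
                          epochs = 10)
h2o.confusionMatrix(m.h2o)                        # confusion matrix on the training data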
# ------ FIRST MODEL TRAINING
# Case: we receive a data frame in which the digit label (Y variable) is predicted from the pixel values of the 784 pixel positions (28 x 28 images)
tr <- read_csv("data/train.csv")
dim(tr)
colnames(tr)
# skim(tr) # extremely sparse
tr$label <- as.factor(tr$label)
i <- 1:5000
dig.tr <- tr[i, -1] # w/o Y variable
dig.ts <- factor(as.vector(t(tr[i, 1]))) # the outcome must be a plain factor vector, not a one-column data frame (t() drops the factor class)
barplot(table(dig.ts)) # table(), because the factor outcome must be summarized into counts before plotting
# roughly equal class distribution, no need to alter the modelling approach
set.seed(1234)
dig.m1 <- caret::train(x = dig.tr, y = dig.ts, # train() from caret functions as a wrapper
                       method = "nnet",
                       tuneGrid = expand.grid( # grid for hyperparameter tuning
                         .size = c(5),   # 5 hidden neurons
                         .decay = 0.1),  # weight decay (regularization), not the learning rate
                       trControl = trainControl(method = "none"),
                       MaxNWts = 10000,  # max number of weights
                       maxit = 100)      # max iterations
# takeaway: read the documentation; some methods expect very specific input
# formats that the warning message does not spell out (e.g. a factor instead
# of a character or numeric vector)
dig.pd1 <- predict(dig.m1) # predictions on the training rows
barplot(table(dig.pd1))
# low accuracy: the predicted class distribution differs noticeably from the actual one
caret::confusionMatrix(xtabs(~dig.pd1 + dig.ts))
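# note: the predictions above are made on the same rows the model was trained
# on; a quick out-of-sample sketch (rows 5001:6000 are an arbitrary slice
# outside i, any unseen rows would do):
j <- 5001:6000
dig.ho <- as.vector(t(tr[j, 1]))                         # held-out labels
mean(as.character(predict(dig.m1, tr[j, -1])) == dig.ho) # held-out accuracy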
# for explanations, see PDF
# Kappa is more reliable, especially for imbalanced datasets
# (here it is lower as well, although the data is not imbalanced: 9.8 to 11.6% prevalence)
# Kappa (Cohen's kappa) is like classification accuracy, except that it is
# normalized at the baseline of random chance on your dataset. It is a more
# useful measure on problems with a class imbalance (e.g. with a 70-30 split
# for classes 0 and 1, you can achieve 70% accuracy by predicting all
# instances as class 0).
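# making the kappa formula concrete: kappa = (p_o - p_e) / (1 - p_e), where
# p_o is the observed accuracy and p_e the agreement expected by chance;
# a minimal sketch with made-up counts for a 2-class problem:
cm <- matrix(c(60, 10, 10, 20), nrow = 2,
             dimnames = list(pred = 0:1, truth = 0:1))
p_o <- sum(diag(cm)) / sum(cm)                    # observed accuracy: 0.80
p_e <- sum(rowSums(cm) * colSums(cm)) / sum(cm)^2 # chance agreement: 0.58
(p_o - p_e) / (1 - p_e)                           # kappa: ~0.52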
# ------ BUILDING MORE ADVANCED MODELS
dig.m2 <- caret::train(x = dig.tr, y = dig.ts,
                       method = "nnet",
                       tuneGrid = expand.grid(
                         .size = c(10),
                         .decay = 0.1),
                       trControl = trainControl(method = "none"),
                       MaxNWts = 50000,
                       maxit = 100)
dig.pd2 <- predict(dig.m2)
barplot(table(dig.pd2))
caret::confusionMatrix(xtabs(~dig.pd2 + dig.ts))
dig.m3 <- mlp(as.matrix(dig.tr),
              decodeClassLabels(dig.ts), # one-hot encode the class labels
              size = 10,
              learnFunc = "Rprop",
              shufflePatterns = FALSE,
              maxit = 60)
dig.pd3 <- fitted.values(dig.m3)      # one row of class probabilities per observation
dig.pd3 <- encodeClassLabels(dig.pd3) # back to class indices 1..10
barplot(table(dig.pd3))
caret::confusionMatrix(xtabs(~I(dig.pd3 - 1) + dig.ts)) # -1 maps indices 1..10 to digits 0..9
# reminder from Predictive Analytics: use I() for arithmetic inside a formula
# (e.g. powers, subtraction), so the operators are not parsed as formula syntax
# str(dig.pd3 - 1)
# class(I(dig.pd3))
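# a one-line illustration: inside a formula, "- 1" would be read as formula
# syntax (drop the intercept), so the arithmetic must be wrapped in I()
v <- c(1, 2, 2)
xtabs(~I(v - 1)) # tabulates the recoded values 0 and 1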
# the RSNNS model ran through faster and with more accurate results
# Q: coincidence, or due to the better algorithm (esp. the Rprop learning function)?
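# one way to check: time both trainings with the same iteration budget
# (a sketch; maxit is equalized so the comparison is fair, and absolute
# runtimes depend on hardware):
t.nnet <- system.time(
  caret::train(x = dig.tr, y = dig.ts, method = "nnet",
               tuneGrid = expand.grid(.size = 10, .decay = 0.1),
               trControl = trainControl(method = "none"),
               MaxNWts = 50000, maxit = 60)
)
t.rprop <- system.time(
  mlp(as.matrix(dig.tr), decodeClassLabels(dig.ts),
      size = 10, learnFunc = "Rprop", shufflePatterns = FALSE, maxit = 60)
)
rbind(nnet = t.nnet, rprop = t.rprop) # elapsed seconds side by side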