forked from ichisa/R-Ladies-Freiburg-Back-Stage
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathExercise.R
79 lines (48 loc) · 2.22 KB
/
Exercise.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
library(nnet)
require(neuralnet)
require(nnet)
require(ggplot2)
# Load the wine data and label its 14 columns: the class label comes first,
# followed by the 13 measured attributes.
wines <- setNames(
  read.csv("wines.csv"),
  c(
    "label",
    "Alcohol", "Malic_acid", "Ash", "Alcalinity_of_ash",
    "Magnesium", "Total_phenols", "Flavanoids", "Nonflavanoid_phenols",
    "Proanthocyanins", "Color_intensity", "Hue",
    "OD280_OD315_of_diluted_wines", "Proline"
  )
)
# Quick look at the first rows to check that the names line up with the data.
head(wines)
# Plot the data, e.g. two different explanatory variables on the x and y axes, with the points colored by label.
# In the wine dataset, the variable `label` contains three different labels: 1, 2 and 3.
# The usual practice, as far as I know, is to encode categorical variables as a “one hot” vector. For instance, if I had three classes, like in this case, I’d need to replace the label variable with three variables like these:
# l1,l2,l3
# 1,0,0
# 0,0,1
# ...
# One-hot encode the class label: keep the 13 predictor columns (2 to 14) and
# append one 0/1 indicator column per class, built by class.ind().
wines_ <- cbind(
  wines[, 2:14],
  class.ind(as.factor(wines$label))
)
# The predictor columns keep their names through cbind(); only the three
# indicator columns need new names. The original `label` column is no longer
# needed because its information now lives in l1, l2 and l3.
names(wines_)[14:16] <- c("l1", "l2", "l3")
# Scale ----
# Be careful: only the 13 predictors are rescaled, not the one-hot label
# columns (l1, l2, l3), which are already 0/1. This is similar to the example
# in the presentation, but subsetting the right columns.
# Min-max scaling maps each predictor onto [0, 1].
scale_min_max <- function(x) {
  value_range <- range(x, na.rm = TRUE)
  (x - value_range[1]) / (value_range[2] - value_range[1])
}
scaled_ <- wines_
scaled_[1:13] <- lapply(wines_[1:13], scale_min_max)

# Split training and test data ----
# Half of the rows go into the training set and the other half into the test
# set. Fixing the seed makes the random split, and the random starting weights
# of the network below, reproducible.
set.seed(10)
n_obs <- nrow(scaled_)
train_rows <- sample(seq_len(n_obs), size = floor(n_obs / 2))
# setdiff() rather than negative indexing: `x[-integer(0), ]` would select
# zero rows instead of all rows if the training sample were empty.
test_rows <- setdiff(seq_len(n_obs), train_rows)
train_ <- scaled_[train_rows, ]
test_ <- scaled_[test_rows, ]

# Fit the model ----
# The formula is built from the column names so that all 13 predictors appear
# explicitly on the right-hand side and the three one-hot columns on the left.
predictor_terms <- paste(names(train_)[1:13], collapse = " + ")
nn_formula <- as.formula(paste("l1 + l2 + l3 ~", predictor_terms))

# Choose the NN structure through `hidden` (one entry per hidden layer).
# Fit more than one structure, e.g. `hidden = 5`, so that you can compare.
# linear.output = FALSE applies the activation function to the output layer
# too, which is what a classification task needs.
nn <- neuralnet(
  nn_formula,
  data = train_,
  hidden = c(13, 10, 3),
  act.fct = "logistic",
  linear.output = FALSE,
  lifesign = "minimal"
)
# Plot the nn ----
# rep = "best" draws only the repetition with the smallest error instead of
# one plot per repetition.
plot(nn, rep = "best")

# Evaluate the model on the test set ----
# Compute predictions from the 13 predictor columns only.
pr.nn <- compute(nn, test_[, 1:13])

# Extract results: the network output for each wine in the test set.
pr.nn_ <- pr.nn$net.result
head(pr.nn_)

# Convert the one-hot columns and the network outputs back to class indices
# by taking the column with the largest value in each row.
# max.col() breaks ties at random by default; ties.method = "first" keeps the
# result reproducible.
original_values <- max.col(test_[, 14:16], ties.method = "first")
pr.nn_2 <- max.col(pr.nn_, ties.method = "first")

# Confusion matrix: which label each wine had (rows) and which label was
# predicted by the model (columns).
table(actual = original_values, predicted = pr.nn_2)

# Accuracy (test set)
mean(pr.nn_2 == original_values)
# Exercise from: https://www.r-bloggers.com/multilabel-classification-with-neuralnet-package/