sessionInfo()
## R version 3.6.2 (2019-12-12)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Catalina 10.15.3
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] compiler_3.6.2 magrittr_1.5 tools_3.6.2 htmltools_0.4.0
## [5] yaml_2.2.1 Rcpp_1.0.3 stringi_1.4.6 rmarkdown_2.1
## [9] knitr_1.28 stringr_1.4.0 xfun_0.12 digest_0.6.24
## [13] rlang_0.4.4 evaluate_0.14
Source: https://tensorflow.rstudio.com/keras/articles/examples/mnist_mlp.html
In this example, we train an MLP (multi-layer perceptron) on the MNIST data set. Achieve testing accuracy 98.11% after 30 epochs.
The MNIST database (Modified National Institute of Standards and Technology database) is a large database of handwritten digits (\(28 \times 28\)) that is commonly used for training and testing machine learning algorithms.
60,000 training images, 10,000 testing images.
Aquire data:
library(keras)
mnist <- dataset_mnist()
x_train <- mnist$train$x
y_train <- mnist$train$y
x_test <- mnist$test$x
y_test <- mnist$test$y
Training set:
dim(x_train)
## [1] 60000 28 28
dim(y_train)
## [1] 60000
x_train[1, , ]
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## [1,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [2,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [3,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [4,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [5,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [6,] 0 0 0 0 0 0 0 0 0 0 0 0 3
## [7,] 0 0 0 0 0 0 0 0 30 36 94 154 170
## [8,] 0 0 0 0 0 0 0 49 238 253 253 253 253
## [9,] 0 0 0 0 0 0 0 18 219 253 253 253 253
## [10,] 0 0 0 0 0 0 0 0 80 156 107 253 253
## [11,] 0 0 0 0 0 0 0 0 0 14 1 154 253
## [12,] 0 0 0 0 0 0 0 0 0 0 0 139 253
## [13,] 0 0 0 0 0 0 0 0 0 0 0 11 190
## [14,] 0 0 0 0 0 0 0 0 0 0 0 0 35
## [15,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [16,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [17,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [18,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [19,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [20,] 0 0 0 0 0 0 0 0 0 0 0 0 39
## [21,] 0 0 0 0 0 0 0 0 0 0 24 114 221
## [22,] 0 0 0 0 0 0 0 0 23 66 213 253 253
## [23,] 0 0 0 0 0 0 18 171 219 253 253 253 253
## [24,] 0 0 0 0 55 172 226 253 253 253 253 244 133
## [25,] 0 0 0 0 136 253 253 253 212 135 132 16 0
## [26,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [27,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [28,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25]
## [1,] 0 0 0 0 0 0 0 0 0 0 0 0
## [2,] 0 0 0 0 0 0 0 0 0 0 0 0
## [3,] 0 0 0 0 0 0 0 0 0 0 0 0
## [4,] 0 0 0 0 0 0 0 0 0 0 0 0
## [5,] 0 0 0 0 0 0 0 0 0 0 0 0
## [6,] 18 18 18 126 136 175 26 166 255 247 127 0
## [7,] 253 253 253 253 253 225 172 253 242 195 64 0
## [8,] 253 253 253 253 251 93 82 82 56 39 0 0
## [9,] 253 198 182 247 241 0 0 0 0 0 0 0
## [10,] 205 11 0 43 154 0 0 0 0 0 0 0
## [11,] 90 0 0 0 0 0 0 0 0 0 0 0
## [12,] 190 2 0 0 0 0 0 0 0 0 0 0
## [13,] 253 70 0 0 0 0 0 0 0 0 0 0
## [14,] 241 225 160 108 1 0 0 0 0 0 0 0
## [15,] 81 240 253 253 119 25 0 0 0 0 0 0
## [16,] 0 45 186 253 253 150 27 0 0 0 0 0
## [17,] 0 0 16 93 252 253 187 0 0 0 0 0
## [18,] 0 0 0 0 249 253 249 64 0 0 0 0
## [19,] 0 46 130 183 253 253 207 2 0 0 0 0
## [20,] 148 229 253 253 253 250 182 0 0 0 0 0
## [21,] 253 253 253 253 201 78 0 0 0 0 0 0
## [22,] 253 253 198 81 2 0 0 0 0 0 0 0
## [23,] 195 80 9 0 0 0 0 0 0 0 0 0
## [24,] 11 0 0 0 0 0 0 0 0 0 0 0
## [25,] 0 0 0 0 0 0 0 0 0 0 0 0
## [26,] 0 0 0 0 0 0 0 0 0 0 0 0
## [27,] 0 0 0 0 0 0 0 0 0 0 0 0
## [28,] 0 0 0 0 0 0 0 0 0 0 0 0
## [,26] [,27] [,28]
## [1,] 0 0 0
## [2,] 0 0 0
## [3,] 0 0 0
## [4,] 0 0 0
## [5,] 0 0 0
## [6,] 0 0 0
## [7,] 0 0 0
## [8,] 0 0 0
## [9,] 0 0 0
## [10,] 0 0 0
## [11,] 0 0 0
## [12,] 0 0 0
## [13,] 0 0 0
## [14,] 0 0 0
## [15,] 0 0 0
## [16,] 0 0 0
## [17,] 0 0 0
## [18,] 0 0 0
## [19,] 0 0 0
## [20,] 0 0 0
## [21,] 0 0 0
## [22,] 0 0 0
## [23,] 0 0 0
## [24,] 0 0 0
## [25,] 0 0 0
## [26,] 0 0 0
## [27,] 0 0 0
## [28,] 0 0 0
image(t(x_train[1, 28:1,]), useRaster=TRUE, axes=FALSE, col=grey(seq(0, 1, length = 256)))
y_train[1]
## [1] 5
Testing set:
dim(x_test)
## [1] 10000 28 28
dim(y_test)
## [1] 10000
Vectorize \(28 \times 28\) images into \(784\)-vectors and scale entries to [0, 1]:
# reshape
x_train <- array_reshape(x_train, c(nrow(x_train), 784))
x_test <- array_reshape(x_test, c(nrow(x_test), 784))
# rescale
x_train <- x_train / 255
x_test <- x_test / 255
dim(x_train)
## [1] 60000 784
dim(x_test)
## [1] 10000 784
Encode \(y\) as binary class matrix:
y_train <- to_categorical(y_train, 10)
y_test <- to_categorical(y_test, 10)
dim(y_train)
## [1] 60000 10
dim(y_test)
## [1] 10000 10
head(y_train)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,] 0 0 0 0 0 1 0 0 0 0
## [2,] 1 0 0 0 0 0 0 0 0 0
## [3,] 0 0 0 0 1 0 0 0 0 0
## [4,] 0 1 0 0 0 0 0 0 0 0
## [5,] 0 0 0 0 0 0 0 0 0 1
## [6,] 0 0 1 0 0 0 0 0 0 0
Define a sequential model (a linear stack of layers) with 2 fully-connected hidden layers (256 and 128 neurons):
model <- keras_model_sequential()
model %>%
layer_dense(units = 256, activation = 'relu', input_shape = c(784)) %>%
layer_dropout(rate = 0.4) %>%
layer_dense(units = 128, activation = 'relu') %>%
layer_dropout(rate = 0.3) %>%
layer_dense(units = 10, activation = 'softmax')
summary(model)
## Model: "sequential"
## ________________________________________________________________________________
## Layer (type) Output Shape Param #
## ================================================================================
## dense (Dense) (None, 256) 200960
## ________________________________________________________________________________
## dropout (Dropout) (None, 256) 0
## ________________________________________________________________________________
## dense_1 (Dense) (None, 128) 32896
## ________________________________________________________________________________
## dropout_1 (Dropout) (None, 128) 0
## ________________________________________________________________________________
## dense_2 (Dense) (None, 10) 1290
## ================================================================================
## Total params: 235,146
## Trainable params: 235,146
## Non-trainable params: 0
## ________________________________________________________________________________
Compile the model with appropriate loss function, optimizer, and metrics:
model %>% compile(
loss = 'categorical_crossentropy',
optimizer = optimizer_rmsprop(),
metrics = c('accuracy')
)
system.time({
history <- model %>% fit(
x_train, y_train,
epochs = 30, batch_size = 128,
validation_split = 0.2
)
})
## user system elapsed
## 186.417 74.953 98.668
plot(history)
Evaluate model performance on the test data:
model %>% evaluate(x_test, y_test)
## $loss
## [1] 0.1157451
##
## $accuracy
## [1] 0.9815
Generate predictions on new data:
model %>% predict_classes(x_test) %>% head()
## [1] 7 2 1 0 4 1
Suppose we want to fit a multinomial-logit model and use it as a baseline method to neural networks. How to do that? Of course we can use mlogit
or other packages. Instead we can fit the same model using keras, since multinomial-logit is just an MLP with (1) one input layer with linear activation and (2) one output layer with softmax link function.
# set up model
library(keras)
mlogit <- keras_model_sequential()
mlogit %>%
# layer_dense(units = 256, activation = 'linear', input_shape = c(784)) %>%
# layer_dropout(rate = 0.4) %>%
layer_dense(units = 10, activation = 'softmax', input_shape = c(784))
summary(mlogit)
## Model: "sequential_1"
## ________________________________________________________________________________
## Layer (type) Output Shape Param #
## ================================================================================
## dense_3 (Dense) (None, 10) 7850
## ================================================================================
## Total params: 7,850
## Trainable params: 7,850
## Non-trainable params: 0
## ________________________________________________________________________________
# compile model
mlogit %>% compile(
loss = 'categorical_crossentropy',
optimizer = optimizer_rmsprop(),
metrics = c('accuracy')
)
# fit model
mlogit_history <- mlogit %>% fit(
x_train, y_train,
epochs = 20, batch_size = 128,
validation_split = 0.2
)
# Evaluate model performance on the test data:
mlogit %>% evaluate(x_test, y_test)
## $loss
## [1] 0.2715195
##
## $accuracy
## [1] 0.9272
Generate predictions on new data:
mlogit %>% predict_classes(x_test) %>% head()
## [1] 7 2 1 0 4 1
Expriment: Change the linear
activation to relu
in the multinomial-logit model and see the change in classification accuracy.