Autoencoders with R and MNIST in Deep Learning

Autoencoders are trained to reproduce their own inputs: the hidden layers and neurons do not map an input to some other outcome, but instead encode the input itself (hence "auto"-encoding).

Autoencoders can be used for dimensionality reduction, for reducing overfitting, and more. We will discuss those uses in the next article.

The h2o package for R provides many functions and arguments that are well suited to training autoencoders.

How do we evaluate the performance of an autoencoder? Accuracy is not as meaningful here as it is for other neural network models, since the model's output is a reconstruction of its own input rather than a separate target. We can still use each model's reconstruction MSE as an evaluation metric; another way to look at the results is to compute how anomalous each case is; we can also extract the deep features, although they can be tricky to interpret.
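For instance, given a fitted autoencoder m and an H2O frame train_x (hypothetical names, only to preview the calls used in the worked example below), the three approaches look roughly like this:

h2o.mse(m, train = TRUE, valid = TRUE)             # model-level reconstruction MSE
head(as.data.frame(h2o.anomaly(m, train_x)))       # per-row reconstruction error (anomaly score)
head(as.data.frame(h2o.deepfeatures(m, train_x)))  # hidden-layer activations ("deep features")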

### Autoencoder example with MNIST ###
library(jsonlite)
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(h2o)
##skipped……
library(parallel)
library(doSNOW)
##skipped……
library(data.table)
##skipped……
options(width=70,digits=2)
load_image_file <- function(filename) {
  # read an MNIST IDX3 image file into a matrix (one row per image)
  ret = list()
  f = file(filename,'rb')
  readBin(f,'integer',n=1,size=4,endian='big')          # skip the magic number
  ret$n = readBin(f,'integer',n=1,size=4,endian='big')  # number of images
  nrow = readBin(f,'integer',n=1,size=4,endian='big')   # image height
  ncol = readBin(f,'integer',n=1,size=4,endian='big')   # image width
  x = readBin(f,'integer',n=ret$n*nrow*ncol,size=1,signed=F)
  ret$x = matrix(x, ncol=nrow*ncol, byrow=T)            # one row per image, 784 pixel columns
  close(f)
  ret
}
load_label_file <- function(filename) {
  # read an MNIST IDX1 label file into an integer vector
  f = file(filename,'rb')
  readBin(f,'integer',n=1,size=4,endian='big')  # skip the magic number
  n = readBin(f,'integer',n=1,size=4,endian='big')  # number of labels
  y = readBin(f,'integer',n=n,size=1,signed=F)
  close(f)
  y
}
imagetraining<-as.data.frame(load_image_file("train-images-idx3-ubyte"))
imagetest<-as.data.frame(load_image_file("t10k-images-idx3-ubyte"))
labeltraining<-as.factor(load_label_file("train-labels-idx1-ubyte"))
labeltest<-as.factor(load_label_file("t10k-labels-idx1-ubyte"))
# overwrite the first column (the image count) with the digit label
imagetraining[,1]<-labeltraining
imagetest[,1]<-labeltest
Training<-imagetraining
Test<-imagetest
# sample 5,000 of the 60,000 training images to keep training fast
sample_n<-5000
training<-Training[sample(60000,sample_n),]

cl<-h2o.init(max_mem_size = "20G",nthreads = 10)
##  Connection successful!
##
## R is connected to the H2O cluster:
##     H2O cluster uptime:         2 hours 21 minutes
##     H2O cluster version:        3.10.3.6
##     H2O cluster version age:    1 month and 21 days
##     H2O cluster name:           H2O_started_from_R_Charles_oaq713
##     H2O cluster total nodes:    1
##     H2O cluster total memory:   6.96 GB
##     H2O cluster total cores:    4
##     H2O cluster allowed cores:  4
##     H2O cluster healthy:        TRUE
##     H2O Connection ip:          localhost
##     H2O Connection port:        54321
##     H2O Connection proxy:       NA
##     R Version:                  R version 3.3.2 (2016-10-31)
h2odigits<-as.h2o(training, destination_frame = "h2odigits")
##
  |============================================================| 100%
h2odigits_t<-as.h2o(Test, destination_frame = "h2odigits_t")
##
  |============================================================| 100%
# drop the label column: the autoencoder is trained on the pixel columns only
h2odigits_train_x<-h2odigits[,-1]
h2odigits_test_x<-h2odigits_t[,-1]
xnames<-colnames(h2odigits_train_x)

# baseline autoencoder: one hidden layer of 50 tanh units, no sparsity or dropout
h2o_m_ae_50<-h2o.deeplearning(
  x=xnames, training_frame = h2odigits_train_x,
  validation_frame = h2odigits_test_x, activation="TanhWithDropout",
  autoencoder = T, hidden=c(50),epochs=20,sparsity_beta = 0,
  input_dropout_ratio = 0,hidden_dropout_ratios = c(0),
  l1=0,l2=0)
## Warning in .h2o.startModelJob(algo, params, h2oRestApiVersion): Dropping ### skipped……
# same model with 100 hidden units
h2o_m_ae_100<-h2o.deeplearning(
  x=xnames, training_frame = h2odigits_train_x,
  validation_frame = h2odigits_test_x, activation="TanhWithDropout",
  autoencoder = T, hidden=c(100),epochs=20,sparsity_beta = 0,
  input_dropout_ratio = 0,hidden_dropout_ratios = c(0),
  l1=0,l2=0)
## Warning in .h2o.startModelJob(algo, params, h2oRestApiVersion): Dropping ### skipped……
# 100 hidden units with a sparsity penalty (sparsity_beta = 0.5)
h2o_m_ae_100_s.5<-h2o.deeplearning(
  x=xnames, training_frame = h2odigits_train_x,
  validation_frame = h2odigits_test_x, activation="TanhWithDropout",
  autoencoder = T, hidden=c(100),epochs=20,sparsity_beta = 0.5,
  input_dropout_ratio = 0,hidden_dropout_ratios = c(0),
  l1=0,l2=0)
## Warning in .h2o.startModelJob(algo, params, h2oRestApiVersion): Dropping ### skipped……
# 100 hidden units with 20% input dropout (a denoising autoencoder)
h2o_m_ae_100_i.2<-h2o.deeplearning(
  x=xnames, training_frame = h2odigits_train_x,
  validation_frame = h2odigits_test_x, activation="TanhWithDropout",
  autoencoder = T, hidden=c(100),epochs=20,sparsity_beta = 0,
  input_dropout_ratio = 0.2,hidden_dropout_ratios = c(0),
  l1=0,l2=0)
### skipped……
### check each model's reconstruction error: h2o.anomaly() returns the per-row reconstruction MSE ###
error_50<-as.data.frame(h2o.anomaly(h2o_m_ae_50,h2odigits_train_x))
error_100<-as.data.frame(h2o.anomaly(h2o_m_ae_100,h2odigits_train_x))
error_100_s.5<-as.data.frame(h2o.anomaly(h2o_m_ae_100_s.5,h2odigits_train_x))
error_100_i.2<-as.data.frame(h2o.anomaly(h2o_m_ae_100_i.2,h2odigits_train_x))
error<-as.data.table(rbind(
  cbind.data.frame(Model="50P",error_50),
  cbind.data.frame(Model="100P",error_100),
  cbind.data.frame(Model="100P 0.5 Sparse",error_100_s.5),
  cbind.data.frame(Model="100P 0.2 Dropout",error_100_i.2)
))
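For a quick numeric comparison, we can also summarise the mean per-row reconstruction MSE by model before plotting (the exact values will vary with the random sample and run):

error[,.(Mean.MSE=mean(Reconstruction.MSE)),by=Model]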
# pairwise scatterplot of per-row reconstruction MSE across the four models
error_tmp<-cbind(error_50,error_100,error_100_s.5,error_100_i.2)
plot(error_tmp)

[Figure: pairwise scatterplot of per-row reconstruction MSE for the four models]

error[which.min(error$Reconstruction.MSE),]
##    Model Reconstruction.MSE
## 1:  100P             0.0017
### we can also plot a histogram of reconstruction MSE for each model and mark its 99th percentile ###
percentile<-error[,.(
  Percentile=quantile(Reconstruction.MSE,probs=.99)
),by=Model]
library(ggplot2)
printout<-ggplot(error,aes(Reconstruction.MSE))+
  geom_histogram(binwidth = .001,fill="grey50")+
  geom_vline(aes(xintercept=Percentile),data=percentile,linetype=2)+
  theme_bw()+facet_wrap(~Model)
print(printout)

[Figure: histograms of reconstruction MSE by model, with the 99th-percentile cutoff marked]

### we can also examine the models through the correlations among their deep features ###
feature_fun<-function(model){
  # extract the hidden-layer ("deep") features for the training data,
  # then plot a histogram of their pairwise correlations
  features<-as.data.frame(h2o.deepfeatures(model,h2odigits_train_x))
  rf<-cor(features)
  rf<-data.frame(r=rf[upper.tri(rf)])
  p<-ggplot(rf,aes(r))+geom_histogram(binwidth = 0.02)+
    theme_classic()
  print(p)
}
feature_fun(h2o_m_ae_50)
##
  |============================================================| 100%

[Figure: histogram of pairwise correlations among the 50-unit model's deep features]

feature_fun(h2o_m_ae_100)
### skipped……
feature_fun(h2o_m_ae_100_s.5)
### skipped……
feature_fun(h2o_m_ae_100_i.2)
### skipped……
### what if we add a second hidden layer of 10 neurons (one per digit)? ###
h2o_m_ae_100_10<-h2o.deeplearning(
  x=xnames, training_frame = h2odigits_train_x,
  validation_frame = h2odigits_test_x, activation="TanhWithDropout",
  autoencoder = T, hidden=c(100,10),epochs=30,sparsity_beta = 0,
  input_dropout_ratio = 0,hidden_dropout_ratios = c(0,0),
  l1=0,l2=0)
## Warning in .h2o.startModelJob(algo, params, h2oRestApiVersion): Dropping
### skipped……
feature_fun(h2o_m_ae_100_10)
##
  |============================================================| 100%

[Figure: histogram of pairwise correlations among the layer-2 deep features of the 100+10 model]

feature_100_10<-as.data.frame(h2o.deepfeatures(h2o_m_ae_100_10,h2odigits_train_x,2))
##
  |============================================================| 100%
head(feature_100_10)
##   DF.L2.C1 DF.L2.C2 DF.L2.C3 DF.L2.C4 DF.L2.C5 DF.L2.C6 DF.L2.C7
## 1    0.768    0.355    0.258     0.33    -0.70   -0.150     0.24
## 2   -0.549   -0.019   -0.919    -0.03    -0.33   -0.451    -0.31
## 3    0.109   -0.065   -0.730     0.21     0.20   -0.136    -0.56
## 4    0.569   -0.268    0.366     0.11    -0.63    0.208     0.51
## 5    0.276    0.459    0.389    -0.10     0.50    0.304    -0.40
## 6   -0.065   -0.412    0.017     0.63    -0.25    0.083     0.12
##   DF.L2.C8 DF.L2.C9 DF.L2.C10
## 1    0.168     0.48    -0.085
## 2    0.358     0.49    -0.413
## 3   -0.695    -0.55     0.391
## 4    0.290    -0.05     0.231
## 5    0.321    -0.53     0.580
## 6    0.054    -0.18     0.125
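Since the second layer squeezes each digit into only 10 features, one quick sanity check is to attach the digit labels from the sampled training data and plot two of the layer-2 features coloured by digit. This is only a rough sketch: it assumes as.h2o and h2o.deepfeatures preserve the row order of the sampled training set, so that the labels line up with the extracted features.

feature_100_10$label<-training[,1]   # column 1 of the sampled data holds the digit label
p<-ggplot(feature_100_10,aes(DF.L2.C1,DF.L2.C2,colour=label))+
  geom_point(alpha=.3)+theme_bw()
print(p)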
